In [36]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# <center> Logistic Regression

In [37]:
class LogisticModel:

    def __init__(self,multiclass):

        """
            intialises the Logistic model class

            Inputs:
                multiclass: specifics if model is a binary classifier or not
        """

        self.W = None
        self.multiclass = multiclass

    
    def predict(self,data,percentage):
        """
            Makes prediction on X dataset
        """

        sigmoid = lambda z: 1/(1+np.exp(-z))

        m,n = data.shape
        X = np.ones((m,1))
        X = np.append(X,data,axis=1)

        if self.multiclass == True:

            ans = []

            output = X @ self.W.T

            m,n = output.shape

            for i in range(0,m,1):
                
                temp = output[i,:]
                index = np.argmax(temp) + 1
                ans.append(index)

            return np.array(ans)


        elif self.multiclass == False and percentage == True:
            return sigmoid(self.W @ X.T)
        else:
            return np.round(sigmoid(self.W @ X.T)).flatten()


    def train(self,data,y,alpha,tol,L,num_iter,seed):

        """
        computes theta Matrix using the logistic Regression
        
        inputs:
            X        : data in the form of the design matrix
            y        : the labels associated with the data
            alpha    : the learning rate
            tol      : the margin of error
            num_iter : the number of times algorithms must loop
            L        : the regularisation parameter(lambda)
        """

        m,n = data.shape

        if(seed == True):
            np.random.seed(101)
            Theta0 = np.random.randn(1,n+1)

        else:
            Theta0 = np.random.randn(1,n+1)
        
        X = np.ones((m,1))
        X = np.append(X,data,axis=1)



        if(self.multiclass == True):
            self.W = self.One_vs_all(X,y,Theta0,alpha,tol,L,num_iter)
        else:
            self.W = self.LogisticRegression(X,y,Theta0,alpha,tol,L,num_iter)
            

    def LogisticRegression(self,X,y,theta0,alpha,tol,L,num_iter):
        """
            computes theta values using the psuedo inverse
            
            inputs:
                X        : data in the form of the design matrix
                y        : the labels associated with the data
                theta0   : the intial guess on the learning parameters
                alpha    : the learning rate
                tol      : the margin of error
                num_iter : the number of times algorithms must loop
                L        : the regularisation parameter(lambda)
                
            outputs:
                theta    : the learning parameters given the model
        """
        
        
        sigmoid = lambda z: 1/(1+np.exp(-z))
        
        i = 1
        V = np.copy(theta0)
        V[0] = 0
        theta_new  = theta0 - alpha*(sigmoid(theta0 @ X.T) - y) @ X + L*V
        
        while np.linalg.norm(theta_new-theta0) >tol and i <=num_iter:
            i+=1
            theta0 = theta_new
            V = np.copy(theta0)
            V[0] = 0
            theta_new  = theta0 - alpha*(sigmoid(theta0 @ X.T) - y) @ X + L*V
        
        return theta_new


    def One_vs_all(self,X,y,Theta0,alpha,tol,L,num_iter):

        """
        computes theta Matrix using the logistic Regression
        
        inputs:
            X        : data in the form of the design matrix
            y        : the labels associated with the data
            theta0   : the intial guess on the learning parameters
            alpha    : the learning rate
            tol      : the margin of error
            num_iter : the number of times algorithms must loop
            L        : the regularisation parameter(lambda)
            
        outputs:
            theta    : the learning parameters given the model
        """
    
        outcomes = np.unique(y)
        Param = []
    
        for i in range(len(outcomes)):
        
            value = outcomes[i]
            y_sub = self.y_subset(y,value)
            temp_theta = Theta0[i,:]
            temp_param = self.LogisticRegression(X,y_sub,temp_theta,alpha,tol,L,num_iter)
            Param.append(temp_param)
        
        return np.array(Param)



    def y_subset(self,y,value):
    
        """
            creates the sub labels for that specific label to train multiclass logistic regression
            
            inputs:
                y        : the true labels associated with the data
                value    : the label that will be used to generate the y_subset
            
            outputs:
                y_subset : the labels that will be used to train the logistic regression 
        """
        
        ans = []
        
        for i in range(0,len(y),1):
            
            if(y[i]==value):
                ans.append(1)
            else:
                ans.append(0)
        return ans

    def confusion_matrix(self,output,y):
        """
            prints out the confusion matrix information

            Inputs:
                outputs : the predicted outcome of the model
                y       : the true label of the data
        """
        outcomes = list(np.unique(y))
        
        matrix = np.zeros((len(outcomes),len(outcomes)))
        
        for i in range(0,len(y),1):
            
            predicted_value = output[i]
            true_value = y[i]
            
            row = outcomes.index(predicted_value)
            col = outcomes.index(true_value)
            
            matrix[row,col] += 1
            
        
        accurary = 0
        
        for i in range(0,len(outcomes),1):
            accurary+= matrix[i][i]
            
        accurary/= len(y)
        f_alarm = matrix[1][0]/(matrix[1][0]+matrix[1][1])
        miss = matrix[0][1]/(matrix[0][0]+matrix[0][1])
        recall = 1-miss
        precision = matrix[0][0]/(matrix[0][0]+matrix[1][0])
        con_matrix = pd.DataFrame(data = matrix,index= outcomes,columns=outcomes)

        print('\n============================================================================\n')
        print('accurary :',accurary)
        # print('\n')
        print('error :',1-accurary)
        # print('\n')
        # print('false alarm :',f_alarm)
        # print('\n')
        # print('miss :',miss)
        # print('\n')
        # print('recall :',recall)
        # print('\n')
        # print('precision :',precision)
        # print('\n')
        print('confusion matrix : \n')
        print(con_matrix)
        print('\n============================================================================\n')

# <center> Bitcoin

In [38]:
#cleaning data#
data = pd.read_csv('data/coin_Bitcoin.csv')
data = data.drop(['SNo','Name','Symbol','Date'],axis=1)
data['change'] = np.nan
#data['Date'] = pd.to_datetime(data['Date'])

In [39]:
#getting the change in value#
n = data.shape[0]

for i in range(0,n-1,1):
    diff = data['Marketcap'].iloc[i+1] - data['Marketcap'].iloc[i]

    if diff >=0:
        data['change'].iloc[i] = 1
    else:
        data['change'].iloc[i] = 0

data = data.iloc[0:n-1]

In [40]:
train,validate,test = np.split(data.sample(frac=1,random_state=42),[int(0.7*len(data)),int(0.9*len(data))])

In [41]:
#training#
y_train = train['change'].to_numpy()
X_train = train.iloc[:,0:-1].to_numpy()

#testing#
y_test = test['change'].to_numpy()
X_test = test.iloc[:,0:-1].to_numpy()

#validation#
y_validate = validate['change'].to_numpy()
X_validate = validate.iloc[:,0:-1].to_numpy()

In [42]:
model = LogisticModel(multiclass=False)
model.train(X_train,y_train,alpha=0.0003,tol=0.03,L=0,num_iter=1000,seed = False)
output = model.predict(X_train,percentage = False)
model.confusion_matrix(output,y_train)

  sigmoid = lambda z: 1/(1+np.exp(-z))




accurary : 0.5504061156235069
error : 0.44959388437649306
confusion matrix : 

       0.0     1.0
0.0    0.0     0.0
1.0  941.0  1152.0




  miss = matrix[0][1]/(matrix[0][0]+matrix[0][1])


In [43]:
output = model.predict(X_test,percentage=False)
model.confusion_matrix(output,y_test)



accurary : 0.5652173913043478
error : 0.4347826086956522
confusion matrix : 

       0.0    1.0
0.0    0.0    0.0
1.0  130.0  169.0




  miss = matrix[0][1]/(matrix[0][0]+matrix[0][1])
