# Linear Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
import pandas as pd
import seaborn as sns
import math
import os
import operator
import torch

In [None]:
df = pd.read_csv('car_data.csv')
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [None]:
f_continuous = df[['Year', 'Selling_Price', 'Present_Price', 'Kms_Driven', 'Owner']]
f_categorical = pd.get_dummies(df[['Fuel_Type', 'Seller_Type', 'Transmission']])
df = pd.concat([f_continuous, f_categorical], axis=1)

# Drop refundant features
df.drop(['Transmission_Automatic', 'Seller_Type_Dealer', 'Fuel_Type_CNG'], axis=1, inplace=True)
df.head()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Owner,Fuel_Type_Diesel,Fuel_Type_Petrol,Seller_Type_Individual,Transmission_Manual
0,2014,3.35,5.59,27000,0,0,1,0,1
1,2013,4.75,9.54,43000,0,1,0,0,1
2,2017,7.25,9.85,6900,0,0,1,0,1
3,2011,2.85,4.15,5200,0,0,1,0,1
4,2014,4.6,6.87,42450,0,1,0,0,1


In [None]:
Y = df['Selling_Price']
X = df.drop('Selling_Price', axis=1)

In [None]:
from sklearn.model_selection import train_test_split
X_t = torch.from_numpy(X.to_numpy()).float()
Y_t = torch.from_numpy(Y.to_numpy()).float().unsqueeze(1)
X_train, X_test, Y_train, Y_test = train_test_split(X_t, Y_t, test_size=0.2)

In [None]:
def add_ones_col(X):
 
    x_0 = torch.ones((X.shape[0],), dtype=torch.float32).unsqueeze(1)
    X = torch.cat([x_0, X], dim=1)
    return X

def multi_linear_reg(X, y):
    
    X = add_ones_col(X)  
    Xt_X = X.T.mm(X)
    Xt_y = X.T.mm(y)

    Xt_X_inv = Xt_X.inverse()
    w = Xt_X_inv.mm(Xt_y)
    return w

def prediction(X, w):
    
    X = add_ones_col(X)
    return X.mm(w)

In [None]:
w = multi_linear_reg(X_train, Y_train)
Y_pred_train = prediction(X_train, w)
Y_pred_test = prediction(X_test, w)

In [None]:
def mse(Y_true, Y_pred):
    error = Y_pred - Y_true
    return (error.T.mm(error) / Y_pred.shape[0]).item()

def mae(Y_true, Y_pred):
    error = Y_pred - Y_true
    return error.abs().mean().item()

In [None]:
mse_train = mse(Y_train, Y_pred_train)
mae_train = mae(Y_train, Y_pred_train)
print('MSE Train:\t', mse_train)
print('MAE Train:\t', mae_train, end='\n\n')

mse_test = mse(Y_test, Y_pred_test)
mae_test = mae(Y_test, Y_pred_test)
print('MSE Test:\t', mse_test)
print('MAE Test:\t', mae_test, end='\n\n')

MSE Train:	 3.715343952178955
MAE Train:	 1.4379292726516724

MSE Test:	 3.8426644802093506
MAE Test:	 1.5005557537078857



# Logistic Regression

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('default')

data = pd.read_csv("car_data.csv")

In [None]:
X = data.drop(["Owner","Car_Name","Year","Fuel_Type","Seller_Type","Transmission"], axis =1)
Y = data["Owner"]

In [None]:
from sklearn.metrics import accuracy_score


class LogisticRegression():
    def __init__(self, X, y , X_valid, y_valid, lr,epochs, reg = False,useoptimizer = False,ismultiClass = False):
        self.X,self.y,  self.X_valid,self.y_valid,self.lr ,self.epochs,self.reg,self.useoptimizer, self.ismultiClass= X, y , X_valid, y_valid, lr, epochs, reg, useoptimizer,ismultiClass
        if(not ismultiClass):            
            self.optim = GradientDesent( X, y , X_valid, y_valid, lr,epochs, reg,useoptimizer, self.sigmoid)
   
    def sigmoid(self, z):    
        return 1/(1+np.exp(-z))  
    
    def fit(self):
        if(self.ismultiClass):
            self.fitMulti()
        else:
            w = self.optim.optimize()
            h = sigmoid(np.dot(self.X, w))
            h_valid = sigmoid(np.dot(self.X_valid, w))
            train_preds = [1 if i > 0.5 else 0 for i in h]
            val_preds = [1 if i > 0.5 else 0 for i in h_valid]
            train_accuracy =  accuracy_score(self.y , train_preds)
            val_accuracy =  accuracy_score(self.y_valid , val_preds)

            print(f'Train Accuracy: {train_accuracy} Val Accuracy: {val_accuracy}')   
        
        
    def fitMulti(self):
        labelCount = len(np.unique(self.y))
        print(f'Total label Count: {labelCount}') 
        w = np.zeros((labelCount, self.X.shape[1]))
                     
        for i in range(1, labelCount+1):
            print(f'Train LG for class : {i}')
            y_change = np.array([1 if i == label else 0 for label in self.y])
            y_valid_change = np.array([1 if i == label else 0 for label in self.y_valid])            
            optim = GradientDesent( self.X, y_change , self.X_valid, y_valid_change, self.lr, self.epochs, self.reg, self.useoptimizer, self.sigmoid)
            w[i-1:] = optim.optimize()                     
        
        train_preds = np.argmax(self.sigmoid(np.dot(self.X, w.T)) ,axis = 1) + 1
        val_preds = np.argmax(self.sigmoid(np.dot(self.X_valid, w.T)) ,axis = 1) + 1         
        
        train_accuracy =  accuracy_score(self.y , train_preds)
        val_accuracy =  accuracy_score(self.y_valid , val_preds)        
        print(f'Train Accuracy: {train_accuracy} Val Accuracy: {val_accuracy}')

In [None]:
import scipy.optimize as opt
class GradientDesent():
    
    def __init__(self, X, y , X_valid, y_valid, lr, epochs, reg = False, useoptimizer = False, activate_fn = None):
        self.X,self.y,  self.X_valid,self.y_valid,self.lr ,self.epochs, self.reg, self.useoptimizer, self.activate_fn  = X, y , X_valid, y_valid, lr,epochs, reg,useoptimizer,activate_fn
            
    def optimize(self):
        w = np.zeros(X.shape[1])
              
        
        if(self.useoptimizer):
            result = opt.fmin_tnc(func=self.cost, x0=w, fprime=self.gradient,args=(self.X, self.y)) 
            train_loss= self.cost(result[0], self.X, self.y)
            val_loss = self.cost(result[0], self.X_valid, self.y_valid)  
            print(f'Runned Spicy Optimizer > Train Loss: {train_loss} Val Loss: {val_loss}')          
            return result[0]             
            
        train_cost = np.zeros(self.epochs)
        val_cost = np.zeros(self.epochs)      

        
        for i in range(self.epochs):
            grad = self.gradient(w, self.X, self.y)
            w = np.subtract(w , np.multiply(grad , self.lr))
            train_loss= self.cost(w, self.X, self.y)
            val_loss = self.cost(w, self.X_valid, self.y_valid)         
            train_cost[i] = train_loss
            val_cost[i] = val_loss
            print(f'Epochs: {i} Train Loss: {train_loss} Val Loss: {val_loss}')   
            
        
        return w
        
    def cost(self,w, X, y):
        h = np.dot(X, w)
        m = len(X)       
        if(self.activate_fn != None):  h = self.activate_fn(h)
        
        loss= (np.dot(-y, np.log(h))) - (np.dot(1-y, np.log(1-h)))
        loss = np.sum(loss)/m
        
        if(self.reg):           
            loss = loss + ((self.lr/2*m) * np.sum(np.power(w,2)))
        return loss
            
    
    def gradient(self,w,X, y):
        h = np.dot(X, w)      
        if(self.activate_fn != None):  h = self.activate_fn(h)     
        m = len(X)
        loss= h - y
        gradient = np.dot(X.T,loss)/m
        if(self.reg): gradient = gradient + np.dot((self.lr/m), w)      
        return gradient

In [None]:
X_train, X_valid, y_train, y_valid = train_test_split(
            X,Y, test_size=0.15, random_state=42)
print(X_train.shape)
print(X_valid.shape)

(255, 3)
(46, 3)


In [None]:
model = LogisticRegression(X_train, y_train,X_valid,y_valid,  lr = 0.001, epochs =5000,useoptimizer=True,ismultiClass=True)
model.fit()

Total label Count: 3
Train LG for class : 1
Runned Spicy Optimizer > Train Loss: 0.1934467571974137 Val Loss: 0.14715781589520394
Train LG for class : 2
Runned Spicy Optimizer > Train Loss: 0.006062695289876363 Val Loss: 0.007083977658482754
Train LG for class : 3
Runned Spicy Optimizer > Train Loss: 0.000705422564034831 Val Loss: 0.00042272363738137913
Train Accuracy: 0.0392156862745098 Val Accuracy: 0.021739130434782608


  # This is added back by InteractiveShellApp.init_path()


In [None]:
model = LogisticRegression(X_train, y_train,X_valid,y_valid,  lr = 0.0000001, epochs =5000,reg = True, useoptimizer=True,ismultiClass=True)
model.fit()

Total label Count: 3
Train LG for class : 1
Runned Spicy Optimizer > Train Loss: 0.19344786386861634 Val Loss: 0.14715801323396946
Train LG for class : 2
Runned Spicy Optimizer > Train Loss: 0.006062695321282896 Val Loss: 0.00708397766210033
Train LG for class : 3


  # This is added back by InteractiveShellApp.init_path()


Runned Spicy Optimizer > Train Loss: 0.003233152965074141 Val Loss: 0.001300002132027879
Train Accuracy: 0.0392156862745098 Val Accuracy: 0.021739130434782608


# Linear Regression with fish data

In [None]:
fish_df = pd.read_csv("Fish.csv")

In [None]:
fish_df.dtypes

Species     object
Weight     float64
Length1    float64
Length2    float64
Length3    float64
Height     float64
Width      float64
dtype: object

In [None]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
fish_df['Species'] =label_encoder.fit_transform(fish_df["Species"])

In [None]:
from sklearn.model_selection import train_test_split

X = fish_df.drop(['Species'], axis = 1)
Y = fish_df['Species']
X_t = torch.from_numpy(X.to_numpy()).float()
Y_t = torch.from_numpy(Y.to_numpy()).float().unsqueeze(1)
X_train, X_test, Y_train, Y_test = train_test_split(X_t, Y_t, test_size=0.2)

In [None]:
w = multi_linear_reg(X_train, Y_train)
Y_pred_train = prediction(X_train, w)
Y_pred_test = prediction(X_test, w)

In [None]:
mse_train = mse(Y_train, Y_pred_train)
mae_train = mae(Y_train, Y_pred_train)
print('MSE Train:\t', mse_train)
print('MAE Train:\t', mae_train)

mse_test = mse(Y_test, Y_pred_test)
mae_test = mae(Y_test, Y_pred_test)
print('MSE Test:\t', mse_test)
print('MAE Test:\t', mae_test)

MSE Train:	 1.0862778425216675
MAE Train:	 0.7295225858688354
MSE Test:	 0.616209089756012
MAE Test:	 0.6806548833847046


# logistic regression with fish data

In [None]:
fish_df = pd.read_csv("Fish.csv")

In [None]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
fish_df['Species'] =label_encoder.fit_transform(fish_df["Species"])

In [None]:
from sklearn.model_selection import train_test_split

X = fish_df.drop(['Species'], axis = 1)
Y = fish_df['Species']

#X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.3,random_state=42)

In [None]:
from sklearn.metrics import accuracy_score


class LogisticRegression():
    def __init__(self, X, y , X_valid, y_valid, lr,epochs, reg = False,useoptimizer = False,ismultiClass = False):
        self.X,self.y,  self.X_valid,self.y_valid,self.lr ,self.epochs,self.reg,self.useoptimizer, self.ismultiClass= X, y , X_valid, y_valid, lr, epochs, reg, useoptimizer,ismultiClass
        if(not ismultiClass):            
            self.optim = GradientDesent( X, y , X_valid, y_valid, lr,epochs, reg,useoptimizer, self.sigmoid)
   
    def sigmoid(self, z):    
        return 1/(1+np.exp(-z))  
    
    def fit(self):
        if(self.ismultiClass):
            self.fitMulti()
        else:
            w = self.optim.optimize()
            h = sigmoid(np.dot(self.X, w))
            h_valid = sigmoid(np.dot(self.X_valid, w))
            train_preds = [1 if i > 0.5 else 0 for i in h]
            val_preds = [1 if i > 0.5 else 0 for i in h_valid]
            train_accuracy =  accuracy_score(self.y , train_preds)
            val_accuracy =  accuracy_score(self.y_valid , val_preds)

            print(f'Train Accuracy: {train_accuracy} Val Accuracy: {val_accuracy}')   
        
        
    def fitMulti(self):
        labelCount = len(np.unique(self.y))
        print(f'Total label Count: {labelCount}') 
        w = np.zeros((labelCount, self.X.shape[1]))
                     
        for i in range(1, labelCount+1):
            print(f'Train LG for class : {i}')
            y_change = np.array([1 if i == label else 0 for label in self.y])
            y_valid_change = np.array([1 if i == label else 0 for label in self.y_valid])            
            optim = GradientDesent( self.X, y_change , self.X_valid, y_valid_change, self.lr, self.epochs, self.reg, self.useoptimizer, self.sigmoid)
            w[i-1:] = optim.optimize()                     
        
        train_preds = np.argmax(self.sigmoid(np.dot(self.X, w.T)) ,axis = 1) + 1
        val_preds = np.argmax(self.sigmoid(np.dot(self.X_valid, w.T)) ,axis = 1) + 1         
        
        train_accuracy =  accuracy_score(self.y , train_preds)
        val_accuracy =  accuracy_score(self.y_valid , val_preds)        
        print(f'Train Accuracy: {train_accuracy} Val Accuracy: {val_accuracy}')

In [None]:
import scipy.optimize as opt
class GradientDesent():
    
    def __init__(self, X, y , X_valid, y_valid, lr, epochs, reg = False, useoptimizer = False, activate_fn = None):
        self.X,self.y,  self.X_valid,self.y_valid,self.lr ,self.epochs, self.reg, self.useoptimizer, self.activate_fn  = X, y , X_valid, y_valid, lr,epochs, reg,useoptimizer,activate_fn
            
    def optimize(self):
        w = np.zeros(X.shape[1])
              
        
        if(self.useoptimizer):
            result = opt.fmin_tnc(func=self.cost, x0=w, fprime=self.gradient,args=(self.X, self.y)) 
            train_loss= self.cost(result[0], self.X, self.y)
            val_loss = self.cost(result[0], self.X_valid, self.y_valid)  
            print(f'Runned Spicy Optimizer > Train Loss: {train_loss} Val Loss: {val_loss}')          
            return result[0]             
            
        train_cost = np.zeros(self.epochs)
        val_cost = np.zeros(self.epochs)      

        
        for i in range(self.epochs):
            grad = self.gradient(w, self.X, self.y)
            w = np.subtract(w , np.multiply(grad , self.lr))
            train_loss= self.cost(w, self.X, self.y)
            val_loss = self.cost(w, self.X_valid, self.y_valid)         
            train_cost[i] = train_loss
            val_cost[i] = val_loss
            print(f'Epochs: {i} Train Loss: {train_loss} Val Loss: {val_loss}')   
            
        
        return w
        
    def cost(self,w, X, y):
        h = np.dot(X, w)
        m = len(X)       
        if(self.activate_fn != None):  h = self.activate_fn(h)
        
        loss= (np.dot(-y, np.log(h))) - (np.dot(1-y, np.log(1-h)))
        loss = np.sum(loss)/m
        
        if(self.reg):           
            loss = loss + ((self.lr/2*m) * np.sum(np.power(w,2)))
        return loss
            
    
    def gradient(self,w,X, y):
        h = np.dot(X, w)      
        if(self.activate_fn != None):  h = self.activate_fn(h)     
        m = len(X)
        loss= h - y
        gradient = np.dot(X.T,loss)/m
        if(self.reg): gradient = gradient + np.dot((self.lr/m), w)      
        return gradient

In [None]:
X_train, X_valid, y_train, y_valid = train_test_split(
            X,Y, test_size=0.15, random_state=42)
print(X_train.shape)
print(X_valid.shape)

(135, 6)
(24, 6)


In [None]:
model = LogisticRegression(X_train, y_train,X_valid,y_valid,  lr = 0.001, epochs =5000,useoptimizer=True,ismultiClass=True)
model.fit()

Total label Count: 7
Train LG for class : 1
Runned Spicy Optimizer > Train Loss: 7.169841607940632e-06 Val Loss: 1.5282581914064993e-09
Train LG for class : 2
Runned Spicy Optimizer > Train Loss: 0.022897019871497146 Val Loss: 0.012600124578937416
Train LG for class : 3


  # This is added back by InteractiveShellApp.init_path()


Runned Spicy Optimizer > Train Loss: 0.013587489300938276 Val Loss: 0.0477010447942689
Train LG for class : 4
Runned Spicy Optimizer > Train Loss: 0.14716243219644512 Val Loss: 0.10348891171042773
Train LG for class : 5
Runned Spicy Optimizer > Train Loss: 4.200888356627509e-07 Val Loss: nan
Train LG for class : 6
Runned Spicy Optimizer > Train Loss: 0.10844630600378724 Val Loss: 0.3007033061314976
Train LG for class : 7
Runned Spicy Optimizer > Train Loss: 3.2553345525521665e-09 Val Loss: 5.523623715194476e-10
Train Accuracy: 0.8 Val Accuracy: 0.5833333333333334


In [None]:
model = LogisticRegression(X_train, y_train,X_valid,y_valid,  lr = 0.0000001, epochs =5000,reg = True, useoptimizer=True,ismultiClass=True)
model.fit()

Total label Count: 7
Train LG for class : 1
Runned Spicy Optimizer > Train Loss: 0.0038347879193639353 Val Loss: 0.0006608035458345611
Train LG for class : 2
Runned Spicy Optimizer > Train Loss: 0.027959597176426543 Val Loss: 0.005029274220647714
Train LG for class : 3




Runned Spicy Optimizer > Train Loss: 0.01373501559380547 Val Loss: 0.04766002244761856
Train LG for class : 4
Runned Spicy Optimizer > Train Loss: 0.15306093677994295 Val Loss: 0.10282490261750174
Train LG for class : 5
Runned Spicy Optimizer > Train Loss: 0.0015726148981420338 Val Loss: 0.10190388096991657
Train LG for class : 6
Runned Spicy Optimizer > Train Loss: 0.10883846093549265 Val Loss: 0.30783805218117005
Train LG for class : 7
Runned Spicy Optimizer > Train Loss: 4.4466303356135775e-06 Val Loss: 1.0256938251545716e-06
Train Accuracy: 0.8074074074074075 Val Accuracy: 0.5833333333333334
