In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from numpy.linalg import inv

In [2]:
data = pd.read_csv('abalone.data.csv')
data.columns = ['Sex','Length','Diameter','Height','Whole weight','Shucked weight','Viscera weight','Shell weight','Rings']
data

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
1,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
2,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
3,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
4,I,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.1200,8
...,...,...,...,...,...,...,...,...,...
4171,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4172,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4173,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4174,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [3]:
X = np.array([data['Length'],data['Diameter'],data['Height'],data['Whole weight'],data['Shucked weight'],
             data['Viscera weight'],data['Shell weight']]).T
X.shape

Y = np.array(data['Rings'])
Y = Y.reshape((Y.shape[0],1))
Y.shape

(4176, 1)

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,Y)
X_test.shape

(1044, 7)

In [25]:
class LinearLeastSquare:
    def __init__(self):
        pass
    #train
    def fit(self,X,Y):
        #w = (X.T X)^-1 * X.T Y
        # X = X.values.reshape(-1,1)
        # Y = Y.values.reshape(-1,1)
        self.w = np.matmul(inv(np.matmul(X.T,X)),np.matmul(X.T,Y))         #w = shibkhat
        
    def predict(self,x):
        height_pred = x*self.w
        
        return height_pred
    
    def evaluate(self,X,Y,loss='MAE'):
        Y_pred = np.matmul(X,self.w)
        Error = Y - Y_pred
        
        if loss == 'MAE':
            return np.mean(np.abs(Error))
        
        elif loss == 'MSE':
            return np.mean(Error ** 2)
        
        elif loss == 'Huber':
            is_small_error = np.abs(Error) < 1
            squared_loss = np.square(Error) / 2
            linear_loss  = np.abs(Error) - 0.5
            huber = np.where(is_small_error, squared_loss, linear_loss)
            return huber
        
        elif loss == 'Hinge':
            new_predicted = np.array([-1 if i==0 else i for i in Y_pred])
            new_actual = np.array([-1 if i==0 else i for i in Y])

            # calculating hinge loss
            hinge_loss = np.mean([max(0, 1-x*y) for x, y in zip(new_actual, new_predicted)])
            return hinge_loss
        
    

In [26]:
lls = LinearLeastSquare()
lls.fit(X_train,y_train)
y_pred = X_train * lls.w[0,0]


In [22]:
lls.evaluate(X_test,y_test)

1.6669273435164922

In [23]:
lls.evaluate(X_test,y_test,loss='MSE')

5.874816789539097

In [24]:
lls.evaluate(X_test,y_test,loss='Huber')

array([[0.1767999 ],
       [0.4567762 ],
       [0.86107513],
       ...,
       [0.09837462],
       [0.86845194],
       [1.27265401]])

In [27]:
lls.evaluate(X_test,y_test,loss='Hinge')

  arr = asanyarray(a)


array([0.0192471])