In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from numpy.linalg import inv

In [57]:
data = pd.read_csv('abalone.csv')
data = data.set_axis(['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings'], axis=1, inplace=False)
# data.columns =['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']
data

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
1,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
2,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
3,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
4,I,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.1200,8
...,...,...,...,...,...,...,...,...,...
4171,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4172,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4173,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4174,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


# Correlation

In [58]:
corr = data.corr()
corr

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
Length,1.0,0.986813,0.827552,0.925255,0.897905,0.90301,0.897697,0.557123
Diameter,0.986813,1.0,0.833705,0.925452,0.893159,0.899726,0.905328,0.575005
Height,0.827552,0.833705,1.0,0.819209,0.774957,0.798293,0.817326,0.558109
Whole weight,0.925255,0.925452,0.819209,1.0,0.969403,0.966372,0.955351,0.540818
Shucked weight,0.897905,0.893159,0.774957,0.969403,1.0,0.931956,0.882606,0.421256
Viscera weight,0.90301,0.899726,0.798293,0.966372,0.931956,1.0,0.907647,0.504274
Shell weight,0.897697,0.905328,0.817326,0.955351,0.882606,0.907647,1.0,0.628031
Rings,0.557123,0.575005,0.558109,0.540818,0.421256,0.504274,0.628031,1.0


In [59]:
corr['Rings']

Length            0.557123
Diameter          0.575005
Height            0.558109
Whole weight      0.540818
Shucked weight    0.421256
Viscera weight    0.504274
Shell weight      0.628031
Rings             1.000000
Name: Rings, dtype: float64

In [60]:
X = data[['Length', 'Diameter', 'Height', 'Whole weight', 'Shell weight']]
Y = data[['Rings']]

In [61]:
X_train, X_test, Y_train, Y_test = train_test_split(X.to_numpy(), Y.to_numpy(), test_size=0.2, random_state=30)

In [62]:
class LinearLeastSquare:
    def __init__(self):
        pass
    
    # train
    def fit(self, X, Y):
        self.w = np.matmul(inv(np.matmul(X.T, X)), np.matmul(X.T, Y))
        
    def predict(self, x):
        height_pred = x * self.w
        return height_pred
    
    def evaluate(self, X, Y, loss='MAE'):   
        Y_pred = np.matmul(X, self.w)
        
        Error = Y - Y_pred
        
        if loss == 'MAE':
            return np.mean(np.abs(Error))
        
        elif loss == 'MSE':
            return np.mean(Error ** 2)
        
        elif loss == 'Huber':
            return np.where((np.abs(Error) < 1), (np.square(Error) / 2), (np.abs(Error) - 0.5))
        
        elif loss == 'Hinge':
            return np.mean([max(0, 1 - x * y) for x, y in zip((np.array([-1 if i==0 else i for i in Y])), (np.array([-1 if i==0 else i for i in Y_pred])))])

- MAE : Mean Absoulate Error
- MSE : Mean Square Error
- Huber
- Hinge

In [63]:
lls = LinearLeastSquare()
lls.fit(X_train, Y_train)


In [64]:
lls.w

array([[ 5.68998069],
       [12.80979325],
       [15.88715548],
       [-7.91269848],
       [25.28749306]])

In [65]:
Y_pred = X_train * lls.w[0, 0]

In [66]:
print('MAE = ', lls.evaluate(X_test, Y_test, loss='MAE'))
print('MSE = ', lls.evaluate(X_test, Y_test, loss='MSE'))
print('Huber = ', lls.evaluate(X_test, Y_test, loss='Huber'))
print('Hinge = ', lls.evaluate(X_test, Y_test, loss='Hinge'))

MAE =  1.665934796027811
MSE =  5.346120765760551
Huber =  [[1.13140270e+00]
 [2.55822321e+00]
 [2.79519301e-02]
 [4.83856539e+00]
 [4.39238589e-01]
 [2.61068072e-02]
 [1.42202088e+00]
 [1.80902778e-01]
 [3.64175936e+00]
 [7.21505959e-02]
 [1.09637284e-02]
 [1.39556100e+00]
 [8.17642772e-01]
 [2.21885412e-01]
 [1.35654790e-01]
 [1.41946376e+00]
 [3.07882265e+00]
 [4.28781615e-01]
 [1.64725124e-05]
 [7.50675899e-01]
 [2.83918310e-03]
 [2.21312094e+00]
 [4.51616343e+00]
 [5.01693440e-03]
 [1.63096444e+00]
 [2.98150230e+00]
 [2.08825433e+00]
 [4.87994405e+00]
 [1.53610666e+00]
 [1.42350825e-01]
 [1.74643545e+00]
 [7.57827437e-01]
 [1.87136595e+00]
 [6.00433662e-01]
 [1.32745196e+00]
 [2.63714041e-01]
 [1.85771700e+00]
 [1.00648417e+00]
 [8.34951375e-05]
 [5.91928127e-01]
 [1.46604718e+00]
 [5.17491503e+00]
 [6.40914212e+00]
 [1.45442669e+00]
 [3.23499421e-01]
 [1.44349059e-01]
 [4.08872655e-01]
 [1.03278313e+00]
 [8.96988783e-01]
 [1.90978558e-01]
 [1.89667092e+00]
 [1.87850861e-01]
 [1.7