In [114]:
import numpy as np
from numpy.linalg import inv
import pandas as pd
from sklearn.model_selection import train_test_split

In [115]:
abalone = pd.read_csv("abalone.csv")
abalone

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [116]:
abalone = abalone.drop("Sex", axis=1)
abalone

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [117]:
X = abalone.iloc[:, :-1].values
X.shape

(4177, 7)

In [118]:
Y = abalone["Rings"].values
Y.shape

(4177,)

In [119]:
class LinearLeastSquare:
    def __init__(self):
        pass
    
    # train
    def fit(self, X, Y):
        self.w = np.matmul(inv(np.matmul(X.T, X)), np.matmul(X.T, Y))
        
    def predict(self, x):
        rings_pred = np.matmul(x, self.w)
        return rings_pred
    
    def evaluate(self, X, Y_true, loss="MAE"):
        Y_pred = []
        for i in range(X.shape[0]):
            y_pred = self.predict(X[i])
            Y_pred.append(y_pred)
        
        Y_pred = np.array(Y_pred)
        Error = Y_true - Y_pred
        
        if loss == "MAE":
            return np.mean(np.abs(Error))
        elif loss == "MSE":
            return np.mean(Error ** 2)
        elif loss == "Huber":
            delta = 1.0
            huber_mse = 0.5*(Error)**2
            huber_mae = delta * (np.abs(Error) - 0.5 * delta)
            return np.where(np.abs(Error) <= delta, huber_mse, huber_mae)
        elif loss == "Hinge":
            new_predicted = np.array([-1 if i==0 else i for i in Y_pred])
            hinge_loss = np.mean([max(0, 1-x*y) for x, y in zip(Y_true, new_predicted)])
            return hinge_loss

In [120]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [121]:
lls = LinearLeastSquare()
lls.fit(X_train, Y_train)

In [122]:
lls.w

array([  6.73577614,   9.03063006,  26.12133854,   9.69303507,
       -21.12834701, -13.44053044,   4.33018569])

In [123]:
y_pred = lls.predict(X_test)

In [124]:
mae = lls.evaluate(X_test, Y_test, loss="MAE")
print("MAE ==", mae)

MAE == 1.6491534809862765


In [125]:
mse = lls.evaluate(X_test, Y_test, loss="MSE")
print("MSE ==", mse)

MSE == 5.568167784610822


In [126]:
huber_loss = lls.evaluate(X_test, Y_test, loss="Huber")
print("Huber Loss ==", huber_loss)

Huber Loss == [0.55213545 4.61159895 0.22451195 ... 0.02497179 6.00352782 0.28686881]


In [127]:
hinge_loss = lls.evaluate(X_test, Y_test, loss="Hinge")
print("Hinge Loss ==", hinge_loss)

Hinge Loss == 0.0
