In [11]:
import numpy as np
from numpy.linalg import inv
import pandas as pd
from sklearn.model_selection import train_test_split

In [12]:
data = pd.read_csv('abalone.csv')
data.head()

Unnamed: 0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [13]:
data.columns = ["Sex","Length","Diameter","Height","Whole weight",
                   "Shucked weight","Viscera weight","Shell weight","Rings"]
data.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [19]:
X = data[["Length","Diameter","Height","Whole weight",
                   "Shucked weight","Viscera weight","Shell weight"]].values
Y = data[["Rings"]].values

In [20]:
corr = data.corr()
corr

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
Length,1.0,0.986813,0.827552,0.925255,0.897905,0.90301,0.897697,0.557123
Diameter,0.986813,1.0,0.833705,0.925452,0.893159,0.899726,0.905328,0.575005
Height,0.827552,0.833705,1.0,0.819209,0.774957,0.798293,0.817326,0.558109
Whole weight,0.925255,0.925452,0.819209,1.0,0.969403,0.966372,0.955351,0.540818
Shucked weight,0.897905,0.893159,0.774957,0.969403,1.0,0.931956,0.882606,0.421256
Viscera weight,0.90301,0.899726,0.798293,0.966372,0.931956,1.0,0.907647,0.504274
Shell weight,0.897697,0.905328,0.817326,0.955351,0.882606,0.907647,1.0,0.628031
Rings,0.557123,0.575005,0.558109,0.540818,0.421256,0.504274,0.628031,1.0


In [25]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [40]:
class LinearLeastSquare:
    def __init__(self):
        pass
    
    #train
    def fit(self, X, Y):
        # w= شیب خط
        # w = (X.T X)^-1 * X.T Y
        self.w = np.matmul(inv(np.matmul(X.T, X)), np.matmul(X.T, Y))
        
    def predict(self, x):
        ring_pred = np.matmul(x,self.w)
        return ring_pred
    
    def evaluate(self, X, Y, loss='MAE'):
        Y_pred = []
        for i in range(X.shape[0]):
            y_pred = self.predict(X[i])
            Y_pred.append(y_pred)
            
        Y_pred = np.array(Y_pred)
        
        Error = Y - Y_pred
        
        if loss == 'MAE':
            return np.mean(np.abs(Error))
        elif loss == 'MSE':
            return np.mean(Error ** 2)
        elif loss == 'Huber':
            d = 1
            huber_mse = 0.5*(Error**2)
            huber_mae = d * (np.abs(Error) - 0.5 * d)
            return np.where(np.abs(Error) <= d, huber_mse, huber_mae)
        elif loss == 'Hinge':
            new_predicted = np.array([-1 if i==0 else i for i in Y_pred])
    
            # calculating hinge loss
            hinge_loss = np.mean([max(0, 1-x*y) for x, y in zip(Y, new_predicted)])
            return hinge_loss

In [41]:
lls = LinearLeastSquare()
lls.fit(X_train, Y_train)

In [42]:
lls.w

array([[  7.266131  ],
       [ 12.64932552],
       [ 12.33904124],
       [  8.32067073],
       [-21.08555616],
       [-10.33974378],
       [  7.23507357]])

In [43]:
y_pred = lls.predict(X_test)

In [45]:
MAE = lls.evaluate(X_test, Y_test)
print('MAE = ', MAE)

MAE =  1.6653763210087427


In [46]:
MSE = lls.evaluate(X_test, Y_test, loss='MSE')
print('MSE = ', MSE)

MSE =  5.0789038966266595


In [47]:
Huber = lls.evaluate(X_test, Y_test, loss='Huber')
print('Huber = ', Huber)

Huber =  [[1.2346156 ]
 [2.5575993 ]
 [1.70743613]
 ...
 [0.63544768]
 [0.90074112]
 [1.66355283]]


In [48]:
Hinge = lls.evaluate(X_test, Y_test, loss='Hinge')
print('Hinge = ', Hinge)

Hinge =  [0.01148347]


  arr = asanyarray(a)
