In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

In [2]:
X = np.random.randint(1, 500, size = 300)
y = (X + 4) * 0.6

In [3]:
X.shape, y.shape

((300,), (300,))

In [4]:
df = pd.DataFrame({
    'X':X,
    'y':y
})

In [5]:
df

Unnamed: 0,X,y
0,337,204.6
1,6,6.0
2,203,124.2
3,139,85.8
4,39,25.8
...,...,...
295,186,114.0
296,180,110.4
297,153,94.2
298,193,118.2


In [6]:
X = X.reshape(-1,1)
y = y.reshape(-1,1)
LR_model = LinearRegression()
LR_model.fit(X,y)

In [7]:
pred = LR_model.predict(X)

In [8]:
df['y_hat'] = pred

In [9]:
df

Unnamed: 0,X,y,y_hat
0,337,204.6,204.6
1,6,6.0,6.0
2,203,124.2,124.2
3,139,85.8,85.8
4,39,25.8,25.8
...,...,...,...
295,186,114.0,114.0
296,180,110.4,110.4
297,153,94.2,94.2
298,193,118.2,118.2


In [10]:
MSE = mean_squared_error(df['y'],df['y_hat'])
MSE

3.1101367095608537e-28

In [11]:
MAE = mean_absolute_error(df['y'],df['y_hat'])
MAE

8.855138844410249e-15

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.99, random_state=42)

In [13]:
X_train = X_train.reshape(-1,1)
y_train = y_train.reshape(-1,1)
model = LinearRegression()
model.fit(X_train,y_train)

In [14]:
X_test = X_test.reshape(-1,1)
perd = model.predict(X_test)

In [15]:
df1 = pd.DataFrame(perd, columns = ['y_hat'])
df1

Unnamed: 0,y_hat
0,282.6
1,93.6
2,18.0
3,163.2
4,246.0
...,...
292,155.4
293,234.6
294,70.2
295,199.8


In [16]:
perd.shape

(297, 1)

In [36]:
mae = mean_absolute_error(df['y'],df1['y_hat'])
mae

ValueError: Found input variables with inconsistent numbers of samples: [300, 297]

In [12]:
mse = mean_squared_error(df['y'],df['y_hat_new'])
mse

2.456710074084536e-29

In [17]:
y = np.random.randint(0, 20, size = 20)
y_hat = np.random.randint(0, 20, size = 20)

In [18]:
df_new = pd.DataFrame({
    'y': y,
    'y_hat':y_hat
})

In [19]:
df_new

Unnamed: 0,y,y_hat
0,10,5
1,3,3
2,7,18
3,5,8
4,19,3
5,13,16
6,11,13
7,8,15
8,11,4
9,11,2


In [20]:
df_new['y - y_hat'] = df_new['y'] - df_new['y_hat']


In [21]:
df_new

Unnamed: 0,y,y_hat,y - y_hat
0,10,5,5
1,3,3,0
2,7,18,-11
3,5,8,-3
4,19,3,16
5,13,16,-3
6,11,13,-2
7,8,15,-7
8,11,4,7
9,11,2,9


In [25]:
MSE_NEW = mean_squared_error(df_new['y'],df_new['y_hat'])
MSE_NEW

62.0

In [27]:
MAE_NEW = mean_absolute_error(df_new['y'],df_new['y_hat'])
MAE_NEW

6.2

In [28]:
df_new['sqe'] = np.square(df_new['y - y_hat'])
df_new['abs'] = np.abs(df_new['y - y_hat'])

In [29]:
df_new

Unnamed: 0,y,y_hat,y - y_hat,sqe,abs
0,10,5,5,25,5
1,3,3,0,0,0
2,7,18,-11,121,11
3,5,8,-3,9,3
4,19,3,16,256,16
5,13,16,-3,9,3
6,11,13,-2,4,2
7,8,15,-7,49,7
8,11,4,7,49,7
9,11,2,9,81,9


In [30]:
sum(df_new["sqe"]) / df_new.__len__()

62.0

In [31]:
sum(df_new['abs']) / df_new.__len__()

6.2