In [43]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('Student_Marks.csv')

In [3]:
df.head()

Unnamed: 0,number_courses,time_study,Marks
0,3,4.508,19.202
1,4,0.096,7.734
2,4,3.133,13.811
3,6,7.909,53.018
4,8,7.811,55.299


In [4]:
df.describe()

Unnamed: 0,number_courses,time_study,Marks
count,100.0,100.0,100.0
mean,5.29,4.07714,24.41769
std,1.799523,2.372914,14.326199
min,3.0,0.096,5.609
25%,4.0,2.0585,12.633
50%,5.0,4.022,20.0595
75%,7.0,6.17925,36.67625
max,8.0,7.957,55.299


In [5]:
df.isnull().sum()

number_courses    0
time_study        0
Marks             0
dtype: int64

In [6]:
df.shape

(100, 3)

### Simple Linear Regression

In [7]:
x = df['time_study']
y = df['Marks']

In [9]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = .1,random_state = 42)

In [10]:
x_train.head()

18    7.711
30    3.864
73    1.629
33    6.594
90    4.182
Name: time_study, dtype: float64

In [11]:
x_test.head()

83    3.197
53    6.049
70    3.736
45    2.061
44    1.954
Name: time_study, dtype: float64

In [12]:
y_train.head()

18    50.986
30    24.172
73     7.014
33    39.965
90    24.394
Name: Marks, dtype: float64

In [13]:
y_test.head()

83    16.106
53    36.653
70    16.606
45     8.924
44     9.742
Name: Marks, dtype: float64

In [24]:
x_train = np.array(x_train)
x_test = np.array(x_test)
x_train = x_train.reshape(-1, 1)
x_test = x_test.reshape(-1, 1)

In [25]:
x_train

array([[7.711],
       [3.864],
       [1.629],
       [6.594],
       [4.182],
       [7.811],
       [6.379],
       [5.985],
       [4.218],
       [1.923],
       [2.913],
       [1.803],
       [0.508],
       [3.591],
       [3.948],
       [4.26 ],
       [0.14 ],
       [0.301],
       [6.173],
       [4.633],
       [0.423],
       [4.779],
       [6.201],
       [0.156],
       [6.471],
       [3.211],
       [5.473],
       [2.142],
       [7.543],
       [5.719],
       [3.913],
       [4.083],
       [3.413],
       [3.561],
       [4.378],
       [3.977],
       [2.262],
       [1.407],
       [3.606],
       [4.274],
       [3.635],
       [7.909],
       [6.08 ],
       [6.533],
       [4.41 ],
       [7.451],
       [6.063],
       [0.13 ],
       [2.966],
       [6.376],
       [7.641],
       [6.335],
       [7.591],
       [1.557],
       [6.703],
       [0.55 ],
       [3.797],
       [1.395],
       [2.438],
       [7.163],
       [0.805],
       [2.754],
       [

In [26]:
x_test

array([[3.197],
       [6.049],
       [3.736],
       [2.061],
       [1.954],
       [7.775],
       [2.051],
       [7.957],
       [7.353],
       [4.508]])

In [27]:
lr = LinearRegression()

In [29]:
model = lr.fit(x_train,y_train)

In [32]:
y_pred = model.predict(x_test)

In [39]:
mse = mean_squared_error(y_pred,y_test)

In [37]:
mae = mean_absolute_error(y_pred,y_test)

In [42]:
print('Before Scaling the values')
print('*'*30)
print('Mean Square Error: ',mse)
print('Mean Absolute Error: ',mae)

Before Scaling the values
******************************
Mean Square Error:  24.423299154282287
Mean Absolute Error:  4.188696332346687


## Scaling

In [47]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [48]:
x_train_scaled[:5]

array([[ 1.56945295],
       [-0.06276077],
       [-1.01103154],
       [ 1.09552971],
       [ 0.07216098]])

In [49]:
x_test_scaled[:5]

array([[-0.34575701],
       [ 0.86429589],
       [-0.11706889],
       [-0.82774162],
       [-0.87313982]])

In [50]:
lr = LinearRegression()
model1 = lr.fit(x_train_scaled,y_train)


In [51]:
y_pred_scaled = model.predict(x_test_scaled)

In [53]:
mse = mean_squared_error(y_pred_scaled,y_test)
mae = mean_absolute_error(y_pred_scaled,y_test)

In [54]:
print('After Scaling the values')
print('*'*30)
print('Mean Square Error: ',mse)
print('Mean Absolute Error: ',mae)

After Scaling the values
******************************
Mean Square Error:  671.5290441674986
Mean Absolute Error:  23.28479668977438


## Multi-Linear Regression

In [55]:
df.head()

Unnamed: 0,number_courses,time_study,Marks
0,3,4.508,19.202
1,4,0.096,7.734
2,4,3.133,13.811
3,6,7.909,53.018
4,8,7.811,55.299


In [57]:
x = df.drop('Marks',axis = 1)
y = df['Marks']

In [58]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = .1,random_state = 42)

In [59]:
x_train.head()

Unnamed: 0,number_courses,time_study
18,6,7.711
30,8,3.864
73,3,1.629
33,6,6.594
90,7,4.182


In [60]:
x_test.head()

Unnamed: 0,number_courses,time_study
83,5,3.197
53,7,6.049
70,4,3.736
45,3,2.061
44,4,1.954


In [61]:
y_train.head()

18    50.986
30    24.172
73     7.014
33    39.965
90    24.394
Name: Marks, dtype: float64

In [62]:
y_test.head()

83    16.106
53    36.653
70    16.606
45     8.924
44     9.742
Name: Marks, dtype: float64

In [63]:
mlr = LinearRegression()

In [65]:
mlr = mlr.fit(x_train,y_train)

In [66]:
y_pred = mlr.predict(x_test)

In [67]:
y_pred

array([19.20526872, 37.95095725, 20.21533375,  9.52679258, 10.79938243,
       45.23303923, 13.14989262, 48.0326829 , 37.4893147 , 22.4565529 ])

In [69]:
mse = mean_squared_error(y_pred,y_test)
mae = mean_absolute_error(y_pred,y_test)

print('Multi-Linear Regression')
print('*'*30)
print('Mean Square Error: ',mse)
print('Mean Absolute Error: ',mae)

Multi-Linear Regression
******************************
Mean Square Error:  13.240735628841724
Mean Absolute Error:  3.0606143419717373
