# Multiple Linear Regression

### Read Dataset

In [1]:
import pandas as pd
db = pd.read_csv("Advertising.csv", usecols = [1,2,3,4]) # To fix index problem, ignore first index
df = db.copy()
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [2]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TV,200.0,147.0425,85.854236,0.7,74.375,149.75,218.825,296.4
radio,200.0,23.264,14.846809,0.0,9.975,22.9,36.525,49.6
newspaper,200.0,30.554,21.778621,0.3,12.75,25.75,45.1,114.0
sales,200.0,14.0225,5.217457,1.6,10.375,12.9,17.4,27.0


In [3]:
df.isnull().sum()

TV           0
radio        0
newspaper    0
sales        0
dtype: int64

In [4]:
df.corr()

Unnamed: 0,TV,radio,newspaper,sales
TV,1.0,0.054809,0.056648,0.782224
radio,0.054809,1.0,0.354104,0.576223
newspaper,0.056648,0.354104,1.0,0.228299
sales,0.782224,0.576223,0.228299,1.0


### Choose Target Feature

In [5]:
X = df.drop("sales", axis = 1)
y = df["sales"]

In [6]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict

### Split Dataset

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state= 42)

In [8]:
print(X_train.shape, X_test.shape, y_train.shape, X_test.shape)

(160, 3) (40, 3) (160,) (40, 3)


### Fit the Dataset

In [9]:
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [10]:
model.intercept_

2.979067338122629

In [11]:
model.coef_

array([0.04472952, 0.18919505, 0.00276111])

### Predict the Dataset

In [12]:
new_data = [[30], [10],[40]]
new_data = pd.DataFrame(new_data).T

In [13]:
model.predict(new_data)

array([6.32334798])

In [14]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

In [15]:
rmse = np.sqrt(mean_squared_error(y_train, model.predict(X_train)))
rmse

1.644727765644337

In [16]:
rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
rmse

1.7815996615334502

### Model Tuning

In [17]:
model.score(X_train, y_train)

0.8957008271017818

In [18]:
cross_val_score(model, X_train, y_train, cv = 10, scoring = "r2").mean()

0.791354859691634

In [19]:
np.sqrt(-cross_val_score(model, 
                X_train, 
                y_train, 
                cv = 10, 
                scoring = "neg_mean_squared_error")).mean()

1.6513523730313335

In [20]:
np.sqrt(-cross_val_score(model, 
                X_test, 
                y_test, 
                cv = 10, 
                scoring = "neg_mean_squared_error")).mean()

1.846277882399709