In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('Advertising.csv').drop('Unnamed: 0', axis = 1)

In [3]:
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
X = df.drop('sales', axis = 1)
y = df['sales']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [7]:
scaler = StandardScaler()
scaler.fit(X_train)

In [8]:
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

### Hyperparameter tuining with `cross_val_score`
- it allows us to tune hyperparameter before we actually fit train data into the model
- `cross_val_score` will score the model based on the scoring parameter
- After getting the best performing model from those score we can select the model to fit the train data

In [19]:
cv_score = {}
for i in range(20):
    model = Ridge(alpha = i)
    score = cross_val_score(model, scaled_X_train, y_train, scoring= 'neg_mean_squared_error' ,cv = 5)
    # cross_val_score based on neg_mean_squared_error returns k (i.e cv here) number of -MSE
    cv_score[i] = abs(score.mean())
    # for those k number of errors we calculated their average and the absolute value is stored in a dictionary

### What `scoring` paramater could hold : 
- Visit
https://scikit-learn.org/stable/modules/model_evaluation.html

In [10]:
# syntax to get key of a minimum value in a dict 
# min(dict, key = dict.get)
min(cv_score, key = cv_score.get)

0

In [11]:
cv_score[0]

np.float64(3.3439092212046893)

In [12]:
# the RMSE of a best performing model is
np.sqrt(cv_score[0])

np.float64(1.8286358908226343)

#### From the above experiment we found that the model (ridge model for linear regression) performs best at lambda = 0

> if we notice the model is not actually fitted with any train set, we just calculated MSE using `cross_val_score` to examine best performing model

#### Now lets fit the model with $\lambda = 0$ i.e Ridge(alpha = 0)
- Using `Ridge(alpha = 0)` is same as using LinearRegression
- The documentation suggest not to use `alpha = 0` with `Ridge()` rather use `LinearRegression`
- To see documentation use `shift+tab` with `Ridge()`

In [13]:
from sklearn.linear_model import LinearRegression

In [14]:
model = LinearRegression()

In [15]:
model.fit(scaled_X_train,y_train)

In [16]:
y_test_pred = model.predict(scaled_X_test)

In [17]:
MSE = mean_squared_error(y_test,y_test_pred)
MSE

np.float64(2.2987166978863787)

In [18]:
RMSE = np.sqrt(MSE)
RMSE

np.float64(1.5161519375993882)

## `cross_validate()` function
- allows us to analyze the score_time, fit_time and see the multiple scoring

In [20]:
from sklearn.model_selection import cross_validate

In [21]:
X = df.drop('sales', axis = 1)
y = df['sales']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)
scaler = StandardScaler()
scaler.fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [22]:
model1 = Ridge(alpha = 20)

In [25]:
scores = cross_validate(estimator= model1, X = scaled_X_train, y = y_train, scoring = ['neg_mean_squared_error',
                                                                              'neg_mean_absolute_error'], cv = 10 )

In [26]:
scores = pd.DataFrame(scores)
scores

Unnamed: 0,fit_time,score_time,test_neg_mean_squared_error,test_neg_mean_absolute_error
0,0.003916,0.003095,-3.058761,-1.469033
1,0.003092,0.002266,-4.211171,-1.617374
2,0.002433,0.002034,-1.813678,-1.119691
3,0.002198,0.001948,-1.100837,-0.899526
4,0.002375,0.001936,-4.361332,-1.708956
5,0.002026,0.001862,-8.902314,-1.639851
6,0.003737,0.002447,-1.6185,-0.927667
7,0.002258,0.002095,-3.412693,-1.275029
8,0.002154,0.00196,-5.682489,-1.723991
9,0.003881,0.003581,-3.1527,-1.28374


- here, `fit_time` is time taken to fit each folds
- `score_time` is the time taken to score the model (estimator) on each folds
- `test_neg_mean_squared_error` and `test_neg_mean_absolute_error` are the -MSE and -MAE of each fold

In [27]:
# to calculate the average of the socre
scores.mean()

fit_time                        0.002807
score_time                      0.002322
test_neg_mean_squared_error    -3.731447
test_neg_mean_absolute_error   -1.366486
dtype: float64

> This way we can analyze the model's accuracy along with computation time

> A model could best perform but also could be computationally expensive and takes alot of time

> A good model is not only that has minimum error but also conumes less time and is computationally cheap