# Step - 1: Business Problem Understanding
- Indentify relationship in advertising channels and in sales?

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
df=pd.read_csv('Advertising.csv')
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230100,37800,69200,22100
1,44500,39300,45100,10400
2,17200,45900,69300,9300
3,151500,41300,58500,18500
4,180800,10800,58400,12900


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   TV         200 non-null    int64
 1   radio      200 non-null    int64
 2   newspaper  200 non-null    int64
 3   sales      200 non-null    int64
dtypes: int64(4)
memory usage: 6.4 KB


# Step - 2: Data Understanding
## 2.1: Data Collection

In [4]:
df.describe()

Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147042.5,23264.0,30554.0,14022.5
std,85854.236315,14846.809176,21778.620839,5217.456566
min,700.0,0.0,300.0,1600.0
25%,74375.0,9975.0,12750.0,10375.0
50%,149750.0,22900.0,25750.0,12900.0
75%,218825.0,36525.0,45100.0,17400.0
max,296400.0,49600.0,114000.0,27000.0


In [5]:
df.corr()

Unnamed: 0,TV,radio,newspaper,sales
TV,1.0,0.054809,0.056648,0.782224
radio,0.054809,1.0,0.354104,0.576223
newspaper,0.056648,0.354104,1.0,0.228299
sales,0.782224,0.576223,0.228299,1.0


## 3.4: Train-Test Split

In [6]:
X=df[['TV','radio','newspaper']]
y=df['sales']

In [7]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

In [8]:
X_train

Unnamed: 0,TV,radio,newspaper
169,284300,10600,6400
97,184900,21000,22000
31,112900,17400,38600
12,23800,35100,65900
35,290700,4100,8500
...,...,...,...
106,25000,11000,29700
14,204100,32900,46000
92,217700,33500,59000
179,165600,10000,17600


# Modelling - Lasso Regression
Modelling with default parameters

In [9]:
from sklearn.linear_model import Lasso
lasso_base=Lasso()
lasso_base.fit(X_train,y_train)

Lasso()

In [10]:
print('Intercept: ',lasso_base.intercept_)
print('coefficients: ',lasso_base.coef_)

Intercept:  2708.9492419105372
coefficients:  [0.04405928 0.19928749 0.00688245]


In [11]:
test_predictions=lasso_base.predict(X_test)
train_predictions=lasso_base.predict(X_train)

In [12]:
print(lasso_base.score(X_train,y_train))
print(lasso_base.score(X_test,y_test))
from sklearn.model_selection import cross_val_score
scores=cross_val_score(lasso_base,X,y,cv=5)
print(scores.mean())

0.9055159502227751
0.8609466552563986
0.8871063511014737


## Hyperparameter Tuning

In [13]:
from sklearn.model_selection import GridSearchCV
estimator=Lasso()
param_grid={'alpha':[0.1,0.2,0.5,0.7,1,2,5,10,100,1000]}

In [14]:
model_hp=GridSearchCV(estimator,param_grid,cv=5,scoring='r2')
model_hp.fit(X_train,y_train)
model_hp.best_params_

{'alpha': 1000}

## Rebuilt Lasso Model using best hyperparameters

In [15]:
from sklearn.linear_model import Lasso
lasso_best=Lasso(alpha=1000)
lasso_best.fit(X_train,y_train)

print('Intercept: ',lasso_base.intercept_)
print('coefficients: ',lasso_base.coef_)

test_predictions=lasso_base.predict(X_test)
train_predictions=lasso_base.predict(X_train)
print(lasso_base.score(X_train,y_train))
print(lasso_base.score(X_test,y_test))
from sklearn.model_selection import cross_val_score
scores=cross_val_score(lasso_base,X,y,cv=5)
print(scores.mean())

Intercept:  2708.9492419105372
coefficients:  [0.04405928 0.19928749 0.00688245]
0.9055159502227751
0.8609466552563986
0.8871063511014737


# Ridge Regression
- Modeling with default parameters

In [16]:
from sklearn.linear_model import Ridge
ridge_model=Ridge()
ridge_model.fit(X_train,y_train)

train_predictions=ridge_model.predict(X_train)
test_predictions=ridge_model.predict(X_test)

from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))

print('train RMSE:',train_rmse)
print('test RMSE:',test_rmse)

train RMSE: 1948.5372043175976
test RMSE: 1574.5968305905444


## Hyperparameter Tuning
- Identifying the best alpha value for Ridge Regression

In [17]:
from sklearn.model_selection import GridSearchCV
estimator=Ridge()
param_grid={'alpha':list(range(1,11))}
model_hp=GridSearchCV(estimator,param_grid,cv=5)
model_hp.fit(X_train,y_train)
model_hp.best_params_

{'alpha': 10}

## Rebuilt Ridge Model using best hyperparameters

In [18]:
from sklearn.linear_model import Ridge
ridge_model=Ridge(alpha=10)
ridge_model.fit(X_train,y_train)

train_predictions=ridge_model.predict(X_train)
test_predictions=ridge_model.predict(X_test)

from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))

print('train RMSE:',train_rmse)
print('test RMSE:',test_rmse)

train RMSE: 1948.5372040742343
test RMSE: 1574.5968305905444


# Elasticnet Regression
Modeling with default parameters

In [19]:
from sklearn.linear_model import ElasticNet
enr_model=ElasticNet()
enr_model.fit(X_train,y_train)

test_predictions=enr_model.predict(X_test)
train_predictions=enr_model.predict(X_train)

from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))

print('train RMSE:',train_rmse)
print('test RMSE:',test_rmse)

train RMSE: 1948.5371869557355
test RMSE: 1574.5968305905449


## Hyperparameter Tuning

In [20]:
from sklearn.model_selection import GridSearchCV
estimator=ElasticNet()
param_grid={'alpha':[0.1,0.2,1,2,3,5,10],
            'l1_ratio':[0.1,0.5,0.75,0.9,0.95,1]}
enr_hp=GridSearchCV(estimator,param_grid,cv=5)
enr_hp.fit(X_train,y_train)
enr_hp.best_params_

{'alpha': 10, 'l1_ratio': 1}

## Rebuilt ElasticNet Regression Model using best hyperparameters

In [21]:
from sklearn.linear_model import ElasticNet
enr_model=ElasticNet(alpha=10, l1_ratio=1)
enr_model.fit(X_train,y_train)

test_predictions=enr_model.predict(X_test)
train_predictions=enr_model.predict(X_train)

from sklearn.metrics import mean_squared_error
train_rmse=np.sqrt(mean_squared_error(y_test,test_predictions))
test_rmse=np.sqrt(mean_squared_error(y_train,train_predictions))

print('train RMSE:',train_rmse)
print('test RMSE:',test_rmse)

train RMSE: 1948.5368936334041
test RMSE: 1574.5968305907172
