In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

In [2]:
df = pd.read_csv("sales.csv")
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230100,37800,69200,22100
1,44500,39300,45100,10400
2,17200,45900,69300,9300
3,151500,41300,58500,18500
4,180800,10800,58400,12900


In [3]:
X = df.drop("sales", axis = 1)
y = df["sales"]

In [4]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score

***Best Random State***

In [6]:
sc = StandardScaler()

In [17]:
train_score = []
test_score = []
cv_score = []
MAE = []

for i in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = i)
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    model = LinearRegression().fit(X_train, y_train)
    
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)
    
    train_accuracy = model.score(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    score = cross_val_score(model, X_train, y_train, cv = 5).mean()
    mae = mean_absolute_error(y_test, pred_test)
    
    train_score.append(train_accuracy)
    test_score.append(test_accuracy)
    cv_score.append(score)
    MAE.append(mae)

random_state = pd.DataFrame({
    "Train": train_score,
    "Test": test_score,
    "CV": cv_score,
    "MAE": mae
})

In [18]:
random_state

Unnamed: 0,Train,Test,CV,MAE
0,0.910395,0.824731,0.903741,1197.278812
1,0.897886,0.882855,0.887340,1197.278812
2,0.908001,0.737701,0.904416,1197.278812
3,0.893460,0.916130,0.881252,1197.278812
4,0.889656,0.911100,0.870441,1197.278812
...,...,...,...,...
95,0.898879,0.872151,0.873377,1197.278812
96,0.903069,0.857934,0.893503,1197.278812
97,0.889993,0.920709,0.880778,1197.278812
98,0.888354,0.932482,0.871481,1197.278812


## ***GridSearchCV For - PolynomialFeatures***

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 3)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [24]:
estimator = PolynomialFeatures()

param_grid = {"degree": list(range(1, 10))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'degree': 1}

# ***GridSearchCV - Ridge Regression***

In [25]:
estimator = Ridge()

param_grid = {"alpha": list(range(1, 50))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'alpha': 3}

# ***GridSearchCV - Lasso Regression***

In [28]:
estimator = Lasso()

param_grid = {"alpha": list(range(99, 150))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'alpha': 137}

# ***GridSearchCV - ElasticNet***

In [32]:
estimator = ElasticNet()

param_grid = {"alpha": list(range(100, 150)), "l1_ratio": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'alpha': 137, 'l1_ratio': 1}

# ***GridSearchCV - SVC***

In [38]:
estimator = SVR()

param_grid = {"C": list(range(201, 300)), "kernel": ["linear", "poly", "sigmoid", "rbf"]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'C': 245, 'kernel': 'linear'}

# ***GridSearchCV - KNearestNeighbors - KNeighborsRegressor***

In [43]:
estimator = KNeighborsRegressor()

param_grid = {"n_neighbors": list(range(1, 20))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'n_neighbors': 6}

# ***GridSearchCV - Decision Tree - DecisionTreeRegressor***

In [53]:
estimator = DecisionTreeRegressor(random_state = 0)

param_grid = {"max_depth": list(range(1, 20))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print(grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

{'max_depth': 7}


array([0.64031011, 0.35657512, 0.00311477])

# ***GridSearchCV - RandomForestRegressor***

In [56]:
estimator = RandomForestRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(1, 20))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'n_estimators': 18}


array([0.66731905, 0.32157026, 0.01111069])

# ***AdaBoostRegressor***

In [58]:
estimator = AdaBoostRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(19, 40))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'n_estimators': 38}


array([0.72789405, 0.26141767, 0.01068827])

# ***GradientBoostingRegressor***

In [61]:
estimator = GradientBoostingRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(39, 60)), "learning_rate": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'learning_rate': 0.2, 'n_estimators': 55}


array([0.64499165, 0.35142512, 0.00358323])

# ***XGBRegressor***

In [69]:
estimator = XGBRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(120, 150)), "learning_rate": [0, 0.1, 0.2, 0.3],
             "gamma":[0, 0.1, 0.2, 0.3] }

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'gamma': 0.3, 'learning_rate': 0.1, 'n_estimators': 143}


array([0.57699984, 0.41846222, 0.00453788], dtype=float32)