In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Default size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (10,6)
import warnings
# Silence every warning for the rest of the session. NOTE(review): this also hides
# deprecation and data-validation warnings emitted by the modelling libraries.
warnings.filterwarnings("ignore")

In [2]:
# Read the advertising data set; the path is relative to the notebook's working directory.
df = pd.read_csv("Advertising.csv")
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


### Train Test Split : 

In [12]:
# "sales" is the regression target; every remaining column is used as a feature.
y = df["sales"]
X = df.drop(columns="sales")

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

### Metrics : 

In [13]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

def train_val(model, X_train, y_train, X_test, y_test):
    """Score an already fitted regressor on the train and the test split.

    Parameters
    ----------
    model : object exposing ``predict(X)``; it is only used for prediction here,
        never fitted.
    X_train, y_train : features / targets of the training split.
    X_test, y_test : features / targets of the test split.

    Returns
    -------
    pandas.DataFrame
        Columns ``"train"`` and ``"test"``; rows ``"R2"``, ``"mae"``, ``"mse"``
        and ``"rmse"`` (the square root of the ``"mse"`` row).
    """

    def _split_metrics(y_true, y_hat):
        # One metric dictionary per split; the MSE is computed once and reused for the RMSE.
        r2 = r2_score(y_true, y_hat)
        mae = mean_absolute_error(y_true, y_hat)
        mse = mean_squared_error(y_true, y_hat)
        return {"R2": r2, "mae": mae, "mse": mse, "rmse": np.sqrt(mse)}

    test_pred = model.predict(X_test)
    train_pred = model.predict(X_train)

    per_split = {
        "train": _split_metrics(y_train, train_pred),
        "test": _split_metrics(y_test, test_pred),
    }
    return pd.DataFrame(per_split)


### Adaboost Regression : 

In [35]:
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost regressor with fixed hyper-parameters and a fixed seed.
ada_model = AdaBoostRegressor(
    n_estimators=450,
    learning_rate=1.6,
    random_state=101,
)
ada_model.fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(ada_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.978248,0.965412
mae,0.640983,0.837818
mse,0.574831,1.024156
rmse,0.758176,1.012006


### Cross-Validation : 

In [36]:
from sklearn.model_selection import cross_validate, cross_val_score

# 10-fold cross-validation on the training split, using a fresh (unfitted) estimator
# with the same hyper-parameters as `ada_model`.
model = AdaBoostRegressor(random_state=101, learning_rate=1.6, n_estimators=450)
scores = cross_validate(
    model,
    X_train,
    y_train,
    scoring=[
        'r2',
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ],
    cv=10,
)
# Skip the first two columns (the timing columns) and average each metric over the folds;
# same column selection as the other cross-validation cells in this notebook.
pd.DataFrame(scores).iloc[:, 2:].mean()

test_r2                             0.950946
test_neg_mean_absolute_error       -0.872487
test_neg_mean_squared_error        -1.170005
test_neg_root_mean_squared_error   -1.066523
dtype: float64

### Gradient Boosting Regression : 

In [64]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting regressor with fixed hyper-parameters and a fixed seed.
gr_model = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
    subsample=0.8,
    random_state=101,
)
gr_model.fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(gr_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,1.0,0.99
mae,0.074,0.441
mse,0.008,0.293
rmse,0.091,0.542


### Cross Validate Control : 

In [65]:
# 10-fold cross-validation on the training split, using a fresh (unfitted) estimator
# with the same hyper-parameters as `gr_model`.
model = GradientBoostingRegressor(random_state = 101, learning_rate=0.1, n_estimators= 200, subsample=0.8, max_depth=3)

scores = cross_validate(
    model,
    X_train,
    y_train,
    scoring=[
        'r2',
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ],
    cv=10,
)
# Skip the first two columns (the timing columns) and average each metric over the folds;
# same column selection as the other cross-validation cells in this notebook.
pd.DataFrame(scores).iloc[:, 2:].mean()

test_r2                             0.974
test_neg_mean_absolute_error       -0.571
test_neg_mean_squared_error        -0.603
test_neg_root_mean_squared_error   -0.757
dtype: float64

### XGBRegressor : 

In [67]:
!pip install xgboost



In [160]:
from xgboost import XGBRegressor

# XGBoost regressor with fixed hyper-parameters and a fixed seed.
xgb_model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=4,
    min_child_weight=3,
    gamma=0.01,
    reg_alpha=0,
    reg_lambda=1,
    random_state=101,
)
xgb_model.fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(xgb_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,1.0,0.984
mae,0.071,0.554
mse,0.01,0.484
rmse,0.098,0.696


### Cross-Validation : 

In [119]:
# 10-fold cross-validation on the training split with a fresh, unfitted XGBoost regressor.
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=4,
    min_child_weight=3,
    gamma=0.01,
    reg_alpha=0,
    reg_lambda=1,
    random_state=101,
)

scores = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring=[
        'r2',
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ],
)
# Drop the first two columns, then average every remaining column over the folds.
pd.DataFrame(scores).iloc[:, 2:].mean()

test_r2                             0.968
test_neg_mean_absolute_error       -0.653
test_neg_mean_squared_error        -0.799
test_neg_root_mean_squared_error   -0.865
dtype: float64

### Random Forest : 

In [163]:
from sklearn.ensemble import RandomForestRegressor

# Random forest regressor with fixed hyper-parameters and a fixed seed.
rnd_model = RandomForestRegressor(
    n_estimators=300,
    max_depth=10,
    min_samples_split=2,
    ccp_alpha=0,
    random_state=101,
)
rnd_model.fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(rnd_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.997,0.981
mae,0.224,0.603
mse,0.082,0.559
rmse,0.287,0.747


### Cross Validate Control : 

In [156]:
# 10-fold cross-validation on the training split with a fresh, unfitted random forest.
model = RandomForestRegressor(
    n_estimators=300,
    max_depth=10,
    min_samples_split=2,
    ccp_alpha=0,
    random_state=101,
)

scores = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring=[
        'r2',
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ],
)

# Drop the first two columns, then average every remaining column over the folds.
pd.DataFrame(scores).iloc[:, 2:].mean()

test_r2                             0.973
test_neg_mean_absolute_error       -0.612
test_neg_mean_squared_error        -0.612
test_neg_root_mean_squared_error   -0.768
dtype: float64

### Decision Tree Regressor : 

In [157]:
from sklearn.tree import DecisionTreeRegressor

# Single decision tree regressor with fixed hyper-parameters and a fixed seed.
dt_model = DecisionTreeRegressor(
    max_depth=10,
    min_samples_split=3,
    random_state=101,
)
dt_model.fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(dt_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.999,0.971
mae,0.107,0.726
mse,0.032,0.862
rmse,0.18,0.928


### cross validate : 

In [164]:
# 10-fold cross-validation on the training split with a fresh, unfitted decision tree.
model = DecisionTreeRegressor(
    max_depth=10,
    min_samples_split=3,
    random_state=101,
)

scores = cross_validate(
    model,
    X_train,
    y_train,
    cv=10,
    scoring=[
        'r2',
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ],
)
# Drop the first two columns, then average every remaining column over the folds.
pd.DataFrame(scores).iloc[:, 2:].mean()

test_r2                             0.947
test_neg_mean_absolute_error       -0.863
test_neg_mean_squared_error        -1.175
test_neg_root_mean_squared_error   -1.077
dtype: float64

### LassoCV : 

In [169]:
from sklearn.linear_model import LassoCV

# Lasso with built-in cross-validation; `fit` returns the estimator itself,
# so construction and fitting are chained.
lasso_model = LassoCV(random_state=101).fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(lasso_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.887,0.931
mae,1.299,1.113
mse,2.979,2.049
rmse,1.726,1.431


### RidgeCV : 

In [172]:
from sklearn.linear_model import RidgeCV

# Ridge with built-in cross-validation, library defaults; `fit` returns the estimator itself,
# so construction and fitting are chained.
ridge_model = RidgeCV().fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(ridge_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.888,0.93
mae,1.291,1.124
mse,2.969,2.063
rmse,1.723,1.436


### SVR Regression : 

In [203]:
from sklearn.svm import SVR

# Linear-kernel support vector regressor. `degree` is only used by the "poly" kernel and
# `gamma` only by the "rbf", "poly" and "sigmoid" kernels, so neither is passed here:
# with kernel="linear" they would have no effect on the fitted model.
svr_model = SVR(kernel="linear", C=1.0)
svr_model.fit(X_train, y_train)

# Train vs. test metrics side by side.
train_val(svr_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.874,0.914
mae,1.222,1.203
mse,3.331,2.539
rmse,1.825,1.593


### The Gradient Boosting model achieved the best score : 

In [204]:
# Final model: a gradient boosting regressor with the same hyper-parameters as `gr_model`,
# fitted on the training split.
final_model = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
    subsample=0.8,
    random_state=101,
)
# Kept as the last expression so the fitted estimator is displayed as the cell output.
final_model.fit(X_train, y_train)

GradientBoostingRegressor(n_estimators=200, random_state=101, subsample=0.8)

In [207]:
# Look up the row labelled 57 (features and actual sales) to compare against a model prediction.
df.loc[57]

TV          136.200
radio        19.200
newspaper    16.600
sales        13.200
Name: 57, dtype: float64

In [208]:
# Predict for row 57 straight from the frame instead of re-typing its values: the
# one-row DataFrame keeps the feature column names the model was fitted with and
# cannot drift from the data. NOTE(review): row 57 may be part of the training split.
final_model.predict(df.loc[[57]].drop("sales", axis=1))

array([13.18392057])

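### The prediction (≈13.18) is very close to the actual sales value (13.2). Note that `final_model` was fitted on the training split, so if row 57 belongs to it this is a sanity check rather than an out-of-sample evaluation.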