Buradaki amaç, bir marjin aralığında maksimum noktayı en küçük hata ile alabilecek şekilde doğruyu ya da eğriyi belirlemektir.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
#%matplotlib notebook
# Default size (width, height) for every matplotlib figure drawn in this notebook.
plt.rcParams["figure.figsize"] = (10,6)
import warnings
# Silence every warning for the rest of the session.
# NOTE: this also hides library warnings (deprecations, input-validation notices);
# comment the filter out while debugging.
warnings.filterwarnings("ignore")
# Render floats in pandas output with three decimals.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
# Load the advertising dataset (path is relative to the notebook's working directory)
# and preview the first rows: TV, radio and newspaper values plus the sales column.
df = pd.read_csv("Advertising.csv")
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [3]:
from sklearn.model_selection import train_test_split

# Target is the "sales" column; the features are every remaining column.
y = df["sales"]
X = df.drop(columns="sales")

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [4]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

def train_val(model, X_train, y_train, X_test, y_test):
    """Tabulate regression metrics of a fitted model for the train and test splits.

    Parameters
    ----------
    model : object exposing ``predict``
        Must already be fitted; it is only used for prediction here.
    X_train, y_train : features and targets of the training split.
    X_test, y_test : features and targets of the test split.

    Returns
    -------
    pandas.DataFrame
        Columns ``train`` and ``test``; rows ``R2``, ``mae``, ``mse``, ``rmse``.
    """
    test_predictions = model.predict(X_test)
    train_predictions = model.predict(X_train)

    def summarise(actual, predicted):
        # One metric dict per split; RMSE is the square root of that split's MSE.
        metrics = {
            "R2": r2_score(actual, predicted),
            "mae": mean_absolute_error(actual, predicted),
            "mse": mean_squared_error(actual, predicted),
        }
        metrics["rmse"] = np.sqrt(metrics["mse"])
        return metrics

    report = {
        "train": summarise(y_train, train_predictions),
        "test": summarise(y_test, test_predictions),
    }
    return pd.DataFrame(report)

In [5]:
from sklearn.svm import SVR

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Baseline: standardise the features, then fit an SVR with default hyperparameters.
# The scaler is a pipeline step, so it is fitted on whatever data fit() receives
# (here the training split only).
operations = [("scaler", StandardScaler()), ("svr", SVR())]
model = Pipeline(steps=operations)
model.fit(X_train, y_train)

# Train/test metrics of the baseline model.
train_val(model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.917,0.945
mae,0.804,0.835
mse,2.185,1.637
rmse,1.478,1.279


In [34]:
from sklearn.model_selection import cross_validate

# Cross-validate a fresh (unfitted) scaler + SVR pipeline on the training split.
operations = [('scaler',StandardScaler()),('svr',SVR())]
model = Pipeline(operations)

n_folds = 10
scoring = ['r2', 'neg_mean_absolute_error', 'neg_mean_squared_error',
           'neg_root_mean_squared_error']

scores = cross_validate(model, X_train, y_train, scoring=scoring, cv=n_folds)
# Label the folds 1..n_folds; deriving the index from n_folds keeps it in step
# with the cv argument (a mismatch would make the DataFrame constructor fail).
df_scores = pd.DataFrame(scores, index=range(1, n_folds + 1))
# The first two columns are the timing entries (fit_time, score_time);
# show only the per-metric means.
df_scores.mean().iloc[2:]

test_r2                             0.871
test_neg_mean_absolute_error       -1.016
test_neg_mean_squared_error        -3.085
test_neg_root_mean_squared_error   -1.689
dtype: float64

In [35]:
# Cross-validate a fresh (unfitted) scaler + SVR pipeline on the held-out split.
# NOTE(review): this runs cross-validation on X_test/y_test (the test_size=0.2
# holdout), so every fold is trained and scored on slices of the test data only.
# It does not evaluate the model fitted on X_train -- confirm this is intended.
operations = [('scaler',StandardScaler()),('svr',SVR())]
model = Pipeline(operations)

n_folds = 10
scoring = ['r2', 'neg_mean_absolute_error', 'neg_mean_squared_error',
           'neg_root_mean_squared_error']

scores = cross_validate(model, X_test, y_test, scoring=scoring, cv=n_folds)
# Label the folds 1..n_folds; deriving the index from n_folds keeps it in step
# with the cv argument (a mismatch would make the DataFrame constructor fail).
df_scores = pd.DataFrame(scores, index=range(1, n_folds + 1))
# The first two columns are the timing entries (fit_time, score_time);
# show only the per-metric means.
df_scores.mean().iloc[2:]

test_r2                              0.338
test_neg_mean_absolute_error        -2.297
test_neg_mean_squared_error        -10.669
test_neg_root_mean_squared_error    -2.872
dtype: float64

In [37]:
from sklearn.model_selection import GridSearchCV

# Unfitted scaler + SVR pipeline that the grid search will tune.
operations = [("scaler", StandardScaler()), ("svr", SVR())]
model = Pipeline(steps=operations)

# List the tunable parameters; step parameters are exposed as "<step>__<param>".
model.get_params()

{'memory': None,
 'steps': [('scaler', StandardScaler()), ('svr', SVR())],
 'verbose': False,
 'scaler': StandardScaler(),
 'svr': SVR(),
 'scaler__copy': True,
 'scaler__with_mean': True,
 'scaler__with_std': True,
 'svr__C': 1.0,
 'svr__cache_size': 200,
 'svr__coef0': 0.0,
 'svr__degree': 3,
 'svr__epsilon': 0.1,
 'svr__gamma': 'scale',
 'svr__kernel': 'rbf',
 'svr__max_iter': -1,
 'svr__shrinking': True,
 'svr__tol': 0.001,
 'svr__verbose': False}

In [40]:
# Search space for the SVR step; keys use the pipeline's "svr__<param>" naming.
param_grid = {
    "svr__C": [0.01, 0.1, 1, 10, 1000],
    "svr__gamma": ["scale", "auto", 0.2, 0.3, 0.4],
    "svr__kernel": ["rbf", "linear", "poly", "sigmoid"],
}

# 10-fold grid search over every combination, scored by negative mean squared error.
svr_model_grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=10,
)
svr_model_grid.fit(X_train, y_train)

GridSearchCV(cv=10,
             estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('svr', SVR())]),
             param_grid={'svr__C': [0.01, 0.1, 1, 10, 1000],
                         'svr__gamma': ['scale', 'auto', 0.2, 0.3, 0.4],
                         'svr__kernel': ['rbf', 'linear', 'poly', 'sigmoid']},
             scoring='neg_mean_squared_error')

In [41]:
# Train/test metrics for the fitted grid-search object (train_val calls its predict()).
train_val(svr_model_grid, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.985,0.989
mae,0.332,0.425
mse,0.388,0.333
rmse,0.623,0.577


In [42]:
# Hyperparameter combination that scored best in the grid search.
svr_model_grid.best_params_

{'svr__C': 10, 'svr__gamma': 0.2, 'svr__kernel': 'rbf'}

In [43]:
# Final model: the hyperparameters reported by the grid search
# (kernel='rbf', gamma=0.2, C=10), refitted on the full dataset (X, y).
operations = [("scaler", StandardScaler()), ("svr",  SVR(kernel='rbf', gamma= 0.2, C=10))]
final_model = Pipeline(steps=operations)
final_model.fit(X, y)


Pipeline(steps=[('scaler', StandardScaler()), ('svr', SVR(C=10, gamma=0.2))])

In [44]:
# Predictions on the same X that final_model was fitted on, i.e. in-sample predictions.
y_pred = final_model.predict(X)

In [45]:
# R2 between the targets y and y_pred.
# NOTE(review): if y_pred comes from predicting on the data the model was fitted on,
# this is an in-sample score, not a held-out estimate.
r2_score(y, y_pred)

0.9853062264277094

In [49]:
# RMSE between the targets y and y_pred (square root of the mean squared error).
# NOTE(review): if y_pred comes from predicting on the data the model was fitted on,
# this is an in-sample error, not a held-out estimate.
np.sqrt(mean_squared_error(y, y_pred))

0.6308659015191617

In [50]:
# Show the first row of the dataset (feature values and the actual sales figure).
df.head(1)

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1


In [51]:
# Single observation to score: the TV, radio and newspaper values from the first row of df.
# The frame is built once with the training feature names (X.columns), so the input
# is aligned by name with what the pipeline was fitted on; a frame without names
# would carry the integer columns 0, 1, 2 instead.
fiyat = pd.DataFrame([[230.100, 37.800, 69.200]], columns=X.columns)
final_model.predict(fiyat)

array([21.79686242])

In [52]:
# Displays the grid search's best hyperparameters.
# NOTE(review): this repeats the expression of an earlier cell; one of the two can be dropped.
svr_model_grid.best_params_

{'svr__C': 10, 'svr__gamma': 0.2, 'svr__kernel': 'rbf'}