In [1]:
# Mount Google Drive at /content/drive so that later cells can read files stored under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

In [4]:
# Load the Hitters dataset from the mounted Drive and drop every row that has a missing value.
# Chaining .dropna() (rather than mutating with inplace=True) builds df in a single expression
# straight from the file contents.
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ml/Hitters.csv").dropna()

In [5]:
# Categorical columns that are one-hot encoded instead of being used as raw values.
categorical_cols = ['League', 'Division', 'NewLeague']
dms = pd.get_dummies(df[categorical_cols])

# Target: the Salary column.
y = df["Salary"]

# Remaining predictors (everything except the target and the raw categoricals), cast to float64.
X_ = df.drop(['Salary'] + categorical_cols, axis=1).astype('float64')

# Final design matrix: the float predictors plus one indicator column from each encoded variable.
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)

# Hold out 25% of the rows as a test split; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

**Model-Prediction**

In [7]:
# Fit a support vector regressor with a linear kernel on the training split.
# For a nonlinear model, kernel="rbf" could be used instead.
svr_model = SVR(kernel='linear').fit(X_train, y_train)

In [17]:
# Show the hyperparameters the fitted model is using (everything except kernel was left unset above).
print(svr_model.get_params())

{'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}


In [11]:
# Intercept term of the fitted linear-kernel SVR.
svr_model.intercept_

array([-80.15196063])

In [12]:
# Feature weights learned by the fitted linear-kernel SVR.
svr_model.coef_

array([[ -1.2183904 ,   6.09602978,  -3.67574533,   0.14217072,
          0.51435925,   1.28388992,  12.55922527,  -0.08693754,
          0.46597185,   2.98259931,   0.52944513,  -0.79820793,
         -0.16015531,   0.30872795,   0.28842348,  -1.79560066,
          6.41868986, -10.74313785,   1.33374319]])

In [13]:
# Predict the target for the held-out test split with the untuned model.
y_pred = svr_model.predict(X_test)

In [14]:
# Test-set error of the untuned model: root of the mean squared error between y_test and y_pred.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
rmse

370.0408415795005

**Model Tuning**

**Varsayılan C değeri 1'dir; ceza hiperparametresi olan C'yi optimize etmemiz gerekiyor.**

**Buna GridSearchCV ve çapraz doğrulama (cross-validation) kullanarak karar vereceğiz.**

In [18]:
# The hyperparameters we want searched are expressed as a dictionary that maps each
# parameter name to its list of candidate values (here only C).
svr_params = {"C": [0.1, 0.5, 1, 3]}

In [22]:
# Grid search over svr_params with 5-fold cross-validation on the training split,
# using svr_model as the estimator.
svr_cv_model = GridSearchCV(svr_model, svr_params, cv=5).fit(X_train, y_train)
# Alternative: also pass verbose=2 and n_jobs=-1 to GridSearchCV to log progress and
# run the search in parallel.

In [24]:
# Parameter combination selected by the grid search.
svr_cv_model.best_params_

{'C': 0.5}

In [26]:
# Fit the final linear-kernel SVR on the training split with the hyperparameters chosen by
# the grid search. Reading them from svr_cv_model.best_params_ (instead of copying the value
# by hand) keeps this cell correct if the grid, the data or the CV setup changes.
svr_tuned = SVR(kernel="linear", **svr_cv_model.best_params_).fit(X_train, y_train)

In [27]:
# Predict the test split with the tuned model; this overwrites the y_pred of the untuned model.
y_pred = svr_tuned.predict(X_test)

In [28]:
# Test-set error of the tuned model: root of the mean squared error between y_test and y_pred.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
rmse

367.98747616655294