In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, scale
from sklearn.svm import SVR

from warnings import filterwarnings
# Install a blanket "ignore" filter so that no Python warning is shown in the cell outputs.
# NOTE(review): this also hides scikit-learn convergence/deprecation warnings — consider
# narrowing the filter to specific categories once the notebook is stable.
filterwarnings("ignore")

# Support Vector Regression (SVR)

In [3]:
# Load the Hitters data and discard every row that contains a missing value.
df = pd.read_csv("Hitters.csv").dropna()

# One-hot encode the columns that are excluded from the numeric feature matrix below.
categorical_cols = ["League", "Division", "NewLeague"]
dms = pd.get_dummies(df[categorical_cols])

# Target vector.
y = df["Salary"]

# Numeric predictors: everything except the target and the encoded columns.
X_ = df.drop(columns=["Salary", *categorical_cols]).astype("float64")

# Keep a single indicator per encoded column and append it to the numeric predictors.
dummy_cols = ["League_N", "Division_W", "NewLeague_N"]
X = pd.concat([X_, dms[dummy_cols]], axis=1)

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

### Model & Prediction

In [15]:
# Baseline model: support vector regressor with a linear kernel and default
# hyper-parameters, trained on the training split.
svr_model = SVR(kernel="linear")
svr_model.fit(X_train, y_train)

In [16]:
# Display the fitted estimator (its repr) as the cell output.
svr_model

In [17]:
# Peek at the first five predictions on the training split.
svr_model.predict(X_train)[:5]

array([219.32622627, 702.43039317, 623.20559641, 153.77538484,
       463.15191157])

In [18]:
# Peek at the first five predictions on the held-out test split.
svr_model.predict(X_test)[:5]

array([679.14754919, 633.72883529, 925.68639938, 270.28464317,
       530.26659421])

In [19]:
# Intercept (constant term) learned by the fitted SVR.
svr_model.intercept_

array([-80.15196063])

In [20]:
# Feature weights learned by the fitted SVR; scikit-learn exposes `coef_` only
# for the linear kernel.
svr_model.coef_

array([[ -1.2183904 ,   6.09602978,  -3.67574533,   0.14217072,
          0.51435925,   1.28388992,  12.55922527,  -0.08693754,
          0.46597185,   2.98259931,   0.52944513,  -0.79820793,
         -0.16015531,   0.30872795,   0.28842348,  -1.79560066,
          6.41868986, -10.74313785,   1.33374319]])

In [21]:
# test
y_pred = svr_model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

370.0408415795005

In [13]:
# rbf icin degeri gorelim
svr_model = SVR(kernel = "rbf").fit(X_train, y_train)
y_pred = svr_model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

460.0032657244849

In [22]:
# Candidate values of the regularisation parameter C for the grid search.
svr_params = {
    "C": [0.1, 0.5, 1, 3],
}

In [26]:
# Tune C for the linear-kernel SVR with 5-fold cross-validation, using all CPU
# cores (n_jobs=-1) and per-fit progress logging (verbose=2).
# The estimator is built explicitly here so the kernel being tuned does not
# depend on whichever model the name `svr_model` happens to refer to when this
# cell runs, and it matches the linear kernel used for the final refit.
svr_cv_model = GridSearchCV(
    SVR(kernel = "linear"),
    svr_params,
    cv = 5,
    verbose = 2,
    n_jobs = -1,
).fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [24]:
# Parameter setting selected by the grid search.
svr_cv_model.best_params_

{'C': 0.5}

In [27]:
# Refit a linear-kernel SVR on the full training split using the C value chosen
# by the grid search. Reading it from `best_params_` (instead of copying the
# number by hand) keeps this cell correct if the grid or the data changes.
svr_tuned = SVR(kernel="linear", C=svr_cv_model.best_params_["C"]).fit(X_train, y_train)

In [28]:
# Predict the held-out test split with the tuned model.
y_pred = svr_tuned.predict(X_test)

In [29]:
# Test-set RMSE for the predictions currently stored in `y_pred`.
tuned_mse = mean_squared_error(y_test, y_pred)
np.sqrt(tuned_mse)

367.98747616655294

[CV] END ..............................................C=0.1; total time=   0.8s
[CV] END ................................................C=1; total time=  12.9s
[CV] END ..............................................C=0.1; total time=   0.9s
[CV] END ................................................C=1; total time=   9.0s
[CV] END ..............................................C=0.5; total time=   3.7s
[CV] END ................................................C=3; total time=  19.2s
[CV] END ..............................................C=0.5; total time=   3.2s
[CV] END ................................................C=3; total time=  16.6s
[CV] END ..............................................C=0.5; total time=   3.9s
[CV] END ..............................................C=0.1; total time=   0.6s
[CV] END ................................................C=1; total time=   8.9s
[CV] END ..............................................C=0.1; total time=   0.8s
[CV] END ...................