# SUPPORT VECTOR MACHINES

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale, StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import neighbors
from sklearn.svm import SVR

In [2]:
from warnings import filterwarnings
# Ignore every warning raised from this point on.
# NOTE(review): this also hides library deprecation warnings; consider a narrower filter.
filterwarnings("ignore")

In [3]:
# Load the Hitters data and discard every row that contains a missing value.
df = pd.read_csv("Hitters.csv").dropna()

# One-hot encode the three categorical columns.
dms = pd.get_dummies(df[["League", "Division", "NewLeague"]])

# Target is the salary; the remaining non-categorical columns are cast to float.
y = df["Salary"]
X_ = df.drop(columns=["Salary", "League", "Division", "NewLeague"]).astype("float64")

# Keep a single indicator column per categorical variable next to the numeric predictors.
X = pd.concat([X_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis=1)

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [4]:
# Summarise the training predictors: index range, column names, non-null counts and dtypes.
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 197 entries, 183 to 133
Data columns (total 19 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   AtBat        197 non-null    float64
 1   Hits         197 non-null    float64
 2   HmRun        197 non-null    float64
 3   Runs         197 non-null    float64
 4   RBI          197 non-null    float64
 5   Walks        197 non-null    float64
 6   Years        197 non-null    float64
 7   CAtBat       197 non-null    float64
 8   CHits        197 non-null    float64
 9   CHmRun       197 non-null    float64
 10  CRuns        197 non-null    float64
 11  CRBI         197 non-null    float64
 12  CWalks       197 non-null    float64
 13  PutOuts      197 non-null    float64
 14  Assists      197 non-null    float64
 15  Errors       197 non-null    float64
 16  League_N     197 non-null    uint8  
 17  Division_W   197 non-null    uint8  
 18  NewLeague_N  197 non-null    uint8  
dtypes: flo

### Model and Prediction

In [24]:
# Fit a support vector regressor with a linear kernel on the training split.
# `kernel` is passed by keyword: SVR's constructor parameters are keyword-only in
# current scikit-learn releases, so the positional form SVR("linear") raises TypeError.
svr_model = SVR(kernel="linear").fit(X_train, y_train)

In [25]:
# List every attribute and method name available on the fitted estimator (exploratory inspection).
dir(svr_model)

['C',
 '__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_n_features',
 '_compute_kernel',
 '_decision_function',
 '_dense_decision_function',
 '_dense_fit',
 '_dense_predict',
 '_dual_coef_',
 '_estimator_type',
 '_gamma',
 '_get_coef',
 '_get_param_names',
 '_get_tags',
 '_impl',
 '_intercept_',
 '_more_tags',
 '_n_support',
 '_pairwise',
 '_probA',
 '_probB',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_sparse',
 '_sparse_decision_function',
 '_sparse_fit',
 '_sparse_kernels',
 '_sparse_predict',
 '_validate_data',
 '_validate_for_predict',
 '_validate_targets',
 '_warn_from

In [26]:
# First five predictions for the held-out test rows.
svr_model.predict(X_test)[:5]

array([679.14754685, 633.72883069, 925.68640849, 270.28463621,
       530.26659184])

In [27]:
# First five predictions for the training rows, for comparison with the test predictions.
svr_model.predict(X_train)[:5]

array([219.32622169, 702.4303869 , 623.2055964 , 153.77538476,
       463.15190834])

In [28]:
# Intercept term of the fitted linear-kernel model.
svr_model.intercept_

array([-80.15196151])

In [29]:
# Fitted weights of the linear-kernel model, one per predictor column of X_train.
svr_model.coef_

array([[ -1.21839037,   6.09602969,  -3.67574533,   0.14217075,
          0.51435919,   1.28388986,  12.55922537,  -0.08693755,
          0.46597184,   2.98259944,   0.52944523,  -0.79820799,
         -0.16015534,   0.30872794,   0.28842348,  -1.79560067,
          6.41868985, -10.74313783,   1.33374317]])

In [30]:
# Predict on the held-out test set so the test error can be computed.
y_pred = svr_model.predict(X_test)

In [31]:
# Root mean squared error between the actual and predicted test-set salaries.
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

370.04084185624924

### SVR MODEL TUNING

In [34]:
# Unfitted linear-kernel SVR that serves as the base estimator for the grid search.
# `kernel` is passed by keyword: SVR's constructor parameters are keyword-only in
# current scikit-learn releases, so the positional form SVR("linear") raises TypeError.
svr_model = SVR(kernel="linear")

In [35]:
# Display the estimator's repr to confirm its configuration.
svr_model

SVR(kernel='linear')

In [36]:
# Candidate values of the regularisation parameter C to try in the grid search.
svr_params = {
    "C": [0.1, 0.5, 1, 3],
}

In [40]:
# Exhaustive search over svr_params with 5-fold cross-validation on the training split.
# n_jobs=-1 uses all available workers; verbose=2 prints progress for each fit.
svr_cv_model = GridSearchCV(
    estimator=svr_model,
    param_grid=svr_params,
    cv=5,
    verbose=2,
    n_jobs=-1,
).fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:  2.6min finished


In [41]:
# Parameter setting selected by the grid search.
svr_cv_model.best_params_

{'C': 0.5}

In [42]:
# Refit a linear-kernel SVR on the full training split with the parameters chosen by
# the grid search, read from best_params_ instead of a hard-coded copy of the result.
# `kernel` is passed by keyword: SVR's constructor parameters are keyword-only in
# current scikit-learn releases, so the positional form SVR("linear", ...) raises TypeError.
svr_tuned = SVR(kernel="linear", **svr_cv_model.best_params_).fit(X_train, y_train)

In [43]:
# Test-set predictions from the tuned model (overwrites the earlier y_pred).
y_pred = svr_tuned.predict(X_test)

In [45]:
# Root mean squared error of the tuned model on the test set.
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

367.9874739022889