### Wczytanie bibliotek

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

from sklearn.metrics import make_scorer
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.model_selection import RandomizedSearchCV

### Podział danych

In [2]:
data = load_boston()
names = data.feature_names
predictors = data.data
targets = data.target
df = pd.concat([pd.DataFrame(predictors, columns=names), pd.DataFrame(targets, columns=['MEDV'])], axis=1)
cols_corr_manual = ['NOX', 'RAD']
df = df.drop(columns=cols_corr_manual)
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)



### Regresja wektora wsparcia dla jądra liniowego o wartości parametru C=1

In [3]:
svm_reg = Pipeline([('scaler', StandardScaler()), ('svr', SVR(kernel='linear',C=1))])
svm_reg.fit(X_train, y_train)
pred = svm_reg.predict(X_test)
# Mean squared error
print('MSE:', mean_squared_error(y_test, pred))
# Mean absolute error
print('MAE:', mean_absolute_error(y_test, pred))
# Coefficient of determination
print('R2:', svm_reg.score(X_test,y_test))

MSE: 45.08912588560663
MAE: 3.847650706542578
R2: 0.44200949488171315


### Regresja wektora wsparcia dla jądra liniowego o wartości parametru C=10

In [4]:
svm_reg = Pipeline([('scaler', StandardScaler()), ('svr', SVR(kernel='linear',C=10))])
svm_reg.fit(X_train, y_train)
pred = svm_reg.predict(X_test)
# Mean squared error
print('MSE:', mean_squared_error(y_test, pred))
# Mean absolute error
print('MAE:', mean_absolute_error(y_test, pred))
# Coefficient of determination
print('R2:', svm_reg.score(X_test,y_test))

MSE: 45.871385717018185
MAE: 3.870153215346661
R2: 0.43232881134904755


### Wyszukanie najlepszych parametrów przez GridSearchCV

In [5]:
C_range = [1, 2, 4, 10]
gamma_range = np.logspace(-2, 2, 5)
parameters = {'kernel':('linear', 'rbf'),'gamma':gamma_range,'C':C_range}
svr = SVR()
grid = GridSearchCV(svr, parameters)
grid.fit(X_train, y_train)
classifier = grid.best_estimator_
print(classifier)


SVR(C=1, gamma=0.01, kernel='linear')


### Wyszukiwanie najlepszych parametrów pomiędzy GridSearchCV oraz RandomizedSearchCV

In [6]:
def fit_model (x,v):
    X_train, X_test, y_train, y_test = train_test_split(x, v, test_size=0.2, random_state=7)
    parameters = {'kernel':('linear', 'rbf'),'gamma':(np.logspace(-2, 2, 5)),'C':[1, 10]}
    svr = SVR()
    grid = GridSearchCV(svr, parameters)
    grid.fit(X_train, y_train)
    rand = RandomizedSearchCV(svr, parameters)
    rand.fit(X_train, y_train)
    if grid.score(X_test,y_test) > rand.score(X_test,y_test):
        return grid.best_estimator_
    else:
        return rand.best_estimator_
    
final = fit_model(X,y)
print(final.get_params())


# Mean squared error
print('MSE:', mean_squared_error(y_test, pred))
# Mean absolute error
print('MAE:', mean_absolute_error(y_test, pred))
# Coefficient of determination
print('R2:', final.score(X_test,y_test))

{'C': 1, 'cache_size': 200, 'coef0': 0.0, 'degree': 3, 'epsilon': 0.1, 'gamma': 1.0, 'kernel': 'linear', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}
MSE: 45.871385717018185
MAE: 3.870153215346661
R2: 0.4593587838236325
