In [None]:
# -*- coding: utf-8 -*-

"""

@ author: Taehyeong Kim

"""

import numpy as np
from scipy.stats import uniform
import pandas as pd

from sklearn.datasets import load_iris, load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, mean_absolute_error, mean_squared_error

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

* Classification: SVM classifier (SVC) on the Iris dataset, tuned with randomized search

In [None]:
# Load Iris. `df` holds the object returned by load_iris() (not a DataFrame);
# its arrays are wrapped in pandas containers below.
df = load_iris()
X = pd.DataFrame(data=df.data, columns=df.feature_names)
y = pd.Series(data=df.target, name="target")
X.info()  # column names, non-null counts and dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


In [None]:
# Hold out 20% of the rows for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1011
)

# Learn the standardization statistics from the training rows only, then apply
# the same transform to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Randomized hyperparameter search for an SVM classifier: 100 sampled
# candidates, each pairing one of four kernels with C drawn from Uniform(0, 4).
# NOTE(review): X_train was standardized on the whole training split before
# this search, so every CV validation fold contributed to the scaling
# statistics; the CV scores may be slightly optimistic.
model = svm.SVC(random_state=1011)
distributions = {
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "C": uniform(loc=0, scale=4),
}
clf = RandomizedSearchCV(
    estimator=model,
    param_distributions=distributions,
    n_iter=100,
    random_state=1011,
)
search = clf.fit(X_train, y_train)  # fit() returns the search object itself
search.best_params_

{'C': 0.5868328269896832, 'kernel': 'linear'}

In [None]:
# Five best candidates by cross-validated rank, with their mean test score.
(
    pd.DataFrame(search.cv_results_)
    .sort_values(by="rank_test_score")
    .loc[:, ["params", "mean_test_score"]]
    .head(5)
)

Unnamed: 0,params,mean_test_score
16,"{'C': 0.5868328269896832, 'kernel': 'linear'}",0.983333
21,"{'C': 1.0013670978634015, 'kernel': 'linear'}",0.983333
91,"{'C': 1.0381090506542021, 'kernel': 'linear'}",0.983333
60,"{'C': 0.671671489372919, 'kernel': 'linear'}",0.983333
87,"{'C': 1.464866065206715, 'kernel': 'linear'}",0.975


In [None]:
# RandomizedSearchCV (refit=True by default) has already retrained the winning
# configuration on the full training split, so reuse that fitted model instead
# of constructing and fitting a second SVC from best_params_. This also keeps
# every non-searched setting of the base estimator (e.g. random_state) intact.
clf = search.best_estimator_
y_pred = clf.predict(X_test)  # predictions on the held-out rows
y_true = y_test

In [None]:
# Per-class precision, recall and F1 on the held-out rows, labelled with the
# class names from the loaded dataset object.
print(
    classification_report(
        y_true,
        y_pred,
        target_names=df.target_names,
    )
)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        14
  versicolor       0.88      1.00      0.93         7
   virginica       1.00      0.89      0.94         9

    accuracy                           0.97        30
   macro avg       0.96      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30



* Regression: SVM regressor (SVR) on the Boston housing dataset, tuned with randomized search

In [None]:
# Load the Boston housing data; `df` is rebound to the object returned by
# load_boston() (not a DataFrame) and its arrays are wrapped in pandas below.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and its import) only runs on older releases — confirm the
# pinned scikit-learn version or switch to another dataset.
df = load_boston()
X = pd.DataFrame(data=df.data, columns=df.feature_names)
y = pd.Series(data=df.target, name="target")
X.info()  # column names, non-null counts and dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CRIM     506 non-null    float64
 1   ZN       506 non-null    float64
 2   INDUS    506 non-null    float64
 3   CHAS     506 non-null    float64
 4   NOX      506 non-null    float64
 5   RM       506 non-null    float64
 6   AGE      506 non-null    float64
 7   DIS      506 non-null    float64
 8   RAD      506 non-null    float64
 9   TAX      506 non-null    float64
 10  PTRATIO  506 non-null    float64
 11  B        506 non-null    float64
 12  LSTAT    506 non-null    float64
dtypes: float64(13)
memory usage: 51.5 KB


In [None]:
# Hold out 20% of the rows for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1011
)

# Learn the standardization statistics from the training rows only, then apply
# the same transform to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Randomized hyperparameter search for an SVM regressor: 100 sampled
# candidates over kernel, C drawn from Uniform(0, 4) and epsilon drawn from
# Uniform(0, 1).
# NOTE(review): X_train was standardized on the whole training split before
# this search, so every CV validation fold contributed to the scaling
# statistics; the CV scores may be slightly optimistic.
model = svm.SVR()
distributions = {
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "C": uniform(loc=0, scale=4),
    "epsilon": uniform(loc=0, scale=1),
}
clf = RandomizedSearchCV(
    estimator=model,
    param_distributions=distributions,
    n_iter=100,
    random_state=1011,
)
search = clf.fit(X_train, y_train)  # fit() returns the search object itself
search.best_params_

{'C': 3.8270706830140164, 'epsilon': 0.03795595986703715, 'kernel': 'rbf'}

In [None]:
# Five best candidates by cross-validated rank, with their mean test score.
(
    pd.DataFrame(search.cv_results_)
    .sort_values(by="rank_test_score")
    .loc[:, ["params", "mean_test_score"]]
    .head(5)
)

Unnamed: 0,params,mean_test_score
3,"{'C': 3.8270706830140164, 'epsilon': 0.0379559...",0.75803
96,"{'C': 3.8015237325585844, 'epsilon': 0.3868553...",0.757036
54,"{'C': 3.86800505474196, 'epsilon': 0.795960085...",0.756249
79,"{'C': 3.4534347156733096, 'epsilon': 0.3134516...",0.749085
68,"{'C': 2.973358792223565, 'epsilon': 0.05071109...",0.739185


In [None]:
# RandomizedSearchCV (refit=True by default) has already retrained the winning
# configuration on the full training split, so reuse that fitted model instead
# of constructing and fitting a second SVR from best_params_.
clf = search.best_estimator_
y_pred = clf.predict(X_test)  # predictions on the held-out rows
y_true = y_test

In [None]:
# Held-out error in target units: mean absolute error, and root mean squared
# error obtained as the square root of the MSE.
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print("MAE :", mae)
print("RMSE :", rmse)

MAE : 2.7260709657337174
RMSE : 4.8955945256373266


In [None]:
# Summary statistics of the full target column (train and test rows together),
# presumably shown to put the MAE/RMSE printed above into scale.
y.describe()

count    506.000000
mean      22.532806
std        9.197104
min        5.000000
25%       17.025000
50%       21.200000
75%       25.000000
max       50.000000
Name: target, dtype: float64