In [1]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures # for polynomial features
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder


In [2]:
# Load the glass data set. Forward slashes keep this relative path valid on
# Windows, Linux and macOS alike; a backslash-separated path only resolves on Windows.
df = pd.read_csv("../Cases/Glass Identification/Glass.csv")

# Target: the 'Type' column, label-encoded by LabelEncoder.
le = LabelEncoder()
y = le.fit_transform(df['Type'])

# Features: every column except 'Type'.
X = df.drop('Type', axis=1)

In [3]:
# Hold out 30% of the rows for testing; the split is stratified on y and seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [4]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier with C=0.5 and an 'ovo' decision function shape.
svc_ovo = SVC(kernel='linear', C=0.5, decision_function_shape='ovo')

# fit() returns the estimator itself, so training and prediction are chained.
y_pred = svc_ovo.fit(X_train, y_train).predict(X_test)

In [5]:
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

# Accuracy of the held-out predictions produced by svc_ovo.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.5384615384615384


In [13]:
import warnings
warnings.filterwarnings('ignore')  # suppresses every warning from here on
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Grid search over a (scaler -> linear SVC) pipeline, scored by negative log loss.
# probability=True makes the SVC expose predict_proba, which that scorer relies on.
svc = SVC(kernel='linear', probability=True, random_state=24)
std_scaler, mm_scaler = StandardScaler(), MinMaxScaler()

# The 'SCL' step starts out as None; the grid below swaps candidate scalers in.
pipe = Pipeline(steps=[('SCL', None), ('SVC', svc)])
print(pipe.get_params())

# NOTE(review): the score is computed from predict_proba, which presumably does not
# depend on decision_function_shape -- confirm before trimming that grid dimension.
params = {
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__C': np.linspace(0.001, 5, 30),
    'SVC__decision_function_shape': ['ovo', 'ovr'],
}

# Five shuffled, stratified folds with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='linear', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='linear', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'linear', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
{'SCL': None, 'SVC__C': 0.6905172413793103, 'SVC__decision_function_shape': 'ovo'}
-0.9318622677520683


In [16]:
# Grid search over a (scaler -> polynomial-kernel SVC) pipeline, scored by negative log loss.
svc = SVC(kernel='poly', probability=True, random_state=24)
std_scaler, mm_scaler = StandardScaler(), MinMaxScaler()

# The 'SCL' step starts out as None; the grid below swaps candidate scalers in.
pipe = Pipeline(steps=[('SCL', None), ('SVC', svc)])
print(pipe.get_params())

# NOTE(review): the score is computed from predict_proba, which presumably does not
# depend on decision_function_shape -- confirm before trimming that grid dimension.
params = {
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__C': np.linspace(0.001, 5, 20),
    'SVC__coef0': np.linspace(0, 3, 5),
    'SVC__degree': [2, 3],
    'SVC__decision_function_shape': ['ovr', 'ovo'],
}

# Five shuffled, stratified folds with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='poly', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='poly', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'poly', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
{'SCL': StandardScaler(), 'SVC__C': 0.26410526315789473, 'SVC__coef0': 3.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3}
-0.8912631607153946


In [19]:
# Grid search over a (scaler -> RBF-kernel SVC) pipeline, scored by negative log loss.
# The cross-validation splitter is built exactly once: five shuffled, stratified
# folds with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
svc = SVC(kernel='rbf', probability=True, random_state=24)

# The 'SCL' step starts out as None; the grid below swaps candidate scalers in.
pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())

# NOTE(review): the score is computed from predict_proba, which presumably does not
# depend on decision_function_shape -- confirm before trimming that grid dimension.
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__gamma': np.linspace(0.001, 5, 5),
    'SVC__decision_function_shape': ['ovr', 'ovo'],
}

gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                   scoring='neg_log_loss')

gcv.fit(X, y)

# Keep the full per-candidate results table for later inspection.
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'rbf', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
{'SCL': None, 'SVC__C': 1.5796315789473683, 'SVC__decision_function_shape': 'ovr', 'SVC__gamma': 1.2507499999999998}
-0.793728469951138


In [20]:
# Grid search over a (scaler -> sigmoid-kernel SVC) pipeline, scored by negative log loss.
# The cross-validation splitter is built exactly once: five shuffled, stratified
# folds with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
svc = SVC(kernel='sigmoid', probability=True, random_state=24)

# The 'SCL' step starts out as None; the grid below swaps candidate scalers in.
pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())

# NOTE(review): the score is computed from predict_proba, which presumably does not
# depend on decision_function_shape -- confirm before trimming that grid dimension.
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
    'SCL': [None, std_scaler, mm_scaler],
    'SVC__gamma': np.linspace(0.001, 5, 5),
    'SVC__coef0': np.linspace(0, 3, 5),
    'SVC__decision_function_shape': ['ovr', 'ovo'],
}

gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                   scoring='neg_log_loss')

gcv.fit(X, y)

# Keep the full per-candidate results table for later inspection.
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='sigmoid', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='sigmoid', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'sigmoid', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
{'SCL': StandardScaler(), 'SVC__C': 1.5796315789473683, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__gamma': 0.001}
-1.0358929844317568
