In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold,train_test_split,cross_val_score,GridSearchCV
from sklearn.metrics import classification_report,confusion_matrix,mean_absolute_error,r2_score
from sklearn.pipeline import make_pipeline,Pipeline
from sklearn.preprocessing import OneHotEncoder,LabelEncoder,StandardScaler
from drawdata import draw_scatter
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifier
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings('ignore')

In [25]:
x=pd.read_csv('4_class_classification.csv').iloc[:,1:]
y=x.pop('z').values
y=LabelEncoder().fit_transform(y.reshape(-1,1))
x=x.values
x[:,1]*=1000
x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.8,stratify=y)
print(x_train.shape)
print(y_train.shape)

(289, 2)
(289,)


# make pipeline

In [20]:
pipe=make_pipeline(StandardScaler(),SVC())
params={'svc__C':[0.001,0.1,1000,10000,1000000],
       'svc__class_weight': ['balanced',None],
       'svc__kernel':['rbf']}
cls=GridSearchCV(pipe,params,cv=5,scoring='f1_macro',verbose=10)
cls.fit(x_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5; 1/10] START svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf..
[CV 1/5; 1/10] END svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf;, score=0.123 total time=   0.0s
[CV 2/5; 1/10] START svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf..
[CV 2/5; 1/10] END svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf;, score=0.067 total time=   0.0s
[CV 3/5; 1/10] START svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf..
[CV 3/5; 1/10] END svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf;, score=0.137 total time=   0.0s
[CV 4/5; 1/10] START svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf..
[CV 4/5; 1/10] END svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf;, score=0.137 total time=   0.0s
[CV 5/5; 1/10] START svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf..
[CV 5/5; 1/10] END svc__C=0.001, svc__class_weight=balanced, svc__kernel=rbf;, score=0.075 tot

[CV 5/5; 9/10] END svc__C=1000000, svc__class_weight=balanced, svc__kernel=rbf;, score=0.903 total time=   0.1s
[CV 1/5; 10/10] START svc__C=1000000, svc__class_weight=None, svc__kernel=rbf...
[CV 1/5; 10/10] END svc__C=1000000, svc__class_weight=None, svc__kernel=rbf;, score=0.870 total time=   0.0s
[CV 2/5; 10/10] START svc__C=1000000, svc__class_weight=None, svc__kernel=rbf...
[CV 2/5; 10/10] END svc__C=1000000, svc__class_weight=None, svc__kernel=rbf;, score=0.895 total time=   0.1s
[CV 3/5; 10/10] START svc__C=1000000, svc__class_weight=None, svc__kernel=rbf...
[CV 3/5; 10/10] END svc__C=1000000, svc__class_weight=None, svc__kernel=rbf;, score=0.805 total time=   0.0s
[CV 4/5; 10/10] START svc__C=1000000, svc__class_weight=None, svc__kernel=rbf...
[CV 4/5; 10/10] END svc__C=1000000, svc__class_weight=None, svc__kernel=rbf;, score=0.896 total time=   0.0s
[CV 5/5; 10/10] START svc__C=1000000, svc__class_weight=None, svc__kernel=rbf...
[CV 5/5; 10/10] END svc__C=1000000, svc__class_

In [21]:
y_pred=cls.predict(x_test)
print(cls.best_estimator_)
print(confusion_matrix(y_test,y_pred))

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=1000, class_weight='balanced'))])
[[21  1  1  0]
 [ 1 11  0  0]
 [ 1  0 27  0]
 [ 0  0  0 10]]


# Pipeline

In [35]:
pipe=Pipeline([('scaler', StandardScaler()),
            ('estimator', KNeighborsClassifier())])
params=[{'estimator':[KNeighborsClassifier()],
         'estimator__n_neighbors':[4,6,10],
        'scaler':[StandardScaler(),'passthrough']},
       {'estimator':[SVC()],
         'estimator__C':[10,1000],
        'scaler':[StandardScaler(),'passthrough']},
        {'estimator':[RandomForestClassifier()],
         'estimator__min_samples_leaf':[3,6],
        'scaler':[StandardScaler(),'passthrough']},
       {'estimator':[RidgeClassifier()],
        'scaler':[StandardScaler(),'passthrough']}]
cls=GridSearchCV(pipe,params,cv=3,scoring='f1_macro',verbose=10)
cls.fit(x_train,y_train)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV 1/3; 1/16] START estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=StandardScaler()
[CV 1/3; 1/16] END estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=StandardScaler();, score=0.901 total time=   0.0s
[CV 2/3; 1/16] START estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=StandardScaler()
[CV 2/3; 1/16] END estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=StandardScaler();, score=0.945 total time=   0.0s
[CV 3/3; 1/16] START estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=StandardScaler()
[CV 3/3; 1/16] END estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=StandardScaler();, score=0.844 total time=   0.0s
[CV 1/3; 2/16] START estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=passthrough
[CV 1/3; 2/16] END estimator=KNeighborsClassifier(), estimator__n_neighbors=4, scaler=passthrough;, score=0.373 tota

[CV 1/3; 13/16] END estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=StandardScaler();, score=0.917 total time=   0.2s
[CV 2/3; 13/16] START estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=StandardScaler()
[CV 2/3; 13/16] END estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=StandardScaler();, score=0.886 total time=   0.2s
[CV 3/3; 13/16] START estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=StandardScaler()
[CV 3/3; 13/16] END estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=StandardScaler();, score=0.816 total time=   0.3s
[CV 1/3; 14/16] START estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=passthrough
[CV 1/3; 14/16] END estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler=passthrough;, score=0.944 total time=   0.3s
[CV 2/3; 14/16] START estimator=RandomForestClassifier(), estimator__min_samples_leaf=6, scaler

In [37]:
cls.score(x_test,y_test)
y_pred=cls.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print(cls.best_estimator_)

[[22  1  0  0]
 [ 0 12  0  0]
 [ 1  0 27  0]
 [ 0  0  0 10]]
Pipeline(steps=[('scaler', StandardScaler()), ('estimator', SVC(C=10))])
