In [None]:
!pip install scikit-learn-intelex 

In [3]:
from sklearnex import patch_sklearn
patch_sklearn()
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report 
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.svm import SVC
import pickle 

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [4]:
dataset = pd.read_csv('imagesFeatures.csv')

# Column 'Y' is the target; every other column is used as a feature.
features = dataset.drop(columns=['Y']).values
labels = dataset['Y'].values

# A fixed random_state makes the 80/20 split (and therefore every score reported
# further down) identical on each Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test data leaks into preprocessing.
ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)

In [9]:
# Two sub-grids: the RBF kernel is searched over gamma and C, the linear kernel
# over C only (gamma is not part of the linear sub-grid).
rbf_grid = dict(kernel=['rbf'], gamma=['auto', 'scale'], C=[1, 10, 100])
linear_grid = dict(kernel=['linear'], C=[1, 10, 100])
params_grid = [rbf_grid, linear_grid]

# Exhaustive search over both sub-grids with 5-fold cross-validation.
svm_model = GridSearchCV(estimator=SVC(), param_grid=params_grid, cv=5)
svm_model.fit(x_train, y_train)


GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [1, 10, 100], 'gamma': ['auto', 'scale'],
                          'kernel': ['rbf']},
                         {'C': [1, 10, 100], 'kernel': ['linear']}])

In [11]:
# Show the hyper-parameter combination selected by the grid search above.
svm_model.best_params_

{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}

In [12]:
# Evaluate the best SVM found by the grid search on the held-out test split.
model = svm_model.best_estimator_
y_pred = model.predict(x_test)

accReport = classification_report(y_test, y_pred, output_dict=True)
print(accReport['accuracy'])
# The 'accuracy' entry is dropped before tabulating so the table only contains
# the per-class and averaged precision/recall/f1/support rows.
df = pd.DataFrame(accReport).drop(columns = ['accuracy']).transpose()
report_latex = df.to_latex()
print(report_latex)

# Context managers guarantee both files are flushed and closed.
with open(f"svm_acc={accReport['accuracy']}.pkl", 'wb') as model_file:
    pickle.dump(model, model_file)

# The report is LaTeX source, so it is written as plain text; pickling it into a
# .txt file produced a binary blob that could not be pasted into a document.
with open('svm_classification_report_in_latex.txt', 'w', encoding='utf-8') as report_file:
    report_file.write(report_latex)

0.7399850709131625
\begin{tabular}{lrrrr}
\toprule
{} &  precision &    recall &  f1-score &  support \\
\midrule
False        &   0.753231 &  0.799558 &  0.775703 &   2260.0 \\
True         &   0.720370 &  0.663445 &  0.690737 &   1759.0 \\
macro avg    &   0.736800 &  0.731501 &  0.733220 &   4019.0 \\
weighted avg &   0.738849 &  0.739985 &  0.738516 &   4019.0 \\
\bottomrule
\end{tabular}



In [13]:
# Base learners for the stacked ensemble. The tree-based learners are seeded so
# that repeated runs give the same fitted models and the same reported score.
# The SVM reuses the hyper-parameters selected by the earlier grid search.
estimators = [
    ('RandomForestClassifier', RandomForestClassifier(random_state=42)),
    ('AdaBoostClassifier', AdaBoostClassifier(random_state=42)),
    ('SVM', SVC(**svm_model.best_params_))
]
# A logistic regression combines the base learners' cross-validated predictions
# (5 unshuffled folds, so the fold assignment itself is deterministic).
model = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=10, cv = KFold(n_splits=5))

model.fit(x_train, y_train)
y_pred = model.predict(x_test)
accReport = classification_report(y_test, y_pred, output_dict=True)
print(accReport['accuracy'])
# The 'accuracy' entry is dropped before tabulating so the table only contains
# the per-class and averaged precision/recall/f1/support rows.
df = pd.DataFrame(accReport).drop(columns = ['accuracy']).transpose()
print(df.to_latex())

0.749440159243593
\begin{tabular}{lrrrr}
\toprule
{} &  precision &    recall &  f1-score &  support \\
\midrule
False        &   0.766482 &  0.797345 &  0.781609 &   2260.0 \\
True         &   0.725420 &  0.687891 &  0.706157 &   1759.0 \\
macro avg    &   0.745951 &  0.742618 &  0.743883 &   4019.0 \\
weighted avg &   0.748510 &  0.749440 &  0.748586 &   4019.0 \\
\bottomrule
\end{tabular}



In [14]:
# Persist the fitted stacking ensemble (`model`). The previous code pickled
# `svm_model`, i.e. the grid-searched SVM, under the StackingClassifier filename.
with open(f"StackingClassifier_acc={accReport['accuracy']}.pkl", 'wb') as model_file:
    pickle.dump(model, model_file)

# The report is LaTeX source, so it is written as plain text rather than pickled
# into a .txt file; the context manager also closes the handle.
with open('StackingClassifier_classification_report_in_latex.txt', 'w', encoding='utf-8') as report_file:
    report_file.write(df.to_latex())