In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from sklearn import svm
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, roc_auc_score, make_scorer
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler, PowerTransformer

In [None]:
# Relative locations used throughout the notebook:
# input data, persisted models, and saved figures respectively.
data_path, path_model, pics = './data', './models', './images'

In [None]:
# Read the dataset from the data directory into a DataFrame.
new_df = pd.read_csv(filepath_or_buffer=f'{data_path}/balanced_dataframe.csv')

In [None]:
# Target vector is the 'label' column; every other column is a feature.
y = new_df['label']
X = new_df.drop('label', axis=1)
# Display (n_samples, n_features) as the cell output.
X.shape

### Data split

In [None]:
# Hold out 10% of the samples as a test set.
# stratify=y keeps the class proportions of y in both partitions, which is
# consistent with the stratified folds used for cross-validation later on;
# a purely random split can leave the small hold-out set skewed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)
# Sanity check: per-class sample counts in the hold-out set.
y_test.value_counts()

### Scaling

In [None]:
# Learn the scaling statistics from the training partition only, then apply
# the same fitted transform to both partitions.
# Alternative scalers that can be swapped in here: RobustScaler(), PowerTransformer()
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### SVC

In [None]:
svc = svm.SVC(random_state=0)

scoring = {"AUC": "roc_auc", "Accuracy": make_scorer(accuracy_score)}
param_grid = {'C':[1, 10, 100],
                  'gamma':[0.01,0.001, 'auto', 'scale']}

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid = GridSearchCV(svc, param_grid, cv=cv, scoring=scoring, refit='AUC', verbose=1, n_jobs=-1)

%time grid.fit(X_train, y_train)

print()
print(grid.best_params_)

y_pred = grid.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print("Accuracy:", accuracy)

score = roc_auc_score(y_test, y_pred)
print("Mean ROC AUC:", score)

In [None]:
# Render the confusion matrix computed for the grid-search model, with the
# axes labelled by the classes the fitted search object exposes.
disp = ConfusionMatrixDisplay(cm, display_labels=grid.classes_).plot()
plt.show()
# To keep a copy on disk, call plt.savefig(f'{pics}/conf_scaled.jpg')
# before plt.show() rather than after it.

## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Fit a Gaussian Naive Bayes classifier on the training partition
# and evaluate it on the hold-out set.
nb = GaussianNB().fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Per-class precision / recall / F1 summary.
nb_report = classification_report(y_test, y_pred)
print(nb_report)

### Save model if needed

In [None]:
# Serialize the fitted Naive Bayes model under the models directory.
# NOTE(review): assumes the path_model directory already exists — confirm.
gaus_filename = f'{path_model}/gaussian.sav'
joblib.dump(value=nb, filename=gaus_filename)

In [None]:
# nb = joblib.load(gaus_filename)