In [None]:
import numpy as np
import pandas as pd

In [None]:
# Locations of the pre-extracted CQT feature arrays, one .npy file per
# (split, class) pair. All six paths share the same layout, so they are
# generated from a single template.
# NOTE(review): the test/ and val/ files are also named "train_*.npy" --
# confirm that this matches the files actually written by feature extraction.
_features_root = "../../feature_extraction/CQT_84_features"
_npy_template = _features_root + "/{split}/train_{klass}.npy"

queen_train_path = _npy_template.format(split="train", klass="queen")
queenless_train_path = _npy_template.format(split="train", klass="queenless")
queen_test_path = _npy_template.format(split="test", klass="queen")
queenless_test_path = _npy_template.format(split="test", klass="queenless")
queen_val_path = _npy_template.format(split="val", klass="queen")
queenless_val_path = _npy_template.format(split="val", klass="queenless")

In [None]:
# Read the six feature arrays from disk, one split at a time
# (queen first, then queenless, within each split).
queen_train, queenless_train = (
    np.load(path) for path in (queen_train_path, queenless_train_path)
)
queen_test, queenless_test = (
    np.load(path) for path in (queen_test_path, queenless_test_path)
)
queen_val, queenless_val = (
    np.load(path) for path in (queen_val_path, queenless_val_path)
)


In [None]:
# Binary targets for the training set: 0 = queen, 1 = queenless.
# The number of labels per class is taken from the loaded arrays rather than
# hard-coded, so the labels cannot silently drift out of step with the data
# when the feature files change size. The ordering (queen block first, then
# queenless) is the same order in which the feature arrays are stacked.
train_label = pd.DataFrame(
    np.concatenate(
        [
            np.zeros(len(queen_train), dtype=int),
            np.ones(len(queenless_train), dtype=int),
        ]
    ),
    columns=['labels'],
)


In [None]:
# Training feature table: queen rows on top, queenless rows underneath.
stacked_train_features = np.vstack((queen_train, queenless_train))
train_data = pd.DataFrame(stacked_train_features)



In [None]:
# Display the training feature table as the cell output.
train_data

In [None]:
# Display the shape of the queenless test array.
queenless_test.shape

In [None]:
# Evaluation set = test split + validation split, stacked as
# [queen_test, queen_val, queenless_test, queenless_val].
# Labels: 0 = queen, 1 = queenless. The per-class counts are derived from the
# arrays being stacked (instead of a hard-coded 3000/3000) so that labels and
# rows always line up, whatever the split sizes are.
n_queen_eval = len(queen_test) + len(queen_val)
n_queenless_eval = len(queenless_test) + len(queenless_val)

test_label = pd.DataFrame(
    np.concatenate(
        [
            np.zeros(n_queen_eval, dtype=int),
            np.ones(n_queenless_eval, dtype=int),
        ]
    ),
    columns=['labels'],
)
test_data = pd.DataFrame(np.vstack([queen_test, queen_val, queenless_test, queenless_val]))

# Guard against any mismatch between the label vector and the feature rows.
assert len(test_label) == len(test_data), "test labels and test rows differ in length"



In [None]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier, BaggingClassifier


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column standardization statistics from the training features
# only, then apply them to those same features.
scaler = StandardScaler().fit(train_data)
X_scaled = scaler.transform(train_data)

In [None]:
# Transform the evaluation features with the already-fitted scaler
# (transform only; the scaler is not re-fitted here).
X_test_scaled = scaler.transform(test_data)

In [None]:
# Display the standardized training features as the cell output.
X_scaled

In [None]:
import time
from sklearn.model_selection import RandomizedSearchCV

# Randomized hyper-parameter search for an ExtraTrees classifier.
# t1/t2 bracket the search so the next cell can report the wall-clock time.
t1 = time.time()

# Candidate values for each hyper-parameter (plain Python ints).
# NOTE(review): scikit-learn trees appear to clamp min_samples_split to at
# least 2 * min_samples_leaf; with min_samples_leaf >= 20 the
# min_samples_split range below (15..35) may therefore have no effect --
# confirm and consider dropping or widening it.
extra_trees_search = {
    'n_estimators': list(range(50, 126, 5)),
    'max_features': list(range(50, 401, 50)),
    'min_samples_leaf': list(range(20, 51, 5)),
    'min_samples_split': list(range(15, 36, 5)),
}
base_model = ExtraTreesClassifier(random_state=1337)
clf = RandomizedSearchCV(
    base_model,
    extra_trees_search,
    scoring='accuracy',
    n_iter=50,
    random_state=1337,
    verbose=3,
)

# Fit exactly once. The search was previously fitted twice in a row, which
# repeated the whole search and doubled both the run time and the elapsed
# time reported from t2 - t1.
clf.fit(X_scaled, np.array(train_label).ravel())
t2 = time.time()

In [None]:
# Report how long the search took, both as raw seconds and as
# whole minutes plus whole remaining seconds.
elapsed_seconds = t2 - t1
whole_minutes, leftover_seconds = divmod(elapsed_seconds, 60)
print(f"time elapsed: {elapsed_seconds} seconds or {int(whole_minutes)} minutes and {int(leftover_seconds)} seconds")

In [None]:
# Accuracy of the tuned search object on the held-out evaluation set.
eval_predictions = clf.predict(X_test_scaled)
eval_accuracy = accuracy_score(test_label, eval_predictions)
print(eval_accuracy)

In [None]:
from sklearn.metrics import classification_report, roc_curve, RocCurveDisplay, roc_auc_score, auc


In [None]:
# Per-class precision / recall / F1 on the evaluation set.
# Label 0 is reported as "Queen" and label 1 as "Queenless".
report_class_names = ["Queen", "Queenless"]
report_text = classification_report(
    test_label,
    clf.predict(X_test_scaled),
    target_names=report_class_names,
)
print(report_text)

In [None]:
# Display the (fpr, tpr, thresholds) triple for the evaluation set.
# ROC analysis needs a continuous score per sample; hard 0/1 predictions
# collapse the curve to a single operating point. Use the predicted
# probability of the positive class (label 1, column 1 of predict_proba).
roc_curve(test_label, clf.predict_proba(X_test_scaled)[:, 1])

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the ROC curve for the evaluation set.
# The curve is built from the predicted probability of the positive class
# (label 1) rather than from hard 0/1 predictions, which would yield a
# degenerate curve with a single threshold.
# NOTE(review): the legend name and title mention "micro-average" and
# "One-vs-Rest", but the labels here are binary (0/1) -- confirm the wording.
RocCurveDisplay.from_predictions(
    np.array(test_label.values).ravel(),
    clf.predict_proba(X_test_scaled)[:, 1],
    name="micro-average OvR",
    color="darkorange",
)
plt.axis("square")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Micro-averaged One-vs-Rest\nReceiver Operating Characteristic")
plt.legend()
plt.show()

In [None]:
# ROC curve and AUC from the positive-class (label 1) probabilities.
y_pred_proba = clf.predict_proba(X_test_scaled)[:, 1]
fpr, tpr, _thresholds = roc_curve(test_label, y_pred_proba)
# Store the score under its own name: binding it to `auc` would shadow the
# `auc` function imported from sklearn.metrics.
roc_auc = roc_auc_score(test_label, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(roc_auc))
plt.legend(loc=4)
plt.show()

In [None]:
from sklearn.metrics import classification_report, roc_curve, RocCurveDisplay, roc_auc_score, auc

# ROC curve from the positive-class (label 1) probabilities, with the chance
# diagonal for reference; the figure is also written to disk.
fpr, tpr, thresholds = roc_curve(test_label, clf.predict_proba(X_test_scaled)[:, 1])

# Keep the area under a separate name so the imported `auc` function is not
# overwritten by a float (which would break any later call to auc(...)).
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label="ROC curve (area = {0:.2f})".format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle="dashed")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
# NOTE(review): the output file name mentions "FFT 1D" and "XGBoost", while
# this notebook loads CQT features and fits ExtraTrees -- confirm the name.
plt.savefig("FFT 1D with XGBoost roc_curve.png")
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
import seaborn as sns

# Raw-count confusion matrix on the evaluation set.
# sklearn's confusion_matrix puts true labels on rows and predicted labels
# on columns; label 0 is shown as "Queen" and label 1 as "Queenless".
cf_mat = confusion_matrix(test_label, clf.predict(X_test_scaled))
cm_class_names = ['Queen', 'Queenless']
df_cm = pd.DataFrame(cf_mat, index=cm_class_names, columns=cm_class_names)

# The normalized matrix is computed in its own cell, so it is not duplicated here.
plt.figure(figsize=(10, 7))
ax = sns.heatmap(df_cm, annot=True, fmt="")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
plt.show()


In [None]:
# Confusion matrix rescaled so that every column sums to 1: each cell is
# divided by the total of its column (i.e. by the count of that predicted
# class).
# NOTE(review): this is column-wise normalization; if per-true-class rates
# (rows summing to 1) were intended, the division axis needs to change.
column_totals = df_cm.sum(axis=0)
df_cm_normed = df_cm.div(column_totals, axis="columns")
plt.figure(figsize=(10, 7))
sns.heatmap(df_cm_normed, annot=True, fmt=".3f")