In [None]:
import os

import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from utils import read_data, int_enc, shuffle_Xy, plot_confusion_matrix, plot_roc_curves

#### Run config

In [None]:
# Run configuration: dataset id, candidate models, and which model to run.
GPL = "GPL570"  # selects data/{GPL}.csv and tags every output file name
SEED = 42       # fixed seed so the stochastic estimators are repeatable

# Standardisation is a pipeline step so that cross_validate() and fit()
# learn the scaling statistics from the training rows only; held-out rows
# never influence preprocessing. Re-standardising input that has already
# been scaled upstream is harmless.
lr_model = Pipeline([
    ("scaler", StandardScaler()),
    ("pca", PCA(n_components=50, random_state=SEED)),
    ("lr", LogisticRegression(max_iter=50000)),
])

nn_model = Pipeline([
    ("scaler", StandardScaler()),
    ("nn", MLPClassifier(hidden_layer_sizes=(50, 50, 50), random_state=SEED)),
])

# Pick the model through its name so the label used in result file names
# cannot drift from the estimator that is actually run.
MODELS = {"LR": lr_model, "NN": nn_model}
name = "LR"
model = MODELS[name]

#### Read and prepare data

In [None]:
# Load the CSV for the configured GPL and pass it through int_enc
# (presumably an integer-encoding step -- confirm in utils).
data = int_enc(read_data(f"data/{GPL}.csv"))

# The "label" column is the target; every other column is a feature.
y = data["label"].values
X = data.drop(["label"], axis=1).values

# Standardise the features, then hand X and y to shuffle_Xy.
# NOTE(review): the scaler is fit on every row here, i.e. before the
# cross-validation folds and the train/test split are created later on.
X, y = shuffle_Xy(StandardScaler().fit_transform(X), y)

#### 10-fold cross validation

In [None]:
# 10-fold cross validation of the selected model; keep the per-fold test scores.
cv = cross_validate(model, X, y, cv=10)
scores = cv["test_score"]

print("Avg. Score: {:.2f}".format(np.mean(scores)))

# Create the output directory first: without it np.save raises
# FileNotFoundError and the scores from the run above are lost.
os.makedirs("results", exist_ok=True)
np.save(f"results/{name}{GPL}.npy", scores)

#### Fit on train, predict on test

In [None]:
# Hold out 30% of the rows for evaluation. The fixed random_state keeps the
# split identical across reruns, so everything computed from y_te / yhat /
# yhat_proba is evaluated on the same rows every time.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=.30, random_state=42)

model.fit(X_tr, y_tr)
yhat = model.predict(X_te)              # predicted class per held-out row
yhat_proba = model.predict_proba(X_te)  # predicted class probabilities

#### Plot confusion matrix

In [None]:
# Confusion matrix of the held-out predictions. The path is handed to the
# helper (presumably where the figure is written -- confirm in utils).
confusion_title = f"{name} - Confusion Matrix on {GPL}"
confusion_path = f"results/{name}{GPL}confusion.png"
plot_confusion_matrix(y_te, yhat, title=confusion_title, path=confusion_path)

#### Plot ROC curves

In [None]:
# ROC curves from the held-out class probabilities. The path is handed to
# the helper (presumably where the figure is written -- confirm in utils).
roc_title = f"{name} - ROC Curve on {GPL}"
roc_path = f"results/{name}{GPL}ROC.png"
plot_roc_curves(y_te, yhat_proba, title=roc_title, path=roc_path)