In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from common import *
from models import LogReg, NeuralNetwork, Layer
from cmcrameri import cm
from sklearn.metrics import confusion_matrix

import seaborn as sns
from matplotlib.pyplot import figure

import numpy as np 
%matplotlib inline

### Logistic Regression Gridsearch:

In [None]:
# Load scikit-learn's breast-cancer data set and hold out 30 % of it for testing.
# The fixed random_state keeps the split identical between runs.
cancer = load_breast_cancer()

X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.30,
    random_state=4155,
)

# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics enter the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

In [None]:

# Grid search over the `eta` and `lmb` arguments of LogReg (SGD solver):
# every (eta, lmb) pair is fitted on the scaled training data and scored by
# accuracy on the scaled test data.
eta_list = np.logspace(-1, -4, 12)
# lmb = 0 is prepended so the grid also contains a run with lmb switched off.
lmb_list = np.concatenate([np.zeros(1), np.logspace(-5, 0, 11)], axis=0)

# heatmap_mtrx[i, j] is the test accuracy obtained with eta_list[i], lmb_list[j].
heatmap_mtrx = np.zeros(shape=(len(eta_list), len(lmb_list)))

for i, eta in enumerate(eta_list):
    for j, lmb in enumerate(lmb_list):
        logreg_SGD = LogReg(eta=eta, lmb=lmb)
        logreg_SGD.fit(X_train_scaled, y_train, batch_size=5, epochs=100, solver="SGD")
        heatmap_mtrx[i, j] = logreg_SGD.accuracy(X_test_scaled, y_test)

# Rounded copies are used for the tick labels only; the grids themselves are
# left untouched so re-using eta_list / lmb_list later gives the exact values.
eta_labels = np.around(eta_list, decimals=4)
lmb_labels = np.around(lmb_list, decimals=5)

figure(figsize=(8, 8))
# Transposed so that eta runs along the x-axis and lmb along the y-axis.
gridsearch = sns.heatmap(
    heatmap_mtrx.T,
    annot=True,
    xticklabels=eta_labels,
    yticklabels=lmb_labels,
    cmap=cm.lajolla_r,
)
gridsearch.invert_yaxis()
gridsearch.set_xticklabels(gridsearch.get_xticklabels(), rotation=80)
gridsearch.set_yticklabels(gridsearch.get_yticklabels(), rotation=60)

# Raw strings: "\e" and "\l" are not valid Python escape sequences, so the
# LaTeX commands must not be written in ordinary string literals.
plt.title(r"$\eta$, $\lambda$ grid search for logistic regression. Batch size = 5, epochs = 100.")
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\lambda$")
# plt.savefig(f"{REPORT_FIGURES}{EX_E}logreg_gridsearch.pdf")



### Confusion Matrices:

In [None]:
from sklearn import metrics
# Logistic regression fitted with the "NRM" solver, evaluated on the test split.
logreg_NRM = LogReg()
logreg_NRM.fit(X_train_scaled, y_train, solver="NRM", epochs=100, batch_size=5)

logreg_NRM_accu = logreg_NRM.accuracy(X_test_scaled, y_test)
# Output of the model's forward pass on the test set; the ROC comparison
# further down reads this variable.
logits_NRM = logreg_NRM.forward(X_test_scaled)
y_hat_NRM = logreg_NRM.predict(X_test_scaled)

conf_mat_NRM = confusion_matrix(y_test, y_hat_NRM)

# Rows of the matrix are true labels, columns are predicted labels.
confmatr_NRM = sns.heatmap(
    conf_mat_NRM,
    cmap=cm.grayC_r,
    annot=True,
    fmt=".0f",
    annot_kws={"fontsize": "xx-large"},
)
accu_rounded_NRM = np.around(logreg_NRM_accu, decimals=4)
confmatr_NRM.set_title(f"Confusion Matrix, Logistic Regression with NRM \nAccuracy: {accu_rounded_NRM}")
confmatr_NRM.set_xlabel("Predicted Label")
confmatr_NRM.set_ylabel("True Label")
# plt.savefig(f"{REPORT_FIGURES}{EX_E}logreg_nrm_confmat.pdf")


In [None]:
# Logistic regression fitted with the "SGD" solver (eta = 0.1, lmb = 0),
# evaluated on the test split.
logreg_SGD = LogReg(eta=0.1, lmb=0)
logreg_SGD.fit(X_train_scaled, y_train, solver="SGD", epochs=100, batch_size=5)

logreg_SGD_accu = logreg_SGD.accuracy(X_test_scaled, y_test)
y_hat_SGD = logreg_SGD.predict(X_test_scaled)

conf_mat_SGD = confusion_matrix(y_test, y_hat_SGD)

# Rows of the matrix are true labels, columns are predicted labels.
confmatr_SGD = sns.heatmap(
    conf_mat_SGD,
    cmap=cm.grayC_r,
    annot=True,
    fmt=".0f",
    annot_kws={"fontsize": "xx-large"},
)
accu_rounded_SGD = np.around(logreg_SGD_accu, decimals=4)
confmatr_SGD.set_title(f"Confusion Matrix, Logistic Regression with SGD \nAccuracy: {accu_rounded_SGD}")
confmatr_SGD.set_xlabel("Predicted Label")
confmatr_SGD.set_ylabel("True Label")
# plt.savefig(f"{REPORT_FIGURES}{EX_E}logreg_sgd_confmat.pdf")


### Logistic regression with Sklearn: 

In [None]:
from sklearn.linear_model import LogisticRegression

def sci_kit_test_acu(X_train, X_test, y_train, y_test):
    """Return the test accuracy of scikit-learn's ``LogisticRegression``.

    A ``LogisticRegression`` with the ``"sag"`` solver (all other settings at
    their defaults) is fitted on the training data and used to predict the
    test data.

    Parameters
    ----------
    X_train, y_train
        Features and labels the classifier is fitted on.
    X_test, y_test
        Features to predict and the labels to compare the predictions with.

    Returns
    -------
    The fraction of test samples whose predicted label equals ``y_test``.
    """
    classifier = LogisticRegression(solver="sag")
    predictions = classifier.fit(X_train, y_train).predict(X_test)

    n_correct = sum(predictions == y_test)
    n_total = len(predictions)
    return n_correct / n_total

# Accuracy of scikit-learn's LogisticRegression on the same scaled train/test split.
sci_kit_test_acu(X_train_scaled, X_test_scaled, y_train, y_test)



### Comparing the neural network classification results with the logistic regression results using ROC curves:


In [None]:

from sklearn import metrics
from matplotlib.pyplot import figure

# ROC comparison on the test split of four classifiers: the two neural
# networks (predictions loaded from disk) and logistic regression fitted with
# the SGD and NRM solvers.
#
# NOTE(review): `pd`, `REPORT_DATA` and `EX_D` are not imported explicitly in
# this notebook; presumably they come from `from common import *` - confirm.
# NOTE(review): `logits_NRM` is produced by the NRM confusion-matrix cell,
# which therefore has to be run before this one.
figure(figsize=(10, 10), dpi=80)

# NOTE(review): assumes the rows of this pickle are the networks' outputs for
# the same test samples, in the same order, as y_test - confirm.
df_NN = pd.read_pickle(f"{REPORT_DATA}{EX_D}_best_yhats")
best_yhat_homebrew = df_NN["Own NN"].to_numpy()
best_yhat_torch = df_NN["Torch"].to_numpy()

fpr, tpr, _ = metrics.roc_curve(y_test, best_yhat_homebrew)
roc_auc_NN = metrics.auc(fpr, tpr)

fpr_torch, tpr_torch, _ = metrics.roc_curve(y_test, best_yhat_torch)
roc_auc_torch = metrics.auc(fpr_torch, tpr_torch)

# A separate model object and score variable are used here so that the model
# and the predicted labels from the SGD confusion-matrix cell are not replaced.
# NOTE(review): this fit uses epochs=10 while the other SGD fits in the
# notebook use epochs=100 - confirm this is intentional.
logreg_SGD_roc = LogReg(eta=0.1, lmb=0)
logreg_SGD_roc.fit(X_train_scaled, y_train, batch_size=5, epochs=10, solver="SGD")
# np.ravel gives roc_curve a 1-D score vector whether forward() returns
# shape (n,) or (n, 1).
scores_SGD = np.ravel(logreg_SGD_roc.forward(X_test_scaled))

# The SGD curve is computed from the SGD model's own scores.
fpr_SGD, tpr_SGD, _ = metrics.roc_curve(y_test, scores_SGD)
roc_auc_SGD = metrics.auc(fpr_SGD, tpr_SGD)

fpr_NRM, tpr_NRM, _ = metrics.roc_curve(y_test, np.ravel(logits_NRM))
roc_auc_NRM = metrics.auc(fpr_NRM, tpr_NRM)

# Each legend entry carries the AUC of the curve it labels.
plt.plot(fpr_SGD, tpr_SGD, "o--", label=f"LogReg SGD (AUC = {roc_auc_SGD:.3f})")
plt.plot(fpr_NRM, tpr_NRM, "o-", label=f"LogReg NRM (AUC = {roc_auc_NRM:.3f})")
plt.plot(fpr_torch, tpr_torch, "--", label=f"PyTorch (AUC = {roc_auc_torch:.3f})")
plt.plot(fpr, tpr, "o--", label=f"Implemented NN (AUC = {roc_auc_NN:.3f})")
plt.title('Receiver Operating Characteristic')
plt.legend(loc='lower right')
# Diagonal drawn after the legend call and without a label, so it stays out
# of the legend.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
# plt.savefig(f"{REPORT_FIGURES}{EX_E}roc_curve.pdf")
