### Classification with Neural Network:

In [None]:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from common import *
from models import LogReg, NeuralNetwork, Layer, TorchNeuralNetwork
import seaborn as sns
from matplotlib.pyplot import figure
import numpy as np 
%matplotlib inline
import pandas as pd
from matplotlib import cm
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from cmcrameri import cm

# Report the TensorFlow version that this kernel picked up.
print(tf.__version__)
# Optional GPU diagnostics (uncomment when needed):
# print(tf.config.list_physical_devices('GPU'))
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Show the kernel's working directory.
print(f"Root directory: {os.getcwd()}")

# Figure text is rendered through LaTeX (needs a working LaTeX installation)
# in a 10 pt Palatino serif face.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Palatino"]
plt.rcParams["font.size"] = 10

#%matplotlib

In [None]:


def model_metrics(X_test, y_test, model):
    """Print the accuracy of ``model`` on a test set and return its confusion matrix.

    Predictions are obtained from ``model.predict(X_test)`` and thresholded
    at 0.5 to produce hard 0/1 labels.

    Parameters
    ----------
    X_test : features passed unchanged to ``model.predict``.
    y_test : true 0/1 labels, either a ``torch.Tensor`` or array-like.
    model : any object exposing ``predict(X)`` that returns scores as a
        ``torch.Tensor`` or array-like.

    Returns
    -------
    The result of ``confusion_matrix(y_true, y_pred)``.
    """

    def _as_flat_numpy(values):
        # Tensors are detached first so that outputs which still carry
        # autograd history can be converted; everything ends up 1-D.
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values).reshape(-1)

    y_score = _as_flat_numpy(model.predict(X_test))
    y_true = _as_flat_numpy(y_test)

    # Hard class labels: a score above 0.5 counts as class 1.
    y_pred = (y_score > 0.5).astype(int)
    correct_val = (y_pred == y_true).sum()

    print(f"Accuracy: {correct_val/len(y_true)}")

    return confusion_matrix(y_true, y_pred)


### Grid Search: 

In [None]:
import torch
from torch import nn
import torch.optim as optim
import torch.utils.data as data_utils
from sklearn.metrics import confusion_matrix

cancer = load_breast_cancer()

# Hold out 30 % of the samples for testing; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(cancer.data,cancer.target,test_size=0.30,random_state=4155)

# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
input_dim = X_train.shape[1]
print(input_dim)


X_test_scaled_torch = torch.from_numpy(X_test_scaled).float()
y_test_torch = torch.from_numpy(y_test).float()

#prepare dataset for torch dataloader:
trainset = data_utils.TensorDataset(torch.from_numpy(X_train_scaled).float(),
                                 torch.from_numpy(y_train).float())

#create torch dataloader:
# NOTE(review): the torch loader uses mini-batches of 4 while the homebrew
# network below is fitted with batch_size=10 -- confirm the difference is intended.
trainloader = data_utils.DataLoader(trainset, batch_size=4, shuffle=True)

epochs = 100
batch_size = 10
nbf_features = X_train.shape[1]
apply_lr_scheduler = False
hidden_sizes = np.array([32,16,8])
eta_list = np.logspace(-1, -4, 6)
lmb_list = np.logspace(-5, 0, 6)
act_func = "sigmoid"
print(f"len of testset: {len(y_test)}")

# Grid search on eta and lambda
best_acc_homebrew = 0
best_model_homebrew = ""

best_acc_torch = 0
best_model_torch = ""

conf_mat_torch = None
conf_mat_homebrew = None
best_yhat_homebrew = None
best_yhat_torch = None


def home_brew_accuracy(y_hat, y_true):
    """Return the fraction of entries in ``y_hat`` that equal ``y_true``."""
    return np.sum(y_hat == y_true) / y_true.shape[0]


for hidden_size in (hidden_sizes):
    for lmb in tqdm(lmb_list):
        for eta in (eta_list):
            # Re-seed both RNGs for every configuration so that each grid
            # point is reproducible: NumPy for the homebrew network, torch
            # for the torch network and the DataLoader shuffling.
            np.random.seed(4155)
            torch.manual_seed(4155)

            # Homebrew network: hidden widths are hidden_size, 2*hidden_size
            # and hidden_size again, followed by one sigmoid output neuron.
            hidden1 = Layer(input_dim, hidden_size, activation="relu", name="hidden1")
            hidden2 = Layer(hidden1.neurons, hidden1.neurons*2, activation="relu", name="hidden2")
            hidden3 = Layer(hidden2.neurons, hidden2.neurons//2, activation="relu", name="hidden3")
            output = Layer(hidden3.neurons, nbf_neurons=1, activation="sigmoid" , name="output")

            # NOTE(review): the constructor receives the *test* split while fit()
            # below receives the training split. NeuralNetwork is defined outside
            # this notebook -- confirm what the constructor uses these arrays for.
            model = NeuralNetwork(X_test_scaled, y_test.reshape(-1,1),learning_rate=eta,lmb=lmb, network_type="classification")
            model.add(hidden1)
            model.add(hidden2)
            model.add(hidden3)
            model.add(output)

            model.fit(X_train_scaled, y_train.reshape(-1,1), batch_size=batch_size, epochs=epochs)
            y_hat = model.logistic_predict(X_test_scaled)
            y_hat = y_hat.flatten()

            acc_homebrew = home_brew_accuracy(y_hat,y_test)
            #print(f"Accuracy: {acc}, hidden_size: {hidden_size}, lambda: {lmb}, eta: {eta}, ")

            # Keep the description, confusion matrix and raw predict() output
            # of the best homebrew configuration seen so far.
            if acc_homebrew > best_acc_homebrew:
                best_model_homebrew = f"Accuracy: {acc_homebrew}\n hidden_size: {hidden_size}, lambda: {lmb}, eta: {eta}"
                best_acc_homebrew = acc_homebrew
                conf_mat_homebrew = confusion_matrix(y_test, y_hat)
                best_yhat_homebrew = model.predict(X_test_scaled)


            #TORCH:
            net_Torch = TorchNeuralNetwork(eta =eta, lmb=lmb, input_dim = X_train.shape[1], hidden_size = hidden_size)
            net_Torch.fit(epochs, trainloader)

            acc_torch = net_Torch.torch_accuracy(X_test_scaled_torch, y_test_torch)

            if acc_torch > best_acc_torch:
                best_model_torch = f"Accuracy: {acc_torch}\n hidden_size: {hidden_size}, lambda: {lmb}, eta: {eta}"
                best_acc_torch = acc_torch
                #torch.save(net_Torch.state_dict(),f"{os.getcwd()}_best_torch_cls")
                # Evaluation only: no autograd graph is needed, and the stored
                # tensor stays convertible to NumPy.
                with torch.no_grad():
                    y_hat_torch = net_Torch.forward(X_test_scaled_torch)
                best_yhat_torch = y_hat_torch
                # Threshold the outputs at 0.5 to obtain hard class labels.
                prediction_val_torch = [1 if x > 0.5 else 0 for x in y_hat_torch.data.numpy()]
                conf_mat_torch = confusion_matrix(y_test, prediction_val_torch)



print(f"Best own implemented NN: {best_model_homebrew}")

print(f"Best torch: {best_model_torch}")


In [1]:
#Save results to compare with logistic regression in task E:
# One column per model, one row per test sample.
best_yhat_homebrew = best_yhat_homebrew.reshape(-1,1)
df = pd.DataFrame(data = best_yhat_homebrew, columns=['Own NN'])
# A torch tensor is not a valid DataFrame column: detach it from autograd and
# flatten it to a 1-D NumPy array before storing.
df["Torch"] = best_yhat_torch.detach().numpy().reshape(-1)

df.to_pickle(f"{REPORT_DATA}{EX_D}_best_yhats")


NameError: name 'best_yhat_homebrew' is not defined

### Confusion Matrices: 

In [None]:



# Annotated heatmap of the confusion matrix of the best PyTorch model,
# written to a PDF under REPORT_FIGURES.
torch_title = f"Confusion Matrix, PyTorch\n Accuracy: {np.around(best_acc_torch, decimals = 4)}"
cm_torch = sns.heatmap(conf_mat_torch, cmap=cm.lajolla, annot=True, fmt=".0f")
cm_torch.set_title(torch_title)
cm_torch.set_xlabel("Predicted Label")
cm_torch.set_ylabel("True Label")
cm_torch.figure.savefig(f"{REPORT_FIGURES}{EX_D}torch_conf_matrix.pdf")



In [None]:

# Annotated heatmap of the confusion matrix of the best own-implemented
# network, written to a PDF under REPORT_FIGURES.
homebrew_title = f"Confusion Matrix, own implemented NN \nAccuracy: {np.around(best_acc_homebrew, decimals = 4)}"
cm_homebrew = sns.heatmap(conf_mat_homebrew, cmap=cm.lajolla, annot=True, fmt=".0f")
cm_homebrew.set_title(homebrew_title)
cm_homebrew.set_xlabel("Predicted Label")
cm_homebrew.set_ylabel("True Label")
cm_homebrew.figure.savefig(f"{REPORT_FIGURES}{EX_D}homebrew_conf_matrix.pdf")





### ROC curves 

In [None]:
from sklearn import metrics
from matplotlib.pyplot import figure

def _roc_with_auc(y_true, y_score):
    """Return ``(fpr, tpr, auc)`` of the ROC curve for ``y_score`` against ``y_true``."""
    fpr_, tpr_, _ = metrics.roc_curve(y_true, y_score)
    return fpr_, tpr_, metrics.auc(fpr_, tpr_)


figure(figsize=(10, 10), dpi=80)

# Each model keeps its own AUC variable so that every legend entry reports
# the value belonging to its own curve.
fpr, tpr, roc_auc_homebrew = _roc_with_auc(y_test, best_yhat_homebrew)

fpr_torch, tpr_torch, roc_auc_torch = _roc_with_auc(y_test, best_yhat_torch.detach().numpy())


# Logistic regression trained with SGD; its own scores (not the torch
# network's) are used for the curve.
logreg_SGD = LogReg(eta = 0.1, lmb = 0)
logreg_SGD.fit(X_train_scaled,y_train,batch_size= 5, epochs=10, solver = "SGD")
y_hat_SGD = logreg_SGD.forward(X_test_scaled)
fpr_SGD, tpr_SGD, roc_auc_SGD = _roc_with_auc(y_test, y_hat_SGD)
plt.plot(fpr_SGD, tpr_SGD,"o--", label = f"LogReg(SGD) (AUC = {roc_auc_SGD:.3f})")


# Logistic regression trained with the "NRM" solver.
logreg_NRM = LogReg()
logreg_NRM.fit(X_train_scaled,y_train,batch_size= 5, epochs=1, solver = "NRM")
logreg_NRM.accuracy(X_test_scaled, y_test)
y_hat_NRM = logreg_NRM.forward(X_test_scaled)


fpr_NRM, tpr_NRM, roc_auc_NRM = _roc_with_auc(y_test, y_hat_NRM)
plt.plot(fpr_NRM, tpr_NRM,"o-", label = f"LogReg NRM (AUC = {roc_auc_NRM:.3f})")

plt.plot(fpr_torch, tpr_torch,"--", label = f"Torch (AUC = {roc_auc_torch:.3f})")
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr,"o--", label = f"Homebrew (AUC = {roc_auc_homebrew:.3f})")
plt.legend(loc = 'lower right')
# Chance-level diagonal, drawn after the legend so it gets no legend entry.
plt.plot([0, 1], [0, 1],'k--')
plt.xlim([-0.01, 1.01])
plt.ylim([-0.01, 1.01])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
