In [1]:
! pip install keras
! pip install tensorflow









Fonte: [Brain Tumor Dataset](https://www.kaggle.com/datasets/jakeshbohaju/brain-tumor)

**N.B.**: la parte di ricerca degli iperparametri è attualmente commentata in quanto richiede molto tempo; da rimuovere per la consegna.

# Appunti per la creazione della rete

- **Dense**: implements the operation:
```
output = activation(dot(input, kernel) + bias)
```
where activation is the element-wise activation function passed as the activation argument
- **Binary Cross Entropy**: computes the cross-entropy loss between true labels and predicted labels.
Use this cross-entropy loss for binary (0 or 1) classification applications. The loss function requires the following inputs:
 - y_true (true label): This is either 0 or 1.
 - y_pred (predicted value): This is the model's prediction, i.e, a single floating-point value which either represents a logit, (i.e, value in [-inf, inf] when from_logits=True) or a probability (i.e, value in [0., 1.] when from_logits=False).
- **SGD**: Gradient descent (with momentum) optimizer.
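- **ADAM**: optimizer that implements the Adam algorithm, a stochastic gradient descent method based on adaptive estimation of first-order and second-order moments.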

# Rete Neurale

In [2]:
import pandas as pd
import warnings
# Remote execution (Google Colab): uncomment the two lines below to mount Google Drive
# from google.colab import drive

# drive.mount('/content/drive/')

# Suppress every warning raised through the warnings module from here on
warnings.filterwarnings('ignore')

In [3]:
# Esecuzione in locale
path_to_dataset = './Dataset/Brain Tumor.csv'
path_to_hyperparameter = "./" + "NN_Hyperparameters.json"
path_for_json = "./NN_Hyperparameters"

# Esecuzione in remoto
# path_to_dataset = '/content/drive/MyDrive/Magistrale/Machine Learning/Progetto/Brain Tumor.csv'
# path_to_hyperparameter = "/content/drive/MyDrive/Magistrale/Machine Learning/" + "NN_Hyperparameters.json"
# path_for_json = "/content/drive/MyDrive/Magistrale/Machine Learning/NN_Hyperparameters"    

In [4]:
# Load the dataset, using the first CSV column as the row index
dataset = pd.read_csv(path_to_dataset, index_col=0)

## Preprocessing del dataset

In [5]:
# Show the dtype pandas assigned to each column
dataset.dtypes

Class                   int64
Mean                  float64
Variance              float64
Standard Deviation    float64
Entropy               float64
Skewness              float64
Kurtosis              float64
Contrast              float64
Energy                float64
ASM                   float64
Homogeneity           float64
Dissimilarity         float64
Correlation           float64
Coarseness            float64
dtype: object

In [6]:
# Attributes chosen by the earlier analyses to train the model, plus the target column
selected_columns = ['Mean', 'Entropy', 'Skewness', 'Contrast', 'Correlation', 'Class']

# Keep only those columns and turn the target into a categorical variable
dataset = dataset[selected_columns].astype({"Class": "category"})

dataset.head()

Unnamed: 0_level_0,Mean,Entropy,Skewness,Contrast,Correlation,Class
Image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Image1,6.535339,0.109059,4.276477,98.613971,0.981939,0
Image2,8.749969,0.266538,3.718116,63.858816,0.988834,0
Image3,7.341095,0.001467,5.06175,81.867206,0.978014,1
Image4,5.958145,0.001477,5.677977,151.229741,0.964189,1
Image5,7.315231,0.146761,4.283221,174.988756,0.972789,0


In [7]:
# Feature matrix: every column except the target
X = dataset.drop(columns=['Class'])
# Target vector
y = dataset['Class']

# Feature names, reused when the scaled values are written back into X
columns_name = X.columns

Per le reti neurali risulta utile standardizzare i dati

In [8]:
from sklearn.preprocessing import StandardScaler

# NOTE(review): the scaler is fitted on the whole dataset, before the train/test split,
# so test-set statistics influence the scaling - consider fitting it on the training data only.
scaler = StandardScaler()

# Scale the features and write the result back into the same columns of X
scaled_features = scaler.fit_transform(X)
X[columns_name] = scaled_features
X.head()

Unnamed: 0_level_0,Mean,Entropy,Skewness,Contrast,Correlation
Image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Image1,-0.5157,0.50465,0.067855,-0.26805,1.000713
Image2,-0.129018,2.74605,-0.150204,-0.585492,1.264377
Image3,-0.375013,-1.026708,0.374531,-0.42101,0.850636
Image4,-0.616481,-1.026561,0.615188,0.212525,0.322041
Image5,-0.379529,1.041256,0.070489,0.429532,0.650854


Suddividiamo ora i dati in Train e Test, abbiamo scelto di utilizzare l'80% dei
dati per il training e il restante 20% per il test.

In [9]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split; the fixed seed makes the split identical after every kernel restart
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

Per evitare di introdurre errori nell'addestramento della rete abbiamo voluto
verificare che il dataset dopo lo split fosse ancora bilanciato

In [10]:
def _class_percentage(labels, class_value):
  """Return the share of `labels` equal to `class_value`, as a percentage rounded to 2 decimals."""
  return round((labels.value_counts()[class_value] / labels.shape[0]) * 100, 2)


# Compare the class proportions of the full dataset with those of the training split
print("Dataset:\n\t-Negative", _class_percentage(y, 0),
      "%\n\t-Positive", _class_percentage(y, 1), "%")
print("Train data:\n\t- Negative", _class_percentage(y_train, 0),
      "%\n\t- Positive", _class_percentage(y_train, 1), "%")

Dataset:
	-Negative 55.26 %
	-Positive 44.74 %
Train data:
	- Negative 55.27 %
	- Positive 44.73 %


## Ricerca degli iperparametri

In [11]:
from keras.models import Sequential
from keras.layers import Dense
from itertools import product




In [12]:
def create_model(units=(6, 6), activation='relu', opt='adam', metric='accuracy', input_layer_size=5):
  """Build and compile a small fully-connected network for binary classification.

  Args:
    units: sequence of hidden-layer sizes. units[0] is the size of the first hidden
      layer; units[1], when present and non-zero, adds a second hidden layer.
      Any further entry is ignored. The default is an immutable tuple so that it
      can never be altered between calls.
    activation: activation function of the hidden layers.
    opt: optimizer handed to `compile`.
    metric: metric tracked by the model (wrapped in a one-element list).
    input_layer_size: number of input features expected by the first layer.

  Returns:
    The compiled `Sequential` model, ending with a single sigmoid unit and using
    the binary cross-entropy loss.
  """
  model = Sequential()

  # First hidden layer, which also declares the input shape
  model.add(Dense(units=units[0], input_shape=(input_layer_size,), activation=activation))

  # Optional second hidden layer: a size of 0 means "no second layer"
  if len(units) >= 2 and units[1] != 0:
    model.add(Dense(units=units[1], activation=activation))

  # Output layer: one sigmoid unit
  model.add(Dense(1, activation='sigmoid'))

  model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[metric])

  return model

Parametri tra cui cercare la combinazione migliore

In [13]:
# Candidate values for each hyperparameter
units = [0, 5, 10, 50]
activation = ['sigmoid', 'relu', 'leaky_relu']
optimizer = ['sgd', 'adam']
batch_size = [50, 100, 300]
epochs = [100, 300]

# Cartesian product of all candidates: (units_1, units_2, activation, optimizer, batch_size, epochs).
# A size of 0 is only meaningful for the second hidden layer, so every combination whose
# first hidden layer has 0 units is filtered out while the list is built.
param_combinations = [
  params
  for params in product(units, units, activation, optimizer, batch_size, epochs)
  if params[0] != 0
]

print("Total combinations:", len(param_combinations))

Total combinations: 432


In [14]:
# Training features and target as DataFrames with a fresh 0..n-1 index
# (the original 'Image' index is discarded)
X_cross_val = X_train[columns_name].reset_index(drop=True)
Y_cross_val = y_train.to_frame(name='Class').reset_index(drop=True)

In [15]:
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import scipy.stats as st
import numpy as np
import time
import json

In [16]:
from keras import backend as K  # local import: only this cell needs the backend helpers

k = 5 # Number of folds
# Fixed seed: every configuration is evaluated on the same folds and the search is reproducible
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

# Results of every evaluated configuration, keyed by its position in param_combinations
performance = dict()

# Iterate over the hyperparameter combinations
for i, (u1, u2, act, opt, batch, ep) in enumerate(tqdm(param_combinations)):
  # Set up the record for this configuration
  performance[i] = dict()
  performance[i]['param'] = ([u1, u2], act, opt, batch, ep)
  performance[i]['results'] = []
  performance[i]['time'] = []

  for train_idx, test_idx in skf.split(X_cross_val, Y_cross_val):
    # skf.split yields positional indices, so select rows by position
    X_val_train, X_val_test = X_cross_val.iloc[train_idx], X_cross_val.iloc[test_idx]
    y_val_train, y_val_test = Y_cross_val.iloc[train_idx], Y_cross_val.iloc[test_idx]

    # Build a fresh model for every fold: reusing one model across folds would keep the
    # weights learned on previous folds, whose training data contains the current
    # validation rows, and would inflate the measured accuracy.
    K.clear_session()  # drop the state of the previous model so memory does not pile up
    model = create_model(units=[u1, u2], activation=act, opt=opt, input_layer_size=X_cross_val.shape[1])

    # Only the training time is measured
    start = time.time()

    model.fit(X_val_train, y_val_train, epochs=ep, batch_size=batch, verbose=0)

    end = time.time()

    # Evaluate on the held-out fold; score is [loss, metric]
    score = model.evaluate(X_val_test, y_val_test, verbose=0)
    performance[i]['results'].append(score[1])
    performance[i]['time'].append(end - start)

  # Named fold_accuracies (not accuracy_score) so it cannot shadow sklearn's accuracy_score
  fold_accuracies = performance[i]['results']
  performance[i]['interval90'] = st.t.interval(confidence=0.90, df=len(fold_accuracies)-1, loc=np.mean(fold_accuracies), scale=st.sem(fold_accuracies))
  performance[i]['interval95'] = st.t.interval(confidence=0.95, df=len(fold_accuracies)-1, loc=np.mean(fold_accuracies), scale=st.sem(fold_accuracies))
  performance[i]['meanTime'] = sum(performance[i]['time']) / len(performance[i]['time'])
  performance[i]['meanAccuracy'] = sum(fold_accuracies) / len(fold_accuracies)

  # Periodic checkpoint every 100 configurations (file suffix 1, 2, 3, ...)
  if int(i) % 100 == 0 and int(i) != 0:
    with open(str(path_for_json + str(int(i / 100)) + ".json"), "w") as outfile:
      json.dump(performance, outfile)

# Final checkpoint: without it, the configurations evaluated after the last multiple of 100
# would never be written to disk.
with open(path_for_json + "_final.json", "w") as outfile:
  json.dump(performance, outfile)

  0%|          | 0/432 [00:00<?, ?it/s]







 59%|█████▉    | 255/432 [3:29:12<2:10:58, 44.40s/it] 

Analisi dei risultati ottenuti

In [None]:
# Identifiers of all evaluated configurations (iterating a dict yields its keys)
dict_keys = list(performance)

In [None]:
# Per-configuration figures extracted from the search results
performance_interval90 = dict()
performance_interval95 = dict()
performance_time = dict()
performance_acc = dict()

for key in dict_keys:
  interval90 = performance[key]['interval90']
  # Read the 95% interval from its own key (it used to be read from 'interval90' by mistake)
  interval95 = performance[key]['interval95']

  # Width of each confidence interval
  performance_interval90[key] = interval90[1] - interval90[0]
  performance_interval95[key] = interval95[1] - interval95[0]
  performance_time[key] = performance[key]['meanTime']
  performance_acc[key] = performance[key]['meanAccuracy']

# Order the configurations from best to worst on each criterion:
# narrowest interval, shortest training time, highest accuracy
performance_interval90 = sorted(performance_interval90.items(), key=lambda x:x[1])
performance_interval95 = sorted(performance_interval95.items(), key=lambda x:x[1])
performance_time = sorted(performance_time.items(), key=lambda x:x[1])
performance_acc = sorted(performance_acc.items(), key=lambda x:x[1], reverse=True)

# Rank (1 = best) of every configuration on every criterion
sorted_dict = dict()

for i, (index, _) in enumerate(performance_interval90):
  sorted_dict[index] = dict()
  sorted_dict[index]['90'] = i + 1

for i, (index, _) in enumerate(performance_interval95):
  sorted_dict[index]['95'] = i + 1

for i, (index, _) in enumerate(performance_time):
  sorted_dict[index]['time'] = i + 1

for i, (index, _) in enumerate(performance_acc):
  sorted_dict[index]['acc'] = i + 1

# One row per configuration, one column per criterion
dataframe_model = pd.DataFrame(sorted_dict)
dataframe_model = dataframe_model.T

# Weights are tied to the criteria by name, so they do not depend on the column order
weights = pd.Series({'90': 1, '95': 1, 'time': 2, 'acc': 2})

# Weighted sum of the ranks: the lowest total identifies the best configuration
dataframe_model['Total'] = (dataframe_model[weights.index] * weights).sum(axis=1)
dataframe_model = dataframe_model.sort_values(by=['Total'])

best_model_index = dataframe_model.index[0]
best_model = performance[best_model_index]['param']
best_model

Rappresentazione dei risultati ottenuti

In [None]:
import plotly.express as px

# The 'time' and 'acc' columns of dataframe_model hold rank positions (1 = best), not raw
# measurements, so the axes are labelled accordingly.
fig = px.scatter(
  dataframe_model,
  x='time',
  y='acc',
  labels={'time': 'Training-time rank (1 = fastest)', 'acc': 'Accuracy rank (1 = most accurate)'},
  title='Hyperparameter configurations: training-time rank vs. accuracy rank',
)
fig.show()

## Addestramento del modello

In [None]:
# Create the best model
model = create_model(units=best_model[0], activation=best_model[1], opt=best_model[2], input_layer_size=X_train.shape[1])

# Addestrament del modello
model.fit(X_train, y_train, epochs=300, batch_size=best_model[3], verbose=1)

## Valutazione del modello

Valutazione del modello attraverso le metriche fornite da Keras

In [None]:
# evaluate returns [loss, metric] for the compiled model
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Valutazione del modello utilizzando altre metriche

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

Definizione di una funzione per associare un'etichetta ai risultati della rete.

In [None]:
def my_predict(model, X_test, threshold=0.5):
    """Turn the model outputs for `X_test` into 0/1 labels.

    Args:
        model: object exposing `predict(X)`, which returns an array of scores.
        X_test: samples forwarded to `model.predict`.
        threshold: scores strictly greater than this value become 1, all others 0.

    Returns:
        A flat (1-D) array of 0/1 labels.
    """
    scores = model.predict(X_test)
    labels = np.where(scores > threshold, 1, 0)
    return labels.flatten()

In [None]:
# Valutazione del modello utilizzando i dati di test
y_pred = my_predict(model, X_test, 0.3)

accuracy_train_test = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Stampa le prestazioni del modello
print('Accuracy:', accuracy_train_test)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Calcolo della matrice di confusione

In [None]:
# Names shown on the axes for class 0 and class 1
class_labels = ['Negative', 'Positive']

# Confusion matrix of the thresholded test predictions
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()

Report della classificazione

In [None]:
# classification_report expects (y_true, y_pred): with the arguments swapped, precision and
# recall are exchanged and the support counts refer to the predictions. The report is a
# multi-line string, so it is printed to render the line breaks.
print(classification_report(y_test, y_pred))

Calcolo e disegno della curva ROC

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt


# Scores predicted for the test set, flattened to 1-D
y_pred_prob = model.predict(X_test).ravel()

# Points of the ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Area under the ROC curve
roc_auc = roc_auc_score(y_test, y_pred_prob)

# Draw the ROC curve together with the diagonal reference line
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlim([-0.01, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver operating characteristic')
ax.legend(loc="lower right")
plt.show()

### Valutazione del modello attraverso 10-fold cross-validation

In [None]:
n_fold = 10
# Fixed seed so that the folds, and therefore the estimate, are reproducible
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=42)

# Accuracy measured on the validation part of each fold
accuracy_stratified = []

# Fold-local names are used so that n_fold, X_train, y_train and the trained `model`
# defined by the previous cells are not overwritten.
for train_idx, valid_idx in folds.split(X, y):
  # split yields positional indices: select by position with .iloc on both X and y
  # (y is indexed by image name, so y[train_idx] would rely on a deprecated fallback)
  X_fold_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
  y_fold_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

  # Fresh model for every fold, built with the selected configuration
  fold_model = create_model(units=best_model[0], activation=best_model[1], opt=best_model[2], input_layer_size=X_fold_train.shape[1])
  fold_model.fit(X_fold_train, y_fold_train, epochs=best_model[4], batch_size=best_model[3], verbose=0)

  y_pred_valid = my_predict(fold_model, X_valid)
  accuracy_stratified.append(accuracy_score(y_valid, y_pred_valid))

Calcolo dell'intervallo di confidenza

In [None]:
# 90% Student-t confidence interval around the mean fold accuracy
mean_fold_accuracy = np.mean(accuracy_stratified)
st.t.interval(confidence=0.90, df=len(accuracy_stratified)-1, loc=mean_fold_accuracy, scale=st.sem(accuracy_stratified))

In [None]:
import matplotlib.pyplot as plt
from scipy import stats

# Mean fold accuracy and its 95% Student-t confidence interval
mean_value = np.mean(accuracy_stratified)
confidence_interval = stats.t.interval(0.95, len(accuracy_stratified)-1, loc=mean_value, scale=stats.sem(accuracy_stratified))

# The error bar extends half the interval width on each side of the mean
ci_lower, ci_upper = confidence_interval[0], confidence_interval[1]
half_width = (ci_upper - ci_lower)/2

# Draw the mean as a point with its error bar
fig, ax = plt.subplots()
ax.errorbar(0, mean_value, yerr=half_width, fmt='o', label='K-fold')

# Axis labels and title
ax.set_xlabel('Group')
ax.set_ylabel('Value')
ax.set_title('Mean with Confidence Interval')

# Show the plot
ax.legend()
plt.show()