<a href="https://colab.research.google.com/github/DunkleCat/IA-challenge-prova-1/blob/master/mushroom_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Librerie

In [0]:
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Make printed floating-point numbers easier to read:
# three decimals, no scientific notation for small values.
np.set_printoptions(suppress=True, precision=3)

# Dataframe

## Caricamento del dataset

In [0]:
# Name of the column that holds the label to predict.
target_label = "class_edible"

# Location of the CSV file; the path points into a Google Drive folder
# mounted under /content/drive, so the drive must be mounted beforehand.
csv_file = "/content/drive/My Drive/datasets/mushroom_data_all.csv"

# Load the whole CSV into memory as a pandas DataFrame.
dataframe = pd.read_csv(filepath_or_buffer=csv_file)

## Vista generica del dataset

In [3]:
# Preview the first five rows of the raw (still string-valued) dataset.
dataframe.head(n=5)

Unnamed: 0,class_edible,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g


## Ottimizzazione del dataset

La rete neurale lavora sfruttando i numeri e non le stringhe. Per questo motivo conviene trasformare ogni colonna che contiene degli identificatori scritti come carattere (in questo caso tutte) in numeri interi che rappresentino la classe corrispondente.

In [0]:
# Replace the single-letter category labels of every column with integer
# codes so the values can be fed to the network. Codes follow the sorted
# order of the labels found in each column (e.g. 'e' -> 0, 'p' -> 1 for the
# class column).
for column_name in dataframe.columns:
  # Select the column by label rather than with getattr(): attribute lookup
  # returns the DataFrame's own member (method/property) instead of the
  # column whenever a column name happens to collide with one.
  dataframe[column_name] = dataframe[column_name].astype("category").cat.codes

## Dataset post-ottimizzazione

In [5]:
# Preview the first five rows again, now holding integer category codes.
dataframe.head(n=5)

Unnamed: 0,class_edible,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,0,3,2,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,0,2,2,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,0,2,2,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,0,3,2,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,1,3,2,2,7,7,0,2,1,0,3,0,1


# Modellazione rete neurale

## Preparazione del modello

In [0]:
# Fully connected binary classifier: two hidden ReLU layers followed by a
# single output unit. The output has no activation, so the model emits a raw
# logit (apply a sigmoid to obtain a probability).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(
    # from_logits=True because the last layer has no sigmoid.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    # The plain 'accuracy' shortcut thresholds the model output at 0.5, which
    # is only right for probabilities. For logits the decision boundary is 0
    # (sigmoid(0) == 0.5), so the threshold is set explicitly. The metric keeps
    # the name 'accuracy' so logs and evaluate() output look the same.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])


## Preparazione del Dataset

In [7]:
# Fixed seed so that the same rows land in each subset on every run.
SPLIT_SEED = 42

# Hold out 20% of the rows as the test set. Stratifying on the label keeps the
# class proportions the same in every subset.
train_and_val, test = train_test_split(
    dataframe,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=dataframe[target_label])

# Carve 10% of the remaining rows out as the `val` subset. The input uses its
# own name so that re-running this cell does not keep shrinking `train`.
train, val = train_test_split(
    train_and_val,
    test_size=0.1,
    random_state=SPLIT_SEED,
    stratify=train_and_val[target_label])

print(len(train), 'train examples')
print(len(test), 'test examples')
print(len(val), 'val example')

5849 train examples
1625 test examples
650 val example


## Addestramento

In [8]:
# Training hyper-parameters. A batch size of 1 would perform one optimizer
# step per row (thousands of steps per epoch); mini-batches train far faster.
BATCH_SIZE = 32
EPOCHS = 30

# Build the TensorFlow training dataset.
# The label is selected and the features are derived with drop() instead of
# pop(), so `train` is left untouched and this cell can be re-run safely.
train_target = train[target_label]
train_features = train.drop(columns=[target_label])
train_dataset = (
    tf.data.Dataset.from_tensor_slices(
        # Explicit float32 features: the encoded columns hold small integers.
        (train_features.values.astype("float32"), train_target.values))
    .shuffle(len(train_features))  # buffer covers the whole set: full shuffle
    .batch(BATCH_SIZE)
)

# Training phase. The History object is kept for later inspection instead of
# being echoed as the cell output.
history = model.fit(train_dataset, epochs=EPOCHS)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7fd98d3ae278>

## Test

In [9]:
# Batch size used only for evaluation; it affects speed, not the metrics.
EVAL_BATCH_SIZE = 256

# Build the TensorFlow test dataset.
# drop()/selection instead of pop() keeps `test` intact, so the cell can be
# re-run. No shuffling: the order of the rows does not change the metrics.
test_target = test[target_label]
test_features = test.drop(columns=[target_label])
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_features.values.astype("float32"), test_target.values)
).batch(EVAL_BATCH_SIZE)

# Test phase: evaluate() returns the loss followed by the compiled metric.
test_loss, test_accuracy = model.evaluate(test_dataset)
print('\n\nTest Loss {}, Test Accuracy {}'.format(test_loss, test_accuracy))



Test Loss 4.597512148762917e-09, Test Accuracy 1.0


## Valutazione

In [10]:
# Number of sample predictions to print and batch size used for inference.
N_SHOWN = 10
PREDICT_BATCH_SIZE = 256

# Build the dataset used for the prediction demo (features only).
# drop()/selection instead of pop() keeps `val` intact, so the cell can be
# re-run without a KeyError.
val_target = val[target_label]
val_features = val.drop(columns=[target_label])
val_dataset = tf.data.Dataset.from_tensor_slices(
    val_features.values.astype("float32")
).batch(PREDICT_BATCH_SIZE)

# Prediction phase: the model outputs one raw logit per row.
predictions = model.predict(val_dataset)

# Show the first predictions next to the true labels.
# The sigmoid is applied once to the whole slice to turn logits into
# probabilities; label code 1 corresponds to 'p' (poisonous), 0 to 'e'.
probabilities = tf.sigmoid(predictions[:N_SHOWN]).numpy()
for probability, poisonous in zip(probabilities, val_target.iloc[:N_SHOWN]):
  print("Predicted poisonous: {:.2%}".format(probability[0]),
        " | Actual outcome: ",
        ("Poisonous" if bool(poisonous) else "Safe"))


Predicted poisonous: 100.00%  | Actual outcome:  Poisonous
Predicted poisonous: 100.00%  | Actual outcome:  Poisonous
Predicted poisonous: 0.00%  | Actual outcome:  Safe
Predicted poisonous: 100.00%  | Actual outcome:  Poisonous
Predicted poisonous: 0.00%  | Actual outcome:  Safe
Predicted poisonous: 0.00%  | Actual outcome:  Safe
Predicted poisonous: 100.00%  | Actual outcome:  Poisonous
Predicted poisonous: 0.00%  | Actual outcome:  Safe
Predicted poisonous: 0.00%  | Actual outcome:  Safe
Predicted poisonous: 100.00%  | Actual outcome:  Poisonous
