<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/model1_provePostProcessing_Franci.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
'''
@File name: model1.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset that contains minute-by-minute data
about different kinds of gases, collected by the uHoo air quality sensor, train
a NN that classifies whether each minute belongs to the class "Pasto" (1) or to
the class "Other" (0).
'''

!pip install liac-arff

import arff
import numpy as np

from keras import optimizers
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization, Activation
from keras.callbacks import EarlyStopping

from sklearn.utils import compute_class_weight
from sklearn.metrics import confusion_matrix

import sys
sys.path.append('local_modules')

import postprocessing_sliding
#import postprocessing_Silvia
#import plotting



In [0]:
#fix random seed for reproducibility
# Only NumPy's global random generator is seeded by this cell.
# NOTE(review): the Keras backend presumably keeps its own random state, which is
# not seeded here - confirm whether training runs are actually repeatable.
seed = 5
np.random.seed(seed)

In [0]:
'''
@Description: generate a multilayer perceptron with LeakyRelu as activation
function.
@param: 
  - shape : int, the shape of the input
  - n_features: int, the number of features given
'''

#MODELLO 1
#REGOLA: input/2, input, 2*input, 1
#layers TUTTE LE FEATURE: 57, 113, 226, 1 
#layers TIME CO2 TEMP: 21, 41, 82, 1 
#layers TIME CO2 TEMP PM25/TVOC: 30, 59, 118, 1 
#layers TIME CO2 TEMP PM25 TVOC: 39, 77, 154, 1 


def generate_model_leaky(shape, n_features):
  """Build a perceptron with three hidden layers and LeakyReLU activations.

  Hidden widths follow the rule n_features/2, n_features, 2*n_features; every
  hidden Dense layer (no bias, 'random_uniform' initialiser) is followed by
  batch normalisation, LeakyReLU(alpha=0.2) and Dropout(0.5). The output layer
  is a single sigmoid unit.

  @param:
    - shape : int, forwarded as input_dim to the first Dense layer
    - n_features: int, the number of features, used to size the hidden layers
  @return: the uncompiled keras Sequential model
  """
  hidden_widths = (int(n_features/2), n_features, n_features*2)

  net = Sequential()
  # The very first layer is a batch normalisation added without an input shape.
  # NOTE(review): input_dim is therefore given to the second layer of the
  # stack - confirm that the installed Keras version honours it there.
  net.add(BatchNormalization())

  for position, width in enumerate(hidden_widths):
    # Only the first Dense layer is told the input dimensionality.
    extra = {'input_dim': shape} if position == 0 else {}
    net.add(Dense(width, kernel_initializer='random_uniform', use_bias = False, **extra))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha = 0.2))
    net.add(Dropout(0.5))

  net.add(Dense(1, activation='sigmoid'))

  return net

In [0]:
'''
@Description: generate a multilayer perceptron with Relu as activation
function.
@param: 
  - shape : int, the shape of the input
  - n_features: int, the number of features given
'''

#MODELLO 2
#REGOLA: a= input, b= a*2/3+c, c= b*2/3+1
#layers TUTTE LE FEATURE: 113, 229, 153, 1
#layers TIME CO2 TEMP: 41, 85, 57, 1 
#layers TIME CO2 TEMP PM25/TVOC: 59, 121, 81, 1 
#layers TIME CO2 TEMP PM25 TVOC: 77, 157, 105, 1 

def generate_model(shape, n_features):
  """Build a perceptron with three hidden layers and ReLU activations.

  The hidden widths a, b, c are the solution of the linear system
      a = n_features,   b = a*2/3 + c,   c = b*2/3 + 1
  truncated to integers. Every hidden Dense layer (no bias, 'random_uniform'
  initialiser) is followed by batch normalisation, ReLU and Dropout(0.5). The
  output layer is a single sigmoid unit.

  @param:
    - shape : int, forwarded as input_dim to the first Dense layer
    - n_features: int, the number of features, used to size the hidden layers
  @return: the uncompiled keras Sequential model
  """
  # Coefficient matrix and right-hand side of the sizing rule above.
  a = np.array([[1,0,0],[-(2/3),1,-1],[0,-(2/3),1]])
  b = np.array([n_features,0,1])
  x = np.linalg.solve(a, b)

  # int() truncates towards zero.
  # NOTE(review): x holds floats, so a solution that lands a hair below an
  # integer would be truncated one unit down - confirm the widths if it matters.
  units_1 = int(x[0])
  units_2 = int(x[1])
  units_3 = int(x[2])

  model = Sequential()

  # Dropout(0.5), not Dropout(0,5): with a comma the call means rate=0 and
  # noise_shape=5, i.e. no dropout at all.
  model.add(Dense(units_1, input_dim=shape, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(units_2, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(units_3, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(1, activation='sigmoid'))
  #print(model.summary())

  return model

In [39]:
#@title SCEGLI IL DATASET E IL MODELLO

'''
@Description: MAIN
'''

#LOAD DATA
print("Loading data...")

dataset = '/root/data/6_uHoo_featureDataset.arff' #@param {type:"string"}

with open (dataset, encoding='utf-8') as f:
  dataDictionary = arff.load(f)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

#CONVERTING VALUES
# The class label is the last column: 'Other' becomes 0 and 'Pasto' becomes 1.
print("\nConverting values...")
for i in data:
  if(i[-1] == 'Other'): i[-1] = 0
  elif(i[-1] == 'Pasto') : i[-1] = 1

# From here on `dataset` is the numeric table, no longer the file path.
dataset = data.astype('float32')
print("CONVERSION DONE")

#SPLIT INTO INPUT (X) AND OUTPUT (Y) VARIABLES
# Every column but the last is a feature; the last column is the label.
s = dataset.shape[-1]
X = dataset[:,0:s-1]
Y = dataset[:,s-1]

n_features = s-1

#SPLIT INTO TRAINING, VALIDATION AND TEST SETS
print("\nSplit into training, validation and test sets...")

train_rate = 80
val_rate = 10
# Floor division yields the same indices as the former round(int(.../100)),
# without the detour through floating point.
train = (dataset.shape[0]*train_rate)//100
val = (dataset.shape[0]*(train_rate+val_rate))//100

# The split is chronological (no shuffling before slicing).
train_data = X[:train]
train_label = Y[:train]

# NOTE(review): slices are half-open, so the rows at index `train` and `val`
# end up in no subset. The offsets are left untouched because `val` is also
# handed to plotting.plot_co2_temp, which may rely on them - confirm and close
# the gap together with that module.
val_data = X[train+1:val]
val_label = Y[train+1:val]

test_data = X[val+1:]
test_label = Y[val+1:]
print("DATASET SPLITTED")

#COMPUTE CLASS WEIGHT
# Keyword arguments: recent scikit-learn releases reject the positional form,
# while older releases accept the same keyword names.
labels = np.unique(train_label)
classWeight = compute_class_weight(class_weight='balanced', classes=labels, y=train_label)
classWeight = dict(zip(labels,classWeight))

#GENERATE MODEL
print("\nGenerate model...")

modello = 1 #@param {type:"integer"}

if modello==1 :
  model = generate_model_leaky(train_data.shape[-1], n_features)
elif modello==2:
  model = generate_model(train_data.shape[-1], n_features)
else:
  # Fail here rather than with a NameError on `model` a few lines below.
  raise ValueError("modello must be 1 or 2, got %r" % (modello,))

#OPTIMIZERS
adm = optimizers.Adam(lr=0.0001)

#COMPILE MODEL
print("\nCompile model...")
model.compile(loss='binary_crossentropy', optimizer = adm , metrics=['accuracy'])

#EARLY STOPPING
# Stop when the validation loss has not improved for 2 consecutive epochs.
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

#FIT MODEL
print("\nFit model...")
history = model.fit(train_data, train_label, epochs=20, validation_data = (val_data, val_label), batch_size = 128, shuffle = True, class_weight = classWeight, verbose=1, callbacks = [es])

#EVALUATE MODEL
print("\nEvaluate model...")
scores_test = model.evaluate(test_data, test_label, batch_size=128, verbose = 1)
print("Test loss: %.2f%%" % (scores_test[0] * 100))
print("Test accuracy: %.2f%%" % (scores_test[1] * 100))

#CALCULATE PREDICTIONS
print("\nCalculate predictions...")
# Thresholding the sigmoid output at 0.5 is what predict_classes did for a
# single-unit output; predict_classes itself is gone from recent Keras.
pred = (model.predict(test_data, batch_size=128, verbose=0) > 0.5).astype('int32')
# pred has one column per sample row; flatten it into a plain list.
flat_pred = [item for sublist in pred for item in sublist]

#CONFUSION MATRIX
print("\nCompute confusion matrix...")
y_true = test_label
y_pred = pred
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
# Share of correctly classified samples within each true class.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

Loading data...
DATASET LOADED

Converting values...
CONVERSION DONE

Split into training, validation and test sets...
DATASET SPLITTED

Generate model...

Compile model...

Fit model...
Train on 183944 samples, validate on 22992 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

Evaluate model...
Test loss: 32.77%
Test accuracy: 85.69%

Calculate predictions...

Compute confusion matrix...
TN 19401
FP 2790
FN 500
TP 301
Other corretti: 87.43 %
Pasto corretti: 37.58 %


In [78]:
#@title SCEGLI maxWidth
'''
@Description: POST PROCESSING
'''

# Only consumed by the commented-out alternative post-processing below.
maxWidth = 3 #@param {type:"integer"}

# Run the sliding-window post-processing of the local module on the flattened
# predictions.
new_pred = postprocessing_sliding.sliding_windows(flat_pred)
#new_pred = postprocessing_Silvia.setWidth(maxWidth,flat_pred)

print("AFTER FIRST SW")

#CONFUSION MATRIX
print("\nCompute NEW confusion matrix...")
y_true, n_y_pred = test_label, new_pred
tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
for tag, cell_count in zip(("TN", "FP", "FN", "TP"), (tn, fp, fn, tp)):
  print(tag, cell_count)

# Share of correctly classified samples within each true class.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

AFTER FIRST SW

Compute NEW confusion matrix...
TN 19382
FP 2809
FN 500
TP 301
Other corretti: 87.34 %
Pasto corretti: 37.58 %


In [81]:
#force the prediction to "Other" in the time slots where no meal is expected

# Time value of every test sample (fifth column from the end of the features).
# It is built here because the cell that originally defined `time` sits further
# down the notebook, so a top-to-bottom run failed with a NameError.
# Presumably this is the minute of the day (330 = 5:30, 1020 = 17:00, 1080 = 18:00).
time = [row[-5] for row in test_data]

# pred is modified in place, so every later cell sees the corrected values.
for i in range(len(pred)):
  if time[i]<=330 or 1020<=time[i]<1080:
    pred[i] = 0

print("AFTER other bla bla")

#CONFUSION MATRIX
print("\nCompute NEW confusion matrix...")
y_true = test_label
n_y_pred = pred
tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
# Share of correctly classified samples within each true class.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

AFTER other bla bla

Compute NEW confusion matrix...
TN 19532
FP 2659
FN 500
TP 301
Other corretti: 88.02 %
Pasto corretti: 37.58 %


In [0]:
# Time value of every test sample: the fifth column from the end of each row.
time = [sample[-5] for sample in test_data]

In [61]:
import more_itertools

#summary of the dataset with the correct (ground-truth) activities

#5-sample sliding windows over the hours in which breakfast is expected
col = list(more_itertools.windowed(test_label,n=5, step=1))
time_col = list(more_itertools.windowed(time,n=5, step=1))
lista = (col,time_col)

# Keep the central label of every window that starts after 330 and ends before 660.
sw_col_real = [
  labels[2]
  for labels, minutes in zip(col, time_col)
  if minutes[0]>330 and minutes[4]<660
]


#31-sample sliding windows over the hours in which lunch and dinner are expected
pasto = list(more_itertools.windowed(test_label,n=31, step=1))
time_pasto = list(more_itertools.windowed(time,n=31, step=1))
lista = (pasto,time_pasto)

# Lunch slot: window starts at 660 or later and ends before 1020.
sw_pranzo_real = [
  labels[15]
  for labels, minutes in zip(pasto, time_pasto)
  if minutes[0]>=660 and minutes[30]<1020
]
# Dinner slot: window starts at 1080 or later and ends before 1439.
sw_cena_real = [
  labels[15]
  for labels, minutes in zip(pasto, time_pasto)
  if minutes[0]>=1080 and minutes[30]<1439
]

print(sw_col_real)
print(sw_pranzo_real)
print(sw_cena_real)

total_real = sw_col_real + sw_pranzo_real + sw_cena_real

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [66]:
# Total number of ground-truth window centres over the three meal slots.
sum(len(chunk) for chunk in (sw_col_real, sw_pranzo_real, sw_cena_real)) #+ len(other_real)

23004

In [0]:
#5-sample sliding windows over the hours in which breakfast is expected
pred_col = list(more_itertools.windowed(pred,n=5, step=1))
time_col = list(more_itertools.windowed(time,n=5, step=1))
lista = (pred_col,time_col)

# Whole prediction windows lying between 5:30 and 11:00.
sw_col = [
  window
  for window, minutes in zip(pred_col, time_col)
  if minutes[0]>330 and minutes[4]<660
]

#31-sample sliding windows over the hours in which lunch and dinner are expected
pred_pasto = list(more_itertools.windowed(pred,n=31, step=1))
time_pasto = list(more_itertools.windowed(time,n=31, step=1))
lista = (pred_pasto,time_pasto)

# Lunch: from 11:00 to 17:00.
sw_pranzo = [
  window
  for window, minutes in zip(pred_pasto, time_pasto)
  if minutes[0]>659 and minutes[30]<1020
]
# Dinner: from 18:00 to 24:00.
sw_cena = [
  window
  for window, minutes in zip(pred_pasto, time_pasto)
  if minutes[0]>1079 and minutes[30]<1439
]


In [68]:
#build three lists holding, for every window, 1 if it looks like a breakfast, a lunch or a dinner and 0 otherwise
def _classify_windows(windows, min_pasto):
  """Return one 0/1 flag per window: 1 when more than min_pasto entries equal 1."""
  flags = []
  for window in windows:
    hits = sum(1 for value in window if value == 1)
    flags.append(1 if hits > min_pasto else 0)
  return flags

# Thresholds: more than 3 positives out of 5 for breakfast, more than 20 out of
# 31 for lunch and dinner.
result_col = _classify_windows(sw_col, 3)
result_pranzo = _classify_windows(sw_pranzo, 20)
result_cena = _classify_windows(sw_cena, 20)

print(result_col)
print(result_pranzo)
print(result_cena)

total_result = result_col + result_pranzo + result_cena


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [70]:
#CONFUSION MATRIX
# Ground-truth window centres against window votes, all meal slots together.
print("total")
print("\nCompute NEW confusion matrix...")
y_true, n_y_pred = total_real, total_result
tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
for tag, cell_count in zip(("TN", "FP", "FN", "TP"), (tn, fp, fn, tp)):
  print(tag, cell_count)

# Share of correctly classified samples within each true class.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

total

Compute NEW confusion matrix...
TN 20192
FP 2011
FN 527
TP 274
Other corretti: 90.94 %
Pasto corretti: 34.21 %


In [71]:
# Number of entries in result_other.
# NOTE(review): result_other is not assigned in any visible cell of this
# notebook, so this presumably raises a NameError on a fresh kernel - confirm
# where it came from or remove the cell.
len(result_other)

5998

In [77]:
# One confusion matrix per meal slot: (header to print, ground-truth window
# centres, window votes).
meal_reports = (
  ("\nCompute confusion matrix colazione...", sw_col_real, result_col),
  ("\nCompute NEW confusion matrix pranzo...", sw_pranzo_real, result_pranzo),
  ("\nCompute NEW confusion matrix cena...", sw_cena_real, result_cena),
)

for header, real_labels, predicted_labels in meal_reports:
  print(header)
  # The builtin int replaces np.int, which was only an alias for it and no
  # longer exists in recent NumPy releases.
  y_true = np.array(real_labels,dtype=int)
  n_y_pred = predicted_labels
  tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
  print("TN", tn)
  print("FP", fp)
  print("FN", fn)
  print("TP", tp)
  # Share of correctly classified samples within each true class.
  other = 100*tn/(tn+fp)
  pasto = 100*tp/(fn+tp)
  print("Other corretti: %.2f %%" % other)
  print("Pasto corretti: %.2f %%" % pasto)




Compute confusion matrix colazione...
TN 4467
FP 654
FN 84
TP 34
Other corretti: 87.23 %
Pasto corretti: 28.81 %

Compute NEW confusion matrix pranzo...
TN 5099
FP 481
FN 138
TP 84
Other corretti: 91.38 %
Pasto corretti: 37.84 %

Compute NEW confusion matrix cena...
(5965,)
TN 4628
FP 876
FN 305
TP 156
Other corretti: 84.08 %
Pasto corretti: 33.84 %


In [0]:
'''
@Description: PLOTTING
'''

# The import of this local module is commented out in the first cell, so the
# calls below failed with a NameError; import it where it is used.
# NOTE(review): presumably plotting.py lives in local_modules, like
# postprocessing_sliding - confirm that the file is available.
import plotting

# Training history curves, then the predictions starting from split index `val`.
plotting.plot_model_results(history)
plotting.plot_co2_temp(flat_pred, val)

In [0]:
#model.save('my_model1.h5')
#model = load_model('my_model.h5')