<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/model1_provaPostProcessing_Federica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
'''
@File name: model1.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset that contains minute-by-minute data
about different kinds of gases, collected by the uHoo air quality sensor, train
a NN that classifies whether each minute belongs to the class "Pasto" (1) or to
the class "Other" (0).
'''

!pip install liac-arff

import arff
import numpy as np

from keras import optimizers
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization, Activation
from keras.callbacks import EarlyStopping

from sklearn.utils import compute_class_weight
from sklearn.metrics import confusion_matrix

import sys
sys.path.append('local_modules')

#import postprocessing_sliding
#import postprocessing_Silvia
import plotting



In [0]:
#fix random seed for reproducibility
# NOTE(review): only NumPy's global random generator is seeded here. Whether
# that also pins the Keras backend (weight initialisation, dropout masks,
# batch shuffling) depends on the backend in use -- confirm before relying on
# bit-for-bit reproducible training runs.
seed = 5
np.random.seed(seed)

In [0]:
'''
@Description: generate a multilayer perceptron with LeakyRelu as activation
function.
@param: 
  - shape : int, the shape of the input
  - n_features: int, the number of features given
'''

#MODELLO 1
#REGOLA: input/2, input, 2*input, 1
#layers TUTTE LE FEATURE: 57, 113, 226, 1 
#layers TIME CO2 TEMP: 21, 41, 82, 1 
#layers TIME CO2 TEMP PM25/TVOC: 30, 59, 118, 1 
#layers TIME CO2 TEMP PM25 TVOC: 39, 77, 154, 1 


def generate_model_leaky(shape, n_features):
  """Build the "model 1" multilayer perceptron with LeakyReLU activations.

  The network is: an input BatchNormalization layer, three hidden stages of
  Dense (no bias) -> BatchNormalization -> LeakyReLU(alpha=0.2) -> Dropout(0.5),
  and a single sigmoid output unit. The hidden widths follow the rule
  n_features/2, n_features, 2*n_features.

  NOTE(review): the first width is computed with int(n_features/2), which
  truncates (113 features -> 56 units), while the comment above this function
  lists rounded-up values (57, 21, 30, 39). Confirm which one is intended.

  @param:
    - shape : int, number of input columns, passed as input_dim to the first
      Dense layer
    - n_features: int, the number of features given; drives the hidden widths
  @return: the assembled Sequential model (not compiled)
  """
  hidden_widths = (int(n_features/2), n_features, n_features*2)

  network = Sequential()
  network.add(BatchNormalization())

  for stage, width in enumerate(hidden_widths):
    # Only the first Dense layer is given the input dimensionality.
    if stage == 0:
      network.add(Dense(width, input_dim=shape, kernel_initializer='random_uniform', use_bias=False))
    else:
      network.add(Dense(width, kernel_initializer='random_uniform', use_bias=False))
    network.add(BatchNormalization())
    network.add(LeakyReLU(alpha=0.2))
    network.add(Dropout(0.5))

  network.add(Dense(1, activation='sigmoid'))

  return network

In [0]:
'''
@Description: generate a multilayer perceptron with Relu as activation
function.
@param: 
  - shape : int, the shape of the input
  - n_features: int, the number of features given
'''

#MODELLO 2
#REGOLA: a= input, b= a*2/3+c, c= b*2/3+1
#layers TUTTE LE FEATURE: 113, 229, 153, 1
#layers TIME CO2 TEMP: 41, 85, 57, 1 
#layers TIME CO2 TEMP PM25/TVOC: 59, 121, 81, 1 
#layers TIME CO2 TEMP PM25 TVOC: 77, 157, 105, 1 

def generate_model(shape, n_features):
  """Build the "model 2" multilayer perceptron with ReLU activations.

  The three hidden widths (a, b, c) are the solution of the linear system
      a = n_features
      b = a*2/3 + c
      c = b*2/3 + 1
  truncated to integers (e.g. 113 features -> 113, 229, 153). Each hidden
  stage is Dense (no bias) -> BatchNormalization -> ReLU -> Dropout(0.5),
  followed by a single sigmoid output unit.

  @param:
    - shape : int, number of input columns, passed as input_dim to the first
      Dense layer
    - n_features: int, the number of features given; drives the hidden widths
  @return: the assembled Sequential model (not compiled)
  """
  # Rows encode, in order:  a = n_features,  -2/3*a + b - c = 0,  -2/3*b + c = 1
  a = np.array([[1,0,0],[-(2/3),1,-1],[0,-(2/3),1]])
  b = np.array([n_features,0,1])
  x = np.linalg.solve(a, b)

  units_1 = int(x[0])
  units_2 = int(x[1])
  units_3 = int(x[2])

  model = Sequential()

  # The dropout rate must be written 0.5: the previous `Dropout(0,5)` passed
  # rate=0 and noise_shape=5, which left dropout switched off in this model.
  model.add(Dense(units_1, input_dim=shape, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(units_2, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(units_3, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0.5))

  model.add(Dense(1, activation='sigmoid'))
  #print(model.summary())

  return model

In [15]:
#@title SCEGLI IL DATASET E IL MODELLO

'''
@Description: MAIN
'''

# Pipeline of this cell: load the ARFF dataset, encode the class labels,
# split the rows in file order into training / validation / test sets, train
# the selected network with balanced class weights, then evaluate it on the
# test set.

#LOAD DATA
print("Loading data...")

dataset = '/root/data/uHooComplete_featureDataset.arff' #@param {type:"string"}

with open (dataset, encoding='utf-8') as f:
  dataDictionary = arff.load(f)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

#CONVERTING VALUES
# Encode the class column (last column) numerically: 'Other' -> 0, 'Pasto' -> 1.
print("\nConverting values...")
for row in data:
  if(row[-1] == 'Other'): row[-1] = 0
  elif(row[-1] == 'Pasto') : row[-1] = 1

# From here on `dataset` is the numeric float32 matrix, no longer the file path.
dataset = data.astype('float32')
print("CONVERSION DONE")

#SPLIT INTO INPUT (X) AND OUTPUT (Y) VARIABLES
# Every column but the last is a feature; the last column is the label.
s = dataset.shape[-1]
X = dataset[:,0:s-1]
Y = dataset[:,s-1]

n_features = s-1

#SPLIT INTO TRAINING, VALIDATION AND TEST SETS
print("\nSplit into training, validation and test sets...")

# Rows are split in file order (no shuffling): 80% train, 10% validation,
# remaining rows test. `train` and `val` are the end indices of the first two.
train_rate = 80
val_rate = 10
train = (dataset.shape[0]*train_rate)//100
val = (dataset.shape[0]*(train_rate+val_rate))//100

train_data = X[:train]
train_label = Y[:train]

# NOTE(review): the validation slice starts at train+1 and the test slice at
# val+1, so the rows at index `train` and `val` belong to no set. Left as is
# because `val` is later handed to plotting.plot_co2_temp, which may assume the
# same offset -- confirm against that module before changing the slicing.
val_data = X[train+1:val]
val_label = Y[train+1:val]

test_data = X[val+1:]
test_label = Y[val+1:]
print("DATASET SPLITTED")

#COMPUTE CLASS WEIGHT
# Keyword arguments are used because recent scikit-learn releases no longer
# accept `classes` and `y` positionally; older releases accept both spellings.
labels = np.unique(train_label)
classWeight = compute_class_weight(class_weight='balanced', classes=labels, y=train_label)
classWeight = dict(zip(labels,classWeight))

#GENERATE MODEL
print("\nGenerate model...")

modello = 1 #@param {type:"integer"}

if modello==1 :
  model = generate_model_leaky(train_data.shape[-1], n_features)
elif modello==2:
  model = generate_model(train_data.shape[-1], n_features)
else:
  # Fail loudly instead of hitting a NameError below or, worse, silently
  # reusing a `model` left in the kernel by an earlier run.
  raise ValueError("Unsupported model: %r (expected 1 or 2)" % (modello,))

#OPTIMIZERS
adm = optimizers.Adam(lr=0.0001)

#COMPILE MODEL
print("\nCompile model...")
model.compile(loss='binary_crossentropy', optimizer = adm , metrics=['accuracy'])

#EARLY STOPPING
# Stop as soon as the validation loss has not improved for 2 epochs in a row.
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

#FIT MODEL
print("\nFit model...")
history = model.fit(train_data, train_label, epochs=10, validation_data = (val_data, val_label), batch_size = 128, shuffle = True, class_weight = classWeight, verbose=1, callbacks = [es])

#EVALUATE MODEL
print("\nEvaluate model...")
scores_test = model.evaluate(test_data, test_label, batch_size=128, verbose = 1)
print("Test loss: %.2f%%" % (scores_test[0] * 100))
print("Test accuracy: %.2f%%" % (scores_test[1] * 100))

#CALCULATE PREDICTIONS
print("\nCalculate predictions...")
pred = model.predict_classes(test_data, batch_size=128, verbose=0)
# `pred` holds one single-element row per sample; flatten it to a plain list.
flat_pred = [item for sublist in pred for item in sublist]

#CONFUSION MATRIX
print("\nCompute confusion matrix...")
y_true = test_label
y_pred = pred
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
# Per-class recall, as percentages.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

# Fifth-from-last feature column of every test row; the post-processing
# functions compare it against time-of-day bounds (presumably minutes since
# midnight -- confirm against the dataset schema).
time = [row[-5] for row in test_data]

Loading data...
DATASET LOADED

Converting values...
CONVERSION DONE

Split into training, validation and test sets...
DATASET SPLITTED

Generate model...

Compile model...

Fit model...
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train on 280392 samples, validate on 35048 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

Evaluate model...
Test loss: 52.21%
Test accuracy: 79.13%

Calculate predictions...

Compute confusion matrix...
TN 25916
FP 7065
FN 249
TP 1819
Other corretti: 78.58 %
Pasto corretti: 87.96 %


In [0]:
import more_itertools
import numpy

def _smooth_labels(labels, window_len):
  """Smooth a sequence of 0/1 labels with a centred sliding window.

  For every position that has a full window around it, the label is kept only
  if at least half_window+2 labels of the window (itself included) are equal
  to it; otherwise it is flipped (1 -> 0, anything else -> 1). The first and
  the last half_window labels have no full window and are copied unchanged.
  Sequences shorter than window_len are returned unchanged.

  @param:
    - labels : iterable of 0/1 labels
    - window_len : int, length of the sliding window
  @return: list with exactly as many labels as the input
  """
  labels = list(labels)
  half_window = window_len // 2

  if len(labels) < window_len:
    return labels

  smoothed = labels[:half_window]
  for start in range(len(labels) - window_len + 1):
    window = labels[start:start + window_len]
    center = window[half_window]
    agreeing = sum(1 for value in window if value == center)
    if agreeing < (half_window + 2):
      smoothed.append(0 if center == 1 else 1)
    else:
      smoothed.append(center)

  # Copy the real last half_window labels. (Slicing with [-(half_window+1):-1]
  # would repeat the label before them and drop the final one.)
  smoothed.extend(labels[len(labels) - half_window:])
  return smoothed


def _smooth_within_time_ranges(pred, time, time_ranges, window_len):
  """Apply _smooth_labels only to runs of samples inside the given time ranges.

  Samples whose time lies outside every range are copied through untouched.
  Consecutive samples inside one range are buffered and smoothed as a unit;
  the buffer is flushed when the upper bound of the range is reached, when the
  next sample falls outside that range, and at the end of the input, so the
  output always has the same length and order as the input.

  @param:
    - pred : iterable of 0/1 labels
    - time : iterable with one time value per label
    - time_ranges : sequence of inclusive (start, end) bounds
    - window_len : int, length of the smoothing window
  @return: numpy array of labels
  """
  result = []
  segment = []
  segment_range = None

  for label, t in zip(pred, time):
    current_range = None
    for bounds in time_ranges:
      if bounds[0] <= t <= bounds[1]:
        current_range = bounds
        break

    # Leaving the buffered range: emit it before anything that follows it.
    if segment and current_range != segment_range:
      result.extend(_smooth_labels(segment, window_len))
      segment = []

    if current_range is None:
      result.append(label)
      continue

    segment_range = current_range
    segment.append(label)
    if t == current_range[1]:
      result.extend(_smooth_labels(segment, window_len))
      segment = []

  # Input ended in the middle of a range: do not lose the buffered labels.
  if segment:
    result.extend(_smooth_labels(segment, window_len))

  return numpy.array(result)


def sliding_windows(flat_pred, window_len=11):
  """Smooth the whole prediction sequence with a centred sliding window.

  @param:
    - flat_pred : sequence of 0/1 predictions, one per sample
    - window_len : int, length of the sliding window (default 11)
  @return: numpy array with one smoothed label per input prediction
  """
  return numpy.array(_smooth_labels(flat_pred, window_len))


def sliding_windows_time_colazione(new_pred_1, time, window_len=11):
  """Smooth the predictions that fall in the breakfast ("colazione") range.

  Only samples with 300 <= time <= 630 are smoothed (presumably minutes since
  midnight -- confirm against the dataset); all the others are copied.

  @param:
    - new_pred_1 : sequence of 0/1 predictions
    - time : sequence with one time value per prediction
    - window_len : int, length of the sliding window (default 11)
  @return: numpy array with one label per input prediction
  """
  return _smooth_within_time_ranges(new_pred_1, time, ((300, 630),), window_len)


def sliding_windows_time_pranzocena(new_pred_2, time, window_len=41):
  """Smooth the predictions in the lunch ("pranzo") and dinner ("cena") ranges.

  Only samples with 631 <= time <= 900 or 1110 <= time <= 1379 are smoothed
  (presumably minutes since midnight -- confirm against the dataset); all the
  others are copied.

  @param:
    - new_pred_2 : sequence of 0/1 predictions
    - time : sequence with one time value per prediction
    - window_len : int, length of the sliding window (default 41)
  @return: numpy array with one label per input prediction
  """
  return _smooth_within_time_ranges(new_pred_2, time, ((631, 900), (1110, 1379)), window_len)

In [17]:

# Chain the three post-processing passes: global smoothing first, then the
# breakfast range, then the lunch/dinner ranges.
new_pred_1 = sliding_windows(flat_pred)
new_pred_2 = sliding_windows_time_colazione(new_pred_1, time)
new_pred_3 = sliding_windows_time_pranzocena(new_pred_2, time)

# Persist the fully post-processed predictions.
with open ("/root/data/out.txt", "w") as f:
  numpy.savetxt(f, new_pred_3, delimiter=',')

print("RISULTATI SALVATI IN ROOT/DATA/OUT.TXT")

# Report the confusion matrix and the per-class recall after every pass.
y_true = test_label
for banner, n_y_pred in (("NEW PRED 1...", new_pred_1),
                         ("NEW PRED 2...", new_pred_2),
                         ("NEW PRED 3...", new_pred_3)):
  #CONFUSION MATRIX
  print("\nCompute NEW confusion matrix...")
  print(banner)
  tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
  for tag, cell_count in (("TN", tn), ("FP", fp), ("FN", fn), ("TP", tp)):
    print(tag, cell_count)
  other = 100*tn/(tn+fp)
  pasto = 100*tp/(fn+tp)
  print("Other corretti: %.2f %%" % other)
  print("Pasto corretti: %.2f %%" % pasto)


RISULTATI SALVATI IN ROOT/DATA/OUT.TXT

Compute NEW confusion matrix...
NEW PRED 1...
TN 26134
FP 6847
FN 229
TP 1839
Other corretti: 79.24 %
Pasto corretti: 88.93 %

Compute NEW confusion matrix...
NEW PRED 2...
TN 26160
FP 6821
FN 235
TP 1833
Other corretti: 79.32 %
Pasto corretti: 88.64 %

Compute NEW confusion matrix...
NEW PRED 3...
TN 26332
FP 6649
FN 252
TP 1816
Other corretti: 79.84 %
Pasto corretti: 87.81 %


In [18]:
# Disabled experiment kept as a string literal: it applied the external
# postprocessing_sliding / postprocessing_Silvia modules (whose imports are
# commented out at the top of the notebook) and printed the resulting
# confusion matrix. Nothing in this cell is executed; because the string is
# the cell's only expression, it is simply echoed back as the cell output.
'''
#@title SCEGLI maxWidth per postprocessing_Silvia

#POST PROCESSING

maxWidth = 3 #@param {type:"integer"}

new_pred = postprocessing_sliding.sliding_windows_time(flat_pred, time)
#new_pred = postprocessing_Silvia.setWidth(maxWidth,flat_pred)

#CONFUSION MATRIX
print("\nCompute NEW confusion matrix...")
y_true = test_label
n_y_pred = new_pred
tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)
'''

'\n#@title SCEGLI maxWidth per postprocessing_Silvia\n\n#POST PROCESSING\n\nmaxWidth = 3 #@param {type:"integer"}\n\nnew_pred = postprocessing_sliding.sliding_windows_time(flat_pred, time)\n#new_pred = postprocessing_Silvia.setWidth(maxWidth,flat_pred)\n\n#CONFUSION MATRIX\nprint("\nCompute NEW confusion matrix...")\ny_true = test_label\nn_y_pred = new_pred\ntn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()\nprint("TN", tn)\nprint("FP", fp)\nprint("FN", fn)\nprint("TP", tp)\nother = 100*tn/(tn+fp)\npasto = 100*tp/(fn+tp)\nprint("Other corretti: %.2f %%" % other)\nprint("Pasto corretti: %.2f %%" % pasto)\n'

In [0]:
'''
@Description: PLOTTING
'''

#plotting.plot_model_results(history)
# Pass the raw test predictions, the end index of the validation slice and the
# fully post-processed predictions to the project-local plotting module.
# NOTE(review): plot_co2_temp is defined in local_modules and is not visible
# here; presumably it uses `val` to locate the test rows in the dataset --
# confirm before changing how the train/validation/test split is computed.
plotting.plot_co2_temp(flat_pred, val, new_pred_3)


In [0]:
#model.save('my_model1.h5')
#model = load_model('my_model.h5')