<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/model1_postprocessing_Franci(controllo_picchi_co2).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''
@File name: model1.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset containing minute-by-minute readings
of different kinds of gases, collected by the uHoo air quality sensor, train
a NN that classifies each minute as belonging either to the class "Pasto" (1)
or to the class "Other" (0).
'''

!pip install liac-arff

import arff
import numpy as np

from keras import optimizers
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization, Activation
from keras.callbacks import EarlyStopping

from sklearn.utils import compute_class_weight
from sklearn.metrics import confusion_matrix

import sys
sys.path.append('local_modules')

import postprocessing_sw
#import postprocessing_Silvia
import plotting

Collecting liac-arff
  Downloading https://files.pythonhosted.org/packages/e9/35/fbc9217cfa91d98888b43e1a19c03a50d716108c58494c558c65e308f372/liac-arff-2.4.0.tar.gz
Building wheels for collected packages: liac-arff
  Building wheel for liac-arff (setup.py) ... [?25ldone
[?25h  Stored in directory: /root/.cache/pip/wheels/d1/6a/e7/529dc54d76ecede4346164a09ae3168df358945612710f5203
Successfully built liac-arff
Installing collected packages: liac-arff
Successfully installed liac-arff-2.4.0


Using TensorFlow backend.


In [0]:
#fix random seed for reproducibility
# NOTE(review): only NumPy's global RNG is seeded here; presumably the
# Keras/TensorFlow backend keeps its own random state - confirm whether it has
# to be seeded as well for fully repeatable training runs.
seed = 5
np.random.seed(seed)

In [0]:
'''
@Description: generate a multilayer perceptron with LeakyRelu as activation
function.
@param: 
  - shape : int, the shape of the input
  - n_features: int, the number of features given
'''

#MODEL 1
#RULE: input/2, input, 2*input, 1
#layers ALL FEATURES: 57, 113, 226, 1 
#layers TIME CO2 TEMP: 21, 41, 82, 1 
#layers TIME CO2 TEMP PM25/TVOC: 30, 59, 118, 1 
#layers TIME CO2 TEMP PM25 TVOC: 39, 77, 154, 1 


def generate_model_leaky(shape, n_features):
  """Build the (uncompiled) LeakyReLU multilayer perceptron.

  The network starts with a BatchNormalization layer, followed by three hidden
  stages of int(n_features/2), n_features and n_features*2 units. Every stage
  is: bias-free Dense -> BatchNormalization -> LeakyReLU(alpha=0.2) ->
  Dropout(0.5). A single sigmoid unit closes the network.

  NOTE(review): int() truncates, so an odd n_features such as 113 yields 56
  units in the first stage, whereas the layer-size comment preceding this
  function lists 57 - confirm which one is intended.

  Args:
    shape: int, passed as `input_dim` to the first Dense layer.
    n_features: int, number of features used to size the hidden layers.

  Returns:
    The Sequential model; compiling it is left to the caller.
  """
  hidden_sizes = (int(n_features/2), n_features, n_features*2)

  model = Sequential()
  model.add(BatchNormalization())

  for stage, units in enumerate(hidden_sizes):
    # Only the first Dense layer declares the input dimension.
    first_layer_kwargs = {'input_dim': shape} if stage == 0 else {}
    model.add(Dense(units, kernel_initializer='random_uniform', use_bias=False, **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))

  # Binary output: sigmoid activation on one unit.
  model.add(Dense(1, activation='sigmoid'))

  return model

In [0]:
'''
@Description: generate a multilayer perceptron with Relu as activation
function.
@param: 
  - shape : int, the shape of the input
  - n_features: int, the number of features given
'''

#MODEL 2
#RULE: a = input, b = a*2/3 + c, c = b*2/3 + 1
#layers ALL FEATURES: 113, 229, 153, 1
#layers TIME CO2 TEMP: 41, 85, 57, 1 
#layers TIME CO2 TEMP PM25/TVOC: 59, 121, 81, 1 
#layers TIME CO2 TEMP PM25 TVOC: 77, 157, 105, 1 

def generate_model(shape, n_features):
  """Build the (uncompiled) ReLU multilayer perceptron.

  Hidden layer sizes follow the rule a = n_features, b = a*2/3 + c,
  c = b*2/3 + 1. Solving that linear system by hand gives
  b = 2*n_features + 3 and c = 4*n_features/3 + 3; the closed form is used
  instead of a numerical solver so that an exactly-integer size can never be
  truncated to the integer below because of floating point round-off.
  Fractional sizes are truncated with int().

  Every hidden stage is: bias-free Dense -> BatchNormalization -> ReLU ->
  Dropout(0.5). A single sigmoid unit closes the network.

  Args:
    shape: int, passed as `input_dim` to the first Dense layer.
    n_features: int, number of features used to size the hidden layers.

  Returns:
    The Sequential model; compiling it is left to the caller.
  """
  units_1 = int(n_features)
  units_2 = int(2*n_features + 3)
  units_3 = int(4*n_features/3 + 3)

  model = Sequential()

  for stage, units in enumerate((units_1, units_2, units_3)):
    # Only the first Dense layer declares the input dimension.
    first_layer_kwargs = {'input_dim': shape} if stage == 0 else {}
    model.add(Dense(units, kernel_initializer='random_uniform', use_bias=False, **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # The rate must be the float 0.5: the former `Dropout(0,5)` passed rate=0
    # (plus 5 as second positional argument), which disabled dropout.
    model.add(Dropout(0.5))

  # Binary output: sigmoid activation on one unit.
  model.add(Dense(1, activation='sigmoid'))

  return model

In [6]:
#@title SCEGLI IL DATASET E IL MODELLO

'''
@Description: MAIN
'''

#LOAD DATA
print("Loading data...")

# The path gets its own name so that `dataset` only ever refers to the numeric array.
dataset_path = '/root/data/uHooComplete_featureDataset.arff' #@param {type:"string"}

with open(dataset_path, encoding='utf-8') as f:
  dataDictionary = arff.load(f)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

#CONVERTING VALUES
print("\nConverting values...")
# Encode the class label held in the last column: 'Other' -> 0, 'Pasto' -> 1.
for row in data:
  if row[-1] == 'Other':
    row[-1] = 0
  elif row[-1] == 'Pasto':
    row[-1] = 1

dataset = data.astype('float32')
print("CONVERSION DONE")

#SPLIT INTO INPUT (X) AND OUTPUT (Y) VARIABLES
# Every column but the last is an input feature; the last column is the label.
s = dataset.shape[-1]
X = dataset[:,0:s-1]
Y = dataset[:,s-1]

n_features = s-1

#SPLIT INTO TRAINING, VALIDATION AND TEST SETS
print("\nSplit into training, validation and test sets...")

train_rate = 80
val_rate = 10
n_samples = dataset.shape[0]
# Chronological split points (no shuffling): first 80% train, next 10% validation.
train = (n_samples*train_rate)//100
val = (n_samples*(train_rate+val_rate))//100

train_data = X[:train]
train_label = Y[:train]

# NOTE(review): the `+1` offsets leave out the samples at index `train` and
# `val`. They are kept on purpose here because the sensor-data cell further
# down slices with the same `val+1` offset; change both together.
val_data = X[train+1:val]
val_label = Y[train+1:val]

test_data = X[val+1:]
test_label = Y[val+1:]
print("DATASET SPLITTED")

#COMPUTE CLASS WEIGHT
labels = np.unique(train_label)
# Keyword arguments work on both old and new scikit-learn releases
# (recent ones reject `classes` and `y` when passed positionally).
classWeight = compute_class_weight('balanced', classes=labels, y=train_label)
# Integer class ids as keys; they hash and compare equal to the float labels.
classWeight = {int(label): weight for label, weight in zip(labels, classWeight)}

#GENERATE MODEL
print("\nGenerate model...")

modello = 1 #@param {type:"integer"}

if modello==1 :
  model = generate_model_leaky(train_data.shape[-1], n_features)
elif modello==2:
  model = generate_model(train_data.shape[-1], n_features)
else:
  # Fail loudly instead of hitting a NameError (or a stale model) at compile time.
  raise ValueError("modello must be 1 or 2, got %r" % (modello,))

#OPTIMIZERS
adm = optimizers.Adam(lr=0.0001)

#COMPILE MODEL
print("\nCompile model...")
model.compile(loss='binary_crossentropy', optimizer = adm , metrics=['accuracy'])

#EARLY STOPPING
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

#FIT MODEL
print("\nFit model...")
history = model.fit(train_data, train_label, epochs=10, validation_data = (val_data, val_label), batch_size = 128, shuffle = True, class_weight = classWeight, verbose=1, callbacks = [es])

#EVALUATE MODEL
print("\nEvaluate model...")
scores_test = model.evaluate(test_data, test_label, batch_size=128, verbose = 1)
print("Test loss: %.2f%%" % (scores_test[0] * 100))
print("Test accuracy: %.2f%%" % (scores_test[1] * 100))

#CALCULATE PREDICTIONS
print("\nCalculate predictions...")
# Threshold the sigmoid output at 0.5. This is what `predict_classes` did for a
# single-unit output, without relying on a method that later Keras versions dropped.
pred = (model.predict(test_data, batch_size=128, verbose=0) > 0.5).astype('int32')
# Flatten the (n_samples, 1) predictions into a plain list for the post-processing.
flat_pred = list(pred.ravel())

#CONFUSION MATRIX
print("\nCompute confusion matrix...")
y_true = test_label
y_pred = pred
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
# Percentage of correctly classified samples of each class.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

# Fifth-from-last feature column of every test sample, handed to the
# time-aware post-processing as `time`.
time = list(test_data[:, -5])

Loading data...
DATASET LOADED

Converting values...
CONVERSION DONE

Split into training, validation and test sets...
DATASET SPLITTED

Generate model...
Instructions for updating:
Colocations handled automatically by placer.

Compile model...

Fit model...
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 280392 samples, validate on 35048 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

Evaluate model...
Test loss: 51.49%
Test accuracy: 79.46%

Calculate predictions...

Compute confusion matrix...
TN 26037
FP 6944
FN 255
TP 1813
Other corretti: 78.95 %
Pasto corretti: 87.67 %


In [50]:
def report_confusion_matrix(y_true, y_pred, title):
  """Print the confusion-matrix counts and the per-class hit rates.

  Args:
    y_true: ground-truth labels.
    y_pred: predicted labels, same length as y_true.
    title: line printed right after the report header.

  Returns:
    Tuple (tn, fp, fn, tp, other, pasto), where `other` and `pasto` are the
    percentages of correctly classified samples of class 0 and class 1.
  """
  print("\nCompute NEW confusion matrix...")
  print(title)
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
  print("TN", tn)
  print("FP", fp)
  print("FN", fn)
  print("TP", tp)
  other = 100*tn/(tn+fp)
  pasto = 100*tp/(fn+tp)
  print("Other corretti: %.2f %%" % other)
  print("Pasto corretti: %.2f %%" % pasto)
  return tn, fp, fn, tp, other, pasto


# Stage 1: two passes of the plain sliding window (size 5) over the raw predictions.
new_pred_1 = postprocessing_sw.sliding_windows(flat_pred, 5)
new_pred_1 = postprocessing_sw.sliding_windows(new_pred_1, 5)

# Stage 2 (breakfast window) is currently disabled.
#new_pred_2 = postprocessing_sw.sliding_windows_time_colazione(new_pred_1, time, 11)

# Stage 3: two passes of the lunch/dinner time-aware window (41, 41).
new_pred_3 = postprocessing_sw.sliding_windows_time_pranzocena(new_pred_1, time, 41, 41)
new_pred_3 = postprocessing_sw.sliding_windows_time_pranzocena(new_pred_3, time, 41, 41)

# Stage 4: two more passes of the plain sliding window (size 5).
new_pred_4 = postprocessing_sw.sliding_windows(new_pred_3, 5)
new_pred_4 = postprocessing_sw.sliding_windows(new_pred_4, 5)

new_pred = new_pred_4

# Optional: persist the final predictions (disabled). Note that the module is
# imported as `np`, so the call has to be `np.savetxt`, not `numpy.savetxt`.
#with open("/root/data/out.txt", "w") as f:
#  np.savetxt(f, new_pred, delimiter=',')
#print("RISULTATI SALVATI IN ROOT/DATA/OUT.TXT")

#CONFUSION MATRIX
# One report per enabled stage, all against the same ground truth.
y_true = test_label
for stage_title, n_y_pred in (("NEW PRED 1...", new_pred_1),
                              ("NEW PRED 3...", new_pred_3),
                              ("NEW PRED 4...", new_pred_4)):
  tn, fp, fn, tp, other, pasto = report_confusion_matrix(y_true, n_y_pred, stage_title)



Compute NEW confusion matrix...
NEW PRED 1...
TN 26138
FP 6843
FN 252
TP 1816
Other corretti: 79.25 %
Pasto corretti: 87.81 %

Compute NEW confusion matrix...
NEW PRED 3...
TN 26303
FP 6678
FN 251
TP 1817
Other corretti: 79.75 %
Pasto corretti: 87.86 %

Compute NEW confusion matrix...
NEW PRED 4...
TN 26304
FP 6677
FN 249
TP 1819
Other corretti: 79.76 %
Pasto corretti: 87.96 %


In [0]:
# Load the reduced sensor dataset and keep, for the test split only, the values
# of column 1 as float32 (used afterwards as the CO2 series).

#LOAD SENSOR DATA
datasetSensor = '/root/data/uHooComplete_featureDataset_Reduced.arff'

with open(datasetSensor, encoding='utf-8') as fs:
  dataSensor = arff.load(fs)

dataS = np.array(dataSensor['data'])

#CONVERTING VALUES
# Last column: 'Other' becomes 0, any other label becomes 1.
for row in dataS:
  row[-1] = 0 if row[-1] == 'Other' else 1

#TEST SENSOR DATA
# Same `val+1` offset as the test split of the main cell, so rows stay aligned
# with the test predictions.
dataT = dataS[val+1:]
datastr = dataT[:, 1]
co2 = datastr.astype('float32')

In [53]:
def trim_after_co2_peak(predictions, co2_values, min_run_length=5):
  """Cut every predicted meal run at its CO2 peak.

  A run is a maximal stretch of consecutive 1s in `predictions` that is
  immediately followed by a 0. For each such run of at least
  `min_run_length` samples, the position of the highest CO2 value inside the
  run is located (first occurrence on ties) and every prediction of the run
  after that position is reset to 0.

  A run that reaches the very end of the sequence (no 0 after it) is left
  untouched, as are runs shorter than `min_run_length`.

  Args:
    predictions: sequence of 0/1 predictions; it is NOT modified.
    co2_values: sequence of CO2 readings aligned index-by-index with
      `predictions`.
    min_run_length: minimum number of samples a run needs to be trimmed.

  Returns:
    A copy of `predictions` with the trimming applied.
  """
  # Work on a copy so the caller's predictions stay intact and the cell can be
  # re-run safely.
  trimmed = predictions.copy() if hasattr(predictions, 'copy') else list(predictions)
  n_samples = len(trimmed)

  run_start = None
  for i in range(n_samples):
    if trimmed[i] != 1:
      run_start = None
      continue
    if run_start is None:
      run_start = i

    # The run is closed only when the next sample exists and is 'Other'.
    run_closes_here = i + 1 < n_samples and trimmed[i + 1] == 0
    if not run_closes_here:
      continue

    if i - run_start + 1 >= min_run_length:
      peak = run_start + int(np.argmax(co2_values[run_start:i + 1]))
      # Everything after the CO2 peak, up to the end of the run, becomes 'Other'.
      for j in range(peak + 1, i + 1):
        trimmed[j] = 0
    run_start = None

  return trimmed


# Refined predictions; `new_pred_4` itself is left unchanged.
p = trim_after_co2_peak(new_pred_4, co2)
        



[1550.0, 1570.0, 1554.0, 1590.0, 1635.0]
lunghezza pasto: 5
1635.0
[0, 1, 2, 3, 4]
4
1
1
1
1
1
[510.0, 481.0, 476.0, 486.0, 491.0, 511.0, 506.0, 514.0, 524.0, 528.0, 505.0, 481.0, 485.0, 476.0, 469.0, 492.0, 484.0, 489.0, 522.0, 503.0, 520.0, 543.0, 563.0, 574.0, 596.0, 658.0, 676.0, 699.0, 758.0, 809.0, 860.0, 912.0, 975.0, 983.0, 1006.0, 1098.0, 1128.0, 1205.0, 1282.0, 1274.0, 1332.0, 1382.0]
lunghezza pasto: 42
1382.0
[207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248]
41
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
[936.0, 912.0, 943.0, 962.0, 978.0, 991.0, 977.0, 970.0, 954.0, 950.0, 953.0, 976.0, 1014.0, 1022.0, 1065.0, 1143.0]
lunghezza pasto: 16
1143.0
[316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331]
15
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
[1209.0, 1195.0, 

In [55]:
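#False in (p == new_pred_4) # sanity check: when p is bound with `p = new_pred_4` both names refer to the same object, so new_pred_4 changes together with p unless p is created as a copy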

False

In [41]:
#CONFUSION MATRIX
# Report for the predictions `p` against the test labels.
print("\nCompute NEW confusion matrix...")
print("NEW PRED REFO...")
y_true, n_y_pred = test_label, p
tn, fp, fn, tp = confusion_matrix(y_true, n_y_pred).ravel()
for tag, count in zip(("TN", "FP", "FN", "TP"), (tn, fp, fn, tp)):
  print(tag, count)
# Percentage of correctly classified samples of each class.
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
for template, value in (("Other corretti: %.2f %%", other),
                        ("Pasto corretti: %.2f %%", pasto)):
  print(template % value)


Compute NEW confusion matrix...
NEW PRED REFO...
TN 28917
FP 4064
FN 839
TP 1229
Other corretti: 87.68 %
Pasto corretti: 59.43 %


In [0]:
'''
@Description: PLOTTING
'''

# Training-history plot, currently disabled.
#plotting.plot_model_results(history)
# Called with the raw predictions, the `val` split index and the refined
# predictions `p`. plot_co2_temp lives in the local `plotting` module, so what
# exactly it draws is not visible from this notebook.
plotting.plot_co2_temp(flat_pred, val, p)
