<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/project/model1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
'''
@File name: model1.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset that contains minute-by-minute data 
about different kind of gases, collected by the uHoo air quality sensor, train
a NN that classifies if a minute belongs to the class "Pasto" (1) otherwise to
the class "Other" (0).
'''

!pip install liac-arff

import arff
import csv

import numpy as np
from numpy import savetxt

import pandas as pd
from pandas import DataFrame
from pandas import read_csv
from pandas import concat

from keras import optimizers
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization, Activation
from keras.callbacks import EarlyStopping
from keras.preprocessing import sequence

from sklearn.utils import compute_class_weight
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib.pyplot import legend

from collections import deque

# fix random seed for reproducibility
seed = 7
np.random.seed(seed)



In [0]:
#MODELLO 1
#REGOLA: input/2, input, 2*input, 1
#layers TUTTE LE FEATURE: 57, 113, 226, 1 
#layers TIME CO2 TEMP: 21, 41, 82, 1 
#layers TIME CO2 TEMP PM25/TVOC: 30, 59, 118, 1 
#layers TIME CO2 TEMP PM25 TVOC: 39, 77, 154, 1 

#MODELLO 5
#REGOLA: input, 2*input, 1
#layers TUTTE LE FEATURE: 113, 226, 1 
#layers TIME CO2 TEMP: 41, 82, 1 
#layers TIME CO2 TEMP PM25/TVOC: 59, 118, 1 
#layers TIME CO2 TEMP PM25 TVOC: 77, 154, 1 


def generate_model_leaky(shape, n_features):
  
  #MODELLO 1
  units_1 = int(n_features/2)
  units_2 = n_features
  units_3 = n_features*2
  
  #MODELLO 5
  #units_1 = n_features
  #units_2 = n_features*2
  
  
  model = Sequential()
  model.add(BatchNormalization())
  
  model.add(Dense(units_1, input_dim=shape, kernel_initializer='random_uniform',  use_bias = False))
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha = 0.2))
  model.add(Dropout(0.5))
  
  model.add(Dense(units_2, kernel_initializer='random_uniform',  use_bias = False))
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha = 0.2))
  model.add(Dropout(0.5))
  
  model.add(Dense(units_3, kernel_initializer='random_uniform',  use_bias = False))
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha = 0.2))
  model.add(Dropout(0.5))
  
  model.add(Dense(1, activation='sigmoid'))
  #print(model.summary())

  return model

In [0]:
#MODELLO 2 E MODELLO 3
#REGOLA: a= input, b= a*2/3+c, c= b*2/3+1
#layers TUTTE LE FEATURE: 113, 229, 153, 1
#layers TIME CO2 TEMP: 41, 85, 57, 1 
#layers TIME CO2 TEMP PM25/TVOC: 59, 121, 81, 1 
#layers TIME CO2 TEMP PM25 TVOC: 77, 157, 105, 1 

#MODELLO 4
#REGOLA: input, input/2, 1
#layers TUTTE LE FEATURE: 113, 57, 1 
#layers TIME CO2 TEMP: 41, 21, 1 
#layers TIME CO2 TEMP PM25/TVOC: 59, 30, 1 
#layers TIME CO2 TEMP PM25 TVOC: 77, 29, 1 

def generate_model(shape, n_features):
  
  #MODELLO 2 e 3
  
  a = np.array([[1,0,0],[-(2/3),1,-1],[0,-(2/3),1]])
  b = np.array([n_features,0,1])
  x = np.linalg.solve(a, b)
  
  units_1 = int(x[0])
  units_2 = int(x[1])
  units_3 = int(x[2])
  
  
  #MODELLO 4
  #units_1 = n_features
  #units_2 = int(n_features/2)
  
  model = Sequential()
  
  #model.add(BatchNormalization()) #MODELLO 2 SENZA BATCHNORMALIZATION
 
  model.add(Dense(units_1, input_dim=shape, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0,5))
  
  model.add(Dense(units_2, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0,5))
  
  model.add(Dense(units_3, kernel_initializer='random_uniform', use_bias = False))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Dropout(0,5))
  
  model.add(Dense(1, activation='sigmoid'))
  #print(model.summary())

  return model

In [17]:
#LOAD DATA
print("Loading data...")

dataset = '/root/data/6_uHoo_featureDataset.arff'
#dataset = '/root/data/6_uHoo_featureDataset(CO2_TEMP).arff'
#dataset = '/root/data/6_uHoo_featureDataset(CO2_TEMP_PM25).arff'
#dataset = '/root/data/6_uHoo_featureDataset(CO2_TEMP_TVOC).arff'
#dataset = '/root/data/6_uHoo_featureDataset(CO2_TEMP_PM25_TVOC).arff'


with open (dataset, encoding='utf-8') as f:
  dataDictionary = arff.load(f)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

#CONVERTING VALUES
print("Converting values...")
for i in data:
  if(i[-1] == 'Other'): i[-1] = 0
  elif(i[-1] == 'Pasto') : i[-1] = 1

dataset = data.astype('float32')
print("CONVERSION DONE")

Loading data...
DATASET LOADED
Converting values...
CONVERSION DONE


In [0]:
#SPLIT INTO INPUT (X) AND OUTPUT (Y) VARIABLES
s = dataset.shape[-1]
#print(s)
X = dataset[:,0:s-1]
Y = dataset[:,s-1]

#print(s-1)

n_features = s-1

#SPLIT INTO TRAINING, VALIDATION AND TEST SETS
train_rate = 80
val_rate = 10
train = round(int((dataset.shape[0]*train_rate)/100))
val = round(int((dataset.shape[0]*(train_rate+val_rate))/100))

train_data = X[:train]
train_label = Y[:train]

val_data = X[train+1:val]
val_label = Y[train+1:val]

test_data = X[val+1:]
test_label = Y[val+1:]


In [0]:
#print(pred[0:10])

flat_pred = [item for sublist in pred for item in sublist]

#print(flat_pred[0:10])

In [19]:
#COMPUTE CLASS WEIGHT
labels = np.unique(train_label)
classWeight = compute_class_weight('balanced', labels, train_label)
classWeight = dict(zip(labels,classWeight))

#GENERATE MODEL
#model = generate_model_leaky(train_data.shape[-1], n_features)
model = generate_model(train_data.shape[-1], n_features)

#OPTIMIZERS
adm = optimizers.Adam(lr=0.0001)

#COMPILE MODEL
model.compile(loss='binary_crossentropy', optimizer = adm , metrics=['accuracy'])

#EARLY STOPPING
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

#FIT MODEL
history = model.fit(train_data, train_label, epochs=10, validation_data = (val_data, val_label), batch_size = 128, shuffle = True, class_weight = classWeight, verbose=1, callbacks = [es])

#EVALUATE MODEL
scores_test = model.evaluate(test_data, test_label, batch_size=128, verbose = 1)
print("Test loss: %.2f%%" % (scores_test[0] * 100))
print("Test accuracy: %.2f%%" % (scores_test[1] * 100))

#CALCULATE PREDICTIONS AND SAVE IN A CSV FILE
pred = model.predict_classes(test_data, batch_size=128, verbose=0)

Train on 183944 samples, validate on 22992 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Test loss: 27.43%
Test accuracy: 90.30%


In [21]:
#CONFUSION MATRIX
y_true = test_label
y_pred = pred
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

TN 20628
FP 1563
FN 668
TP 133
Other corretti: 92.96 %
Pasto corretti: 16.60 %


In [0]:
#model.save('my_model1.h5')
#model = load_model('my_model.h5')

In [23]:
'''
#PLOT RESULTS

# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
'''

"\n#PLOT RESULTS\n\n# summarize history for accuracy\nplt.plot(history.history['acc'])\nplt.plot(history.history['val_acc'])\nplt.title('model accuracy')\nplt.ylabel('accuracy')\nplt.xlabel('epoch')\nplt.legend(['train', 'test'], loc='upper left')\nplt.show()\n\n# summarize history for loss\nplt.plot(history.history['loss'])\nplt.plot(history.history['val_loss'])\nplt.title('model loss')\nplt.ylabel('loss')\nplt.xlabel('epoch')\nplt.legend(['train', 'test'], loc='upper left')\nplt.show()\n"

In [24]:
'''
#LOAD SENSORE DATA
datasetSensor = '/root/data/6_uHoo_featureDataset_Reduced.arff'

with open (datasetSensor, encoding='utf-8') as fs:
  dataSensor = arff.load(fs)

dataS = np.array(dataSensor['data'])

#CONVERTING VALUES
for i in dataS:
  if(i[-1] == 'Other'): i[-1] = 0
  else : i[-1] = 1

#TEST DATA SENSOR 
dataT = dataS[val+1:]

new_rows = []
'''
'''
for i,j,z in zip(dataT, test_label, pred):
  new_dict = {}
  new_dict['Datetime'] = i[2]
  new_dict['Temperature'] = i[0]
  new_dict['CO2'] =  i[1]
  new_dict['Actual'] = j
  new_dict['Predicted'] = z

  new_rows.append(new_dict)
'''

'''
for i,z in zip(dataT, pred):
  new_dict = {}
  new_dict['Datetime'] = i[2]
  new_dict['Temperature'] = i[0]
  new_dict['CO2'] =  i[1]
  new_dict['Actual'] = i[-1]
  new_dict['Predicted'] = z

  new_rows.append(new_dict)


keys = new_rows[0].keys()


with open("/root/data/out.csv", "w", newline='') as o:
  w = csv.DictWriter(o, keys)
  w.writeheader()
  w.writerows(new_rows)
  

j=0
mis_pasto = []
mis_other = []

for i in new_rows:
    if(i['Actual'] == 1 and i['Predicted'] == 0):
            mis_pasto.append(j)
    elif(i['Actual'] == 0 and i['Predicted'] == 1):
            mis_other.append(j)
    j+=1
'''
'''
plt.figure(1)
plt.title('Temperature / Misclassified Pasto')
plt.plot([data['Temperature'] for data in new_rows],'g',linewidth=1, marker='8', markersize=5, markerfacecolor='red',markevery=mis_pasto)
plt.draw()
plt.savefig('/root/data/Temperature_Misclassified Pasto.svg')

plt.figure(2)
plt.title('Temperature / Misclassified Other')
plt.plot([data['Temperature'] for data in new_rows],'g',linewidth=1,marker='8', markersize=5, markerfacecolor='black',markevery=mis_other)
plt.draw()
plt.savefig('/root/data/Temperature_Misclassified Other.svg')

plt.figure(3)
plt.title('CO2 / Misclassified Pasto')
plt.plot([data['CO2'] for data in new_rows],'b',linewidth=1, marker='8', markersize=5, markerfacecolor='red',markevery=mis_pasto)
plt.draw()
plt.savefig('/root/data/CO2_Misclassified Pasto.svg')

plt.figure(4)
plt.title('CO2 / Misclassified Other')
plt.plot([data['CO2'] for data in new_rows],'b',linewidth=1,marker='8', markersize=5, markerfacecolor='black',markevery=mis_other)
plt.draw()
plt.savefig('/root/data/CO2_Misclassified Other.svg')

plt.show()
'''


"\nplt.figure(1)\nplt.title('Temperature / Misclassified Pasto')\nplt.plot([data['Temperature'] for data in new_rows],'g',linewidth=1, marker='8', markersize=5, markerfacecolor='red',markevery=mis_pasto)\nplt.draw()\nplt.savefig('/root/data/Temperature_Misclassified Pasto.svg')\n\nplt.figure(2)\nplt.title('Temperature / Misclassified Other')\nplt.plot([data['Temperature'] for data in new_rows],'g',linewidth=1,marker='8', markersize=5, markerfacecolor='black',markevery=mis_other)\nplt.draw()\nplt.savefig('/root/data/Temperature_Misclassified Other.svg')\n\nplt.figure(3)\nplt.title('CO2 / Misclassified Pasto')\nplt.plot([data['CO2'] for data in new_rows],'b',linewidth=1, marker='8', markersize=5, markerfacecolor='red',markevery=mis_pasto)\nplt.draw()\nplt.savefig('/root/data/CO2_Misclassified Pasto.svg')\n\nplt.figure(4)\nplt.title('CO2 / Misclassified Other')\nplt.plot([data['CO2'] for data in new_rows],'b',linewidth=1,marker='8', markersize=5, markerfacecolor='black',markevery=mis_othe

In [25]:
import sys
sys.path.append('local_modules')

import postprocessing_sliding

#lista = [0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,0]
new_pred = postprocessing_sliding.sliding_windows(flat_pred)
print(new_pred)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [26]:
print("New results:")
y_true = test_label
y_pred = new_pred
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print("TN", tn)
print("FP", fp)
print("FN", fn)
print("TP", tp)
other = 100*tn/(tn+fp)
pasto = 100*tp/(fn+tp)
print("Other corretti: %.2f %%" % other)
print("Pasto corretti: %.2f %%" % pasto)

New results:
TN 20696
FP 1495
FN 676
TP 125
Other corretti: 93.26 %
Pasto corretti: 15.61 %
