<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/model1_cross_validation_Francesca.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

'''
@File name: model1.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset that contains minute-by-minute data
about different kinds of gases, collected by the uHoo air quality sensor, train
a NN that classifies whether each minute belongs to the class "Pasto" (1) or to
the class "Other" (0).
'''

!pip install liac-arff

import arff
import csv

import numpy as np
from numpy import savetxt

import pandas as pd
from pandas import DataFrame
from pandas import read_csv
from pandas import concat

from keras import optimizers
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Activation, LeakyReLU, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.preprocessing import sequence

from sklearn.utils import compute_class_weight
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib.pyplot import legend

# fix random seed for reproducibility
# Only NumPy's global random generator is seeded here.
# NOTE(review): neither Python's `random` module nor the Keras backend
# (TensorFlow, per the cell output) is seeded in this cell, so training runs
# may still differ between executions - confirm whether that matters.
seed = 7
np.random.seed(seed)



Using TensorFlow backend.


In [0]:
def generate_model(shape):
  """Build the (uncompiled) feed-forward binary classifier.

  Architecture: a batch-normalisation layer on the raw inputs, followed by
  three hidden blocks of Dense (no bias) -> BatchNormalization -> ReLU ->
  Dropout(0.5) with 113, 229 and 153 units, and a single sigmoid output unit.

  Args:
    shape: number of input features per sample.

  Returns:
    A keras `Sequential` model. The caller is responsible for compiling it.
  """
  model = Sequential()

  # The input shape is declared on the FIRST layer of the stack. It used to be
  # passed as `input_dim` to the first Dense layer, which is the second layer
  # here, so the model stayed unbuilt until `fit` and `model.summary()` could
  # not be called right after construction.
  model.add(BatchNormalization(input_shape=(shape,)))

  # Hidden blocks share the same structure and differ only in width.
  for units in (113, 229, 153):
    # use_bias=False: each Dense layer is immediately followed by a
    # BatchNormalization layer.
    model.add(Dense(units, use_bias=False, kernel_initializer='random_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

  # Single sigmoid unit for the binary output.
  model.add(Dense(1, activation='sigmoid'))

  #print(model.summary())

  return model

In [3]:
#LOAD DATA
# Read the full feature dataset (ARFF) into a NumPy array.
print("Loading data...")

dataset = '/root/data/6_uHoo_featureDataset.arff'

with open(dataset, encoding='utf-8') as arff_file:
  dataDictionary = arff.load(arff_file)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

#CONVERTING VALUES
# Replace the class name in the last column of every row with its numeric
# code; rows whose last value is neither name are left untouched.
print("Converting values...")
label_codes = {'Other': 0, 'Pasto': 1}
for row in data:
  code = label_codes.get(row[-1])
  if code is not None:
    row[-1] = code

# From here on `dataset` is the numeric array, no longer the file path.
dataset = data.astype('float32')
print("CONVERSION DONE")

#SPLIT INTO INPUT (X) AND OUTPUT (Y) VARIABLES
# Every column except the last is a feature; the last column is the label.
s = dataset.shape[-1]
X = dataset[:, :-1]
Y = dataset[:, -1]

Loading data...
DATASET LOADED
Converting values...
CONVERSION DONE


In [0]:
#LOAD SENSOR DATA
# Read the reduced dataset (ARFF); the cross-validation cell writes its first
# two columns to the per-fold CSV files.
datasetSensor = '/root/data/6_uHoo_featureDataset_Reduced.arff'

with open(datasetSensor, encoding='utf-8') as sensor_file:
  dataSensor = arff.load(sensor_file)

dataS = np.array(dataSensor['data'])

#CONVERTING VALUES
# 'Other' becomes 0; any other value in the last column becomes 1.
for row in dataS:
  row[-1] = 0 if row[-1] == 'Other' else 1

In [0]:
#model.save('my_model1_cv.h5')

#model = load_model('my_model1_cv.h5')

In [6]:
#OPTIMIZERS
# Candidate optimizers, kept at module level so they remain available to
# other cells.
sgd = optimizers.SGD(lr=0.0001)
adm = optimizers.Adam(lr=0.0001)
rms = optimizers.RMSprop(lr=0.001)

#OTHER OPTIMIZERS
adagrad = optimizers.Adagrad(lr=0.0001)
adadelta = optimizers.Adadelta(lr=0.0001)
adamax = optimizers.Adamax(lr=0.0001)
nadam = optimizers.Nadam(lr=0.0001)

#DEFINE 10-FOLD CROSS-VALIDATION
# shuffle=False keeps every test fold a contiguous block of rows; the sensor
# data slice further down relies on that.
kfold = KFold(n_splits=10, shuffle=False)
cvscores = []      # test accuracy (%) of each fold
predictions = []   # predicted classes of each fold
trueP = []         # % of "Pasto" samples correctly classified, per fold
trueN = []         # % of "Other" samples correctly classified, per fold
testIndex = 0      # first row of the current test fold

# Column order of the per-fold CSV files.
fieldnames = ['Temperature', 'CO2', 'Actual', 'Predicted']

k = 1

for train, test in kfold.split(X, Y):
  size = test.shape[0]
  print("\nFOLD: %d" %k)

  #COMPUTE CLASS WEIGHT
  # Keyword arguments: newer scikit-learn releases reject the positional form.
  labels = np.unique(Y[train])
  classWeight = compute_class_weight(class_weight='balanced', classes=labels, y=Y[train])
  classWeight = dict(zip(labels,classWeight))

  #GENERATE MODEL
  model = generate_model(X[train].shape[-1])

  #COMPILE MODEL
  # A fresh optimizer per fold, so no optimizer state can carry over from the
  # model trained in the previous fold.
  fold_optimizer = optimizers.Adam(lr=0.0001)
  model.compile(loss='binary_crossentropy', optimizer = fold_optimizer , metrics=['accuracy'])

  #EARLY STOPPING
  #es = EarlyStopping(monitor='val_acc', min_delta=0, patience=2, verbose=1, mode='auto')

  #FIT MODEL
  history = model.fit(X[train], Y[train], epochs=6, batch_size = 128, shuffle = True, verbose=1, class_weight = classWeight)#, callbacks = [es])

  #EVALUATE MODEL
  scores_test = model.evaluate(X[test], Y[test], batch_size= 128, verbose = 1)
  print("Test loss: %.2f%%" % (scores_test[0] * 100))
  print("Test accuracy: %.2f%%" % (scores_test[1] * 100))

  cvscores.append(scores_test[1] * 100)

  #CALCULATE PREDICTIONS
  pred = model.predict_classes(X[test], batch_size=128, verbose=1)
  predictions.append(pred)

  y_true = Y[test]
  # labels=[0, 1] pins the matrix to 2x2 even when a fold contains a single
  # class, so the four-way unpacking cannot fail.
  tn, fp, fn, tp = confusion_matrix(y_true, pred, labels=[0, 1]).ravel()
  other = 100*tn/(tn+fp)
  pasto = 100*tp/(fn+tp)
  print("Other: %.2f %%" % other)
  print("Pasto: %.2f %%" % pasto)
  trueP.append(pasto)
  trueN.append(other)

  # NOTE(review): on the 10th fold the true labels are appended to
  # `predictions` as an extra element - confirm this is what the consumers of
  # `predictions` expect.
  if(k==10): predictions.append(Y[test])

  #TEST DATA SENSOR
  # The printed range is inclusive and covers the rows of the current test
  # fold (the message says "train").
  print("\ntrain da %d a %d" % (testIndex, testIndex+size-1))
  # The slice end is exclusive, so it must be testIndex+size to cover all
  # `size` rows; the previous `-1` dropped the last sample of every fold.
  # NOTE(review): assumes dataS has the same rows, in the same order, as the
  # full dataset - confirm.
  dataT = dataS[testIndex:testIndex+size]

  # pred is flattened for the CSV so that 'Predicted' holds a scalar instead
  # of a one-element array.
  # NOTE(review): columns 0 and 1 of the reduced dataset are presumed to be
  # temperature and CO2, as the keys suggest - verify against the ARFF header.
  new_rows = [
    {'Temperature': i[0], 'CO2': i[1], 'Actual': j, 'Predicted': z}
    for i, j, z in zip(dataT, y_true, pred.ravel())
  ]

  path = "/root/data/" + str(k) + "_out.csv"
  with open(path, "w", newline='') as o:
    w = csv.DictWriter(o, fieldnames)
    w.writeheader()
    w.writerows(new_rows)

  k+=1
  testIndex+=size

print("MEAN ACCURACY: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
print("MEAN TRUE POSITIVE: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(trueP), np.std(trueP)))
print("MEAN TRUE NEGATIVE: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(trueN), np.std(trueN)))


FOLD: 1
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 39.26%
Test accuracy: 79.57%
Other: 81.02 %
Pasto: 50.77 %

train da 0 a 22992

FOLD: 2
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 42.90%
Test accuracy: 77.26%
Other: 77.96 %
Pasto: 60.02 %

train da 22993 a 45985

FOLD: 3
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 42.35%
Test accuracy: 80.23%
Other: 81.05 %
Pasto: 69.23 %

train da 45986 a 68978

FOLD: 4
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 51.72%
Test accuracy: 75.59%
Other: 75.39 %
Pasto: 78.85 %

train da 68979 a 91971

FOLD: 5
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 37.94%
Test accuracy: 82.92%
Other: 83.15 %
Pasto: 78.09 %

train da 91972 a 114964

FOLD: 6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 33.68%
Test accuracy: 87.80%
Other: 88.25 %
Pasto: 63.59 %

train da 114965 a 137957

FOLD: 7
Epoch 1/6
Epoc