<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/model1_cross_validation_Federica.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''
@File name: model1_cross_validation.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset of minute-by-minute readings of
several gases, collected by the uHoo air quality sensor, train a NN that
classifies each minute as belonging either to the class "Pasto" (1) or to
the class "Other" (0).
'''

!pip install liac-arff

import csv
import math

import arff

import numpy as np
from numpy import savetxt

import pandas as pd
from pandas import DataFrame
from pandas import read_csv
from pandas import concat

from keras import optimizers
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Activation, Dense, Dropout, LeakyReLU, BatchNormalization
from keras.callbacks import EarlyStopping
from keras.preprocessing import sequence

from sklearn.utils import compute_class_weight
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib.pyplot import legend

# fix random seed for reproducibility
# Only NumPy's global random generator is seeded here; no other library's
# generator is seeded in this cell.
# NOTE(review): confirm whether the Keras backend also needs its own seed for
# training runs to be repeatable.
seed = 7
np.random.seed(seed)

Collecting liac-arff
  Downloading https://files.pythonhosted.org/packages/c9/c3/6966861e2f4d302ac3a36821f7ac503b57d98cb9cc3b28e9cd8ef8b7df65/liac-arff-2.3.1.tar.gz
Building wheels for collected packages: liac-arff
  Running setup.py bdist_wheel for liac-arff ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/81/f0/15/97687f0a23a6859a7ced7e09271d321930c6641c2675d04745
Successfully built liac-arff
Installing collected packages: liac-arff
Successfully installed liac-arff-2.3.1


Using TensorFlow backend.


In [0]:
#layers : 57, 113, 226, 1 (rule: input/2, input, 2*input, 1)
#layers : 113, 229, 153, 1 (rule: a= input, b= a*2/3+c, c= b*2/3+1, 1)

def generate_model_leaky(shape):
  """Build the uncompiled LeakyReLU classifier (hidden widths 57, 113, 226).

  The network starts with a BatchNormalization layer, then stacks three
  identical stages (bias-free Dense -> BatchNormalization -> LeakyReLU(0.2)
  -> Dropout(0.5)) and ends with a single sigmoid unit.

  Args:
    shape: number of input features; passed as ``input_dim`` to the first
      Dense layer.

  Returns:
    The assembled ``Sequential`` model (not compiled).
  """
  hidden_widths = (57, 113, 226)

  net = Sequential()
  net.add(BatchNormalization())

  for stage, width in enumerate(hidden_widths):
    dense_options = {'kernel_initializer': 'random_uniform', 'use_bias': False}
    if stage == 0:
      # only the first Dense layer declares the input dimensionality
      dense_options['input_dim'] = shape

    net.add(Dense(width, **dense_options))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dropout(0.5))

  # single sigmoid output unit for the binary decision
  net.add(Dense(1, activation='sigmoid'))

  return net

In [0]:
#layers : 57, 113, 226, 1 (rule: input/2, input, 2*input, 1)
#layers : 113, 229, 153, 1 (rule: a= input, b= a*2/3+c, c= b*2/3+1, 1)

def generate_model(shape):
  """Build the uncompiled ReLU classifier (hidden widths 113, 229, 153).

  The network starts with a BatchNormalization layer, then stacks three
  identical stages (bias-free Dense -> BatchNormalization -> ReLU ->
  Dropout(0.5)) and ends with a single sigmoid unit.

  Args:
    shape: number of input features; passed as ``input_dim`` to the first
      Dense layer.

  Returns:
    The assembled ``Sequential`` model (not compiled).
  """
  model = Sequential()
  model.add(BatchNormalization())

  for stage, units in enumerate((113, 229, 153)):
    dense_options = {'kernel_initializer': 'random_uniform', 'use_bias': False}
    if stage == 0:
      # only the first Dense layer declares the input dimensionality
      dense_options['input_dim'] = shape

    model.add(Dense(units, **dense_options))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # The rate must be the float 0.5: writing Dropout(0,5) passes rate=0
    # (dropout disabled) plus a second positional argument of 5.
    model.add(Dropout(0.5))

  # single sigmoid output unit for the binary decision
  model.add(Dense(1, activation='sigmoid'))
  #print(model.summary())

  return model

In [4]:
#LOAD DATA
print("Loading data...")

dataset = '/root/data/6_uHoo_featureDataset.arff'

# arff.load hands back a dictionary; the rows are stored under its 'data' key
with open(dataset, encoding='utf-8') as f:
  dataDictionary = arff.load(f)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

#CONVERTING VALUES
# Encode the class label kept in the last column: 'Other' -> 0, 'Pasto' -> 1.
# Any other value is left untouched.
print("Converting values...")
label_codes = {'Other': 0, 'Pasto': 1}
for row in data:
  code = label_codes.get(row[-1])
  if code is not None:
    row[-1] = code

# from here on `dataset` is the numeric array, no longer the file path
dataset = data.astype('float32')
print("CONVERSION DONE")

#SPLIT INTO INPUT (X) AND OUTPUT (Y) VARIABLES
# every column except the last is a feature; the last column is the label
s = dataset.shape[-1]
X, Y = dataset[:, :s-1], dataset[:, s-1]

Loading data...
DATASET LOADED
Converting values...
CONVERSION DONE


In [5]:
#OPTIMIZERS

#sgd = optimizers.SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False)
#rms = optimizers.RMSprop(lr=0.0001, rho=0.9, epsilon=None, decay=0.0) #It is recommended to leave the parameters of this optimizer at their default values (except the learning rate, which can be freely tuned).
#adg = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0) #It is recommended to leave the parameters of this optimizer at their default values.
#ada = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0) #It is recommended to leave the parameters of this optimizer at their default values.
#ama = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
#nad = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004) #It is recommended to leave the parameters of this optimizer at their default values.

def make_optimizer():
  """Return a brand-new Adam optimizer with the settings used for every fold.

  A new instance is created per fold so that no optimizer state is carried
  over from the model trained in the previous fold.
  """
  return optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

#LOSS

#loss = 'mean_squared_error'
#loss = 'mean_squared_logarithmic_error'
loss = 'binary_crossentropy'

#DEFINE K-FOLD CROSS-VALIDATION
kfold = KFold(n_splits=10, shuffle=False, random_state=None)

cvscores = []
predictions = []
actual = []
true_p =[]
true_n = []

#LOAD SENSOR DATA
# Read once, before the loop: the file is the same for every fold.
datasetSensor = '/root/data/6_uHoo_featureDataset_Reduced.arff'

with open (datasetSensor, encoding='utf-8') as fs:
  dataSensor = arff.load(fs)

dataS = np.array(dataSensor['data'])

#CONVERTING VALUES
for y in dataS:
  if(y[-1] == 'Other'): y[-1] = 0
  else : y[-1] = 1

# The per-fold CSV pairs sensor rows with feature rows by position, so both
# tables must have the same number of rows.
if len(dataS) != len(X):
  raise ValueError(
      "Sensor dataset has %d rows but the feature dataset has %d; "
      "rows cannot be paired by index." % (len(dataS), len(X)))

# column order of the per-fold CSV files
csv_fields = ['Temperature', 'CO2', 'Actual', 'Predicted']

for i, (train, test) in enumerate(kfold.split(X, Y), start=1):
  
  print("\nFOLD: %d" %i)
  
  #COMPUTE CLASS WEIGHT
  # keyword arguments: recent scikit-learn versions no longer accept these
  # parameters positionally
  labels = np.unique(Y[train])
  classWeight = compute_class_weight(class_weight='balanced', classes=labels, y=Y[train])
  classWeight = dict(zip(labels,classWeight))

  #GENERATE MODEL
  model = generate_model_leaky(X[train].shape[-1])
  #model = generate_model(X[train].shape[-1])

  #COMPILE MODEL
  adm = make_optimizer()
  model.compile(loss = loss, optimizer = adm , metrics=['accuracy'])

  #FIT MODEL
  history = model.fit(X[train], Y[train], epochs=7, batch_size = 128, shuffle = True, verbose=1, class_weight = classWeight)

  #EVALUATE MODEL
  scores_test = model.evaluate(X[test], Y[test], batch_size= 128, verbose = 0)
  print("Test loss: %.2f%%" % (scores_test[0] * 100))
  print("Test accuracy: %.2f%%" % (scores_test[1] * 100))
  
  cvscores.append(scores_test[1] * 100)
  
  #CALCULATE PREDICTIONS AND SAVE IN A CSV FILE
  pred = model.predict_classes(X[test], batch_size=128, verbose=0)
  predictions.append([i,pred])
  #actual.append([i,Y[test]])
  
  #TEST DATA SENSOR
  # Select the sensor rows with the fold's own test indices. Hand-computed
  # slices of floor(n/10) rows do not match KFold's folds when n is not a
  # multiple of 10, and an exclusive end index of dimSplit-1 drops a row.
  dataT = dataS[test]

  new_rows = [
      {'Temperature': x[0], 'CO2': x[1], 'Actual': j, 'Predicted': z}
      for x, j, z in zip(dataT, Y[test], pred)
  ]
  
  s = "/root/data/out{}.csv".format(i)

  with open(s, "w", newline='') as o:
    w = csv.DictWriter(o, csv_fields)
    w.writeheader()
    w.writerows(new_rows)
  
  #CONFUSION MATRIX
  # labels=[0, 1] forces a 2x2 matrix even when a fold contains only one
  # class, so the four-way unpacking below always succeeds.
  y_true = Y[test]
  tn, fp, fn, tp = confusion_matrix(y_true, pred, labels=[0, 1]).ravel()
  other = 100*tn/(tn+fp)   # share of "Other" minutes classified correctly
  pasto = 100*tp/(fn+tp)   # share of "Pasto" minutes classified correctly
  true_p.append(pasto)
  true_n.append(other)
  print("Other: %.2f %%" % other)
  print("Pasto: %.2f %%" % pasto)

print("MEAN ACCURACY: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
print("MEAN TRUE POSITIVE RATE: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(true_p), np.std(true_p)))
print("MEAN TRUE NEGATIVE RATE: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(true_n), np.std(true_n)))



FOLD: 1
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 31.67%
Test accuracy: 88.47%
Other: 91.37 %
Pasto: 31.26 %

FOLD: 2
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 43.42%
Test accuracy: 79.64%
Other: 80.47 %
Pasto: 59.24 %

FOLD: 3
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 42.60%
Test accuracy: 81.26%
Other: 82.74 %
Pasto: 61.23 %

FOLD: 4
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 48.39%
Test accuracy: 81.08%
Other: 81.62 %
Pasto: 72.47 %

FOLD: 5
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 37.37%
Test accuracy: 85.94%
Other: 86.55 %
Pasto: 73.07 %

FOLD: 6
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 35.11%
Test accuracy: 90.66%
Other: 91.18 %
Pasto: 63.12 %

FOLD: 7
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Test loss: 40.77%
Test accu

Save model:

# `model` is rebound on every cross-validation fold, so what gets written here
# is the network trained in the last fold that ran.
model.save('my_model1_cv.h5')

Load a saved model:

# Rebind `model` to the network restored from the file written by model.save.
model = load_model('my_model1_cv.h5')