<a href="https://colab.research.google.com/github/FG2511/ARE/blob/master/model1_cross_validation_Silvia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
'''
@File name: model1.ipynb
@Created on 2018-12-20
@Authors: Federica Gerina, Francesca Moi, Silvia Maria Massa
@Description: Given a time-series dataset that contains minute-by-minute data
about different kinds of gases, collected by the uHoo air quality sensor, train
a NN that classifies whether a minute belongs to the class "Pasto" (1) or to
the class "Other" (0).
'''

!pip install liac-arff

import arff
import csv

import math

'''Provides a high-performance multidimensional array object, 
and tools for working with these arrays'''
import numpy as np
'''Save an array to a text file.'''
from numpy import savetxt

'''Providing high-performance, easy-to-use data structures and data analysis tools'''
import pandas as pd
'''Two-dimensional size-mutable, 
potentially heterogeneous tabular data structure with labeled axes (rows and columns). 
Arithmetic operations align on both row and column labels.'''
from pandas import DataFrame
'''Read CSV file into DataFrame.
Supports optionally iterating or breaking of the file into chunks.'''
from pandas import read_csv
'''Concatenate pandas objects along a particular axis with optional set logic along the other axes.
Can also add a layer of hierarchical indexing on the concatenation axis, 
which may be useful if the labels are the same (or overlapping) on the passed axis number'''
from pandas import concat

from keras import optimizers
'''Linear stack of layers.'''
from keras.models import Sequential
'''Loads a model saved via save_model.'''
from keras.models import load_model
'''Dense: just your regular densely-connected NN layer.
Dropout: applies Dropout to the input.'''
from keras.layers import Dense, Dropout, BatchNormalization,Activation
from keras.callbacks import EarlyStopping
from keras.preprocessing import sequence

from sklearn.utils import compute_class_weight
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib.pyplot import legend

# fix random seed for reproducibility
# NOTE(review): this seeds only NumPy's global RNG. No Keras/backend seed is set
# in this cell, so weight initialisation and dropout masks may still differ
# between runs - TODO confirm whether the backend needs its own seed.
seed = 7
np.random.seed(seed)



In [0]:
def generate_model(shape):
  '''Build the (uncompiled) feed-forward binary classifier.

  Architecture: an input BatchNormalization layer, then three hidden blocks of
  Dense (no bias) -> BatchNormalization -> ReLU -> Dropout(0.5) with 113, 229
  and 153 units respectively, then a single sigmoid output unit.

  Args:
    shape: number of input features; forwarded as input_dim to the first
      Dense layer.

  Returns:
    A keras Sequential model. The caller is responsible for compiling it.
  '''
  model = Sequential()
  model.add(BatchNormalization())

  for position, units in enumerate((113, 229, 153)):
    # Only the first Dense layer carries the input dimension.
    extra = {'input_dim': shape} if position == 0 else {}
    # The Dense bias is disabled because the BatchNormalization layer that
    # follows provides its own offset (Keras default center=True).
    model.add(Dense(units, kernel_initializer='random_uniform', use_bias=False, **extra))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # The rate must be the float 0.5. The previous Dropout(0,5) passed rate=0
    # and noise_shape=5, which switched dropout off entirely.
    model.add(Dropout(0.5))

  model.add(Dense(1, activation='sigmoid'))

  #print(model.summary())

  return model

In [8]:
# --- Read the ARFF feature dataset from disk ---
print("Loading data...")

dataset = '/root/data/6_uHoo_featureDataset.arff'
with open(dataset, encoding='utf-8') as f:
  dataDictionary = arff.load(f)

data = np.array(dataDictionary['data'])
print("DATASET LOADED")

# --- Encode the class label held in the last column: 'Other' -> 0, 'Pasto' -> 1 ---
print("Converting values...")
# Both masks are computed before any assignment so the two replacements
# cannot interfere with each other.
isOther = data[:, -1] == 'Other'
isPasto = data[:, -1] == 'Pasto'
data[isOther, -1] = 0
data[isPasto, -1] = 1

# From here on `dataset` is the numeric matrix, no longer the file path.
dataset = data.astype('float32')
print("CONVERSION DONE")

# --- Split into inputs X (every column but the last) and target Y (last column) ---
s = dataset.shape[-1]
X = dataset[:, :s-1]
Y = dataset[:, s-1]

Loading data...
DATASET LOADED
Converting values...
CONVERSION DONE


In [0]:
#OPTIMIZERS
# Adam(lr=0.0001) is the optimizer in use; a fresh instance is created for each
# fold inside the loop so no optimizer state leaks from one model to the next.
# Alternatives tried previously:
#sgd = optimizers.SGD(lr=0.0001)
#ada = optimizers.Adadelta(lr=0.0001)
#rms = optimizers.RMSprop(lr=0.001)

#DEFINE 10-FOLD CROSS-VALIDATION
# shuffle=False keeps every test fold a contiguous block of rows.
kfold = KFold(n_splits=10, shuffle=False)
cvscores = []      # test accuracy (%) per fold
predictions = []   # [fold number, predicted classes] per fold
tpTot = []         # % of "Pasto" rows classified correctly, per fold
tnTot = []         # % of "Other" rows classified correctly, per fold

#LOAD SENSOR DATA
# The file does not change between folds, so it is read and converted once
# instead of once per fold.
datasetSensor = '/root/data/6_uHoo_featureDataset_Reduced.arff'

with open (datasetSensor, encoding='utf-8') as fs:
  dataSensor = arff.load(fs)

dataS = np.array(dataSensor['data'])

#CONVERTING VALUES
for y in dataS:
  if(y[-1] == 'Other'): y[-1] = 0
  else : y[-1] = 1

# The per-fold CSV pairs sensor rows with labels/predictions by position, so
# both datasets must describe the same rows in the same order.
# NOTE(review): row order cannot be checked from here - only the row count is.
if len(dataS) != len(dataset):
  raise ValueError(
    "Sensor dataset has %d rows but feature dataset has %d rows: "
    "cannot align them by position" % (len(dataS), len(dataset)))

csvFields = ['Temperature', 'CO2', 'Actual', 'Predicted']

for i, (train, test) in enumerate(kfold.split(X, Y), start=1):
  print("\nFOLD: %d" %i)

  #COMPUTE CLASS WEIGHT
  # Keyword arguments are accepted by both old and new scikit-learn releases
  # (newer ones reject the positional form).
  labels = np.unique(Y[train])
  classWeight = compute_class_weight(class_weight='balanced', classes=labels, y=Y[train])
  classWeight = dict(zip(labels,classWeight))

  #GENERATE MODEL
  model = generate_model(X[train].shape[-1])

  #COMPILE MODEL
  adm = optimizers.Adam(lr=0.0001)
  model.compile(loss = 'binary_crossentropy', optimizer = adm , metrics=['accuracy'])

  #EARLY STOPPING
  #es = EarlyStopping(monitor='val_acc', min_delta=0, patience=2, verbose=0, mode='auto')

  #FIT MODEL
  history = model.fit(X[train], Y[train], epochs=10, batch_size = 128, shuffle = True, verbose=1, class_weight = classWeight) #callbacks = [es])

  #EVALUATE MODEL
  scores_test = model.evaluate(X[test], Y[test], batch_size= 128, verbose = 1)
  print("Test loss: %.2f%%" % (scores_test[0] * 100))
  print("Test accuracy: %.2f%%" % (scores_test[1] * 100))

  cvscores.append(scores_test[1] * 100)

  #CALCULATE PREDICTIONS AND SAVE IN A CSV FILE
  # Thresholding the sigmoid output at 0.5 is what predict_classes did for a
  # single-unit output; predict() is available in every Keras version.
  proba = model.predict(X[test], batch_size=128, verbose=1)
  pred = (proba > 0.5).astype('int32')
  predictions.append([i,pred])

  #TEST DATA SENSOR
  # Index with the fold's own test indices. The previous manual
  # startIndex/finishIndex bookkeeping dropped one row per fold and drifted
  # whenever the row count was not a multiple of 10.
  dataT = dataS[test]

  # pred has shape (n, 1); flatten it so the CSV holds 0/1 instead of "[0]"/"[1]".
  new_rows = []
  for x,j,z in zip(dataT, Y[test], pred.ravel()):
    new_rows.append({
      'Temperature': x[0],
      'CO2': x[1],
      'Actual': j,
      'Predicted': z,
    })

  outPath = "/root/data/out{}.csv".format(i)

  with open(outPath, "w", newline='') as o:
    w = csv.DictWriter(o, csvFields)
    w.writeheader()
    w.writerows(new_rows)

  #CONFUSION MATRIX
  y_true = Y[test]
  y_pred = pred.ravel()
  # labels=[0, 1] forces a 2x2 matrix even when a fold contains a single
  # class, so the four-way unpacking below cannot fail.
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
  print("TN", tn)
  print("FP", fp)
  print("FN", fn)
  print("TP", tp)
  # A fold with no rows of a class has no defined rate for it: record NaN
  # instead of dividing by zero.
  other = 100*tn/(tn+fp) if (tn+fp) else float('nan')
  pasto = 100*tp/(fn+tp) if (fn+tp) else float('nan')
  print("Other corretti: %.2f %%" % other)
  print("Pasto corretti: %.2f %%" % pasto)

  tpTot.append(pasto)
  tnTot.append(other)

# NaN-aware statistics skip folds where a class was absent; with no NaN they
# equal the plain mean / standard deviation.
print("MEAN ACCURACY: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
print("MEAN TP: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.nanmean(tpTot), np.nanstd(tpTot)))
print("MEAN TN: %.2f%% (STANDARD DEVIATION: +/- %.2f%%)" % (np.nanmean(tnTot), np.nanstd(tnTot)))



FOLD: 1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

In [0]:
#model.save('my_model1_cv.h5')

#model = load_model('my_model1_cv.h5')