In [1]:
# cnn model
import numpy as np
from numpy import mean
from numpy import std
from numpy import dstack
import pandas as pd
from pandas import read_csv
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense, Reshape
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.math import confusion_matrix
from matplotlib import pyplot as plt
import seaborn as sns
import keras


from google.colab import drive
drive.mount('/content/gdrive')

path = 'gdrive/My Drive/Esame bellotti/Consegna Github/dataset/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
def show_confusion_matrix(validations, predictions):

    matrix = confusion_matrix(validations, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(matrix,
    #             cmap="coolwarm",
    #             linecolor='white',
    #             linewidths=1,
    #             xticklabels=['Not FOG', 'FOG'],
    #             yticklabels=['Not FOG', 'FOG'],
    #             annot=True,
    #             fmt="d")
    # plt.title("Confusion Matrix")
    # plt.ylabel("True Label")
    # plt.xlabel("Predicted Label")
    # plt.show()

    tn = matrix[0][0]
    tp = matrix[1][1]
    fp = matrix[0][1]
    fn = matrix[1][0]


    spec = tn/(tn+fp)
    sens = tp/(tp+fn)
    prec = tp/(tp+fp)

    return spec, sens, prec

In [3]:
def balance_dataset(df):
  """
  Calcolo quanti elementi sono da togliere
  Tolgo random un elemento da x e il corrispondente y
  """
  print("Bilanciamento")

  FOG = df[df.N == 1].shape[0]
  NOT_FOG = df[df.N == 0].shape[0]
  

  if NOT_FOG > FOG:
    da_togliere = NOT_FOG - FOG

    new_df = df.drop(df[df['N'].eq(0)].sample(da_togliere).index)


  FOG = new_df[new_df.N == 1].shape[0]
  NOT_FOG = new_df[new_df.N == 0].shape[0]
  

  return new_df
      



In [4]:
def segmentation(dataset):
    timesteps = 128

    pre_dfX = dataset[["A0", "A1", "A2", "U0", "U1", "U2", "T1", "T2", "T3"]]
    pre_dfy = dataset[["N"]]
    pre_dfy =  pre_dfy - 1
    print("Fog events in dataset: " + str(pre_dfy[pre_dfy.N == 1].shape[0]))
    print("Not Fog events in dataset: " + str(pre_dfy[pre_dfy.N == 0].shape[0]))


    data_X = list()
    data_y = list()

    i = 0
    while i < (len(pre_dfX)-timesteps):
      a = i + timesteps

      data_X.append(pre_dfX.iloc[int(i):int(a)].values)
      data_y.append(pre_dfy.iloc[int(a)][0])

      i += timesteps/2

    new_df = pd.DataFrame({"Data":data_X, "N":data_y})
    
    return new_df

In [5]:
def split_dataframe(df):
  #Split in train e test
  n_row = df.shape[0]
  up = round(n_row * 0.7)
  df_train = df.iloc[:up,:]
  df_test = df.iloc[up+1:,:]
  return df_train, df_test

In [6]:
def load_dataset(files):
    
    """
    This function reads the accelerometer data from a file
    Args:
        file_path: URL pointing to the CSV file
    Returns:
        A pandas dataframe
    """
    df = pd.DataFrame()
    for txt in files:
        read_df = pd.read_csv(path+txt+'.txt')
        s = read_df["N"] != 0
        tmp = read_df.loc[s]
        df = df.append(tmp)

    df = segmentation(df)
    #df = balance_dataset(df)

    df_train, df_test = split_dataframe(df)


    trainX = np.asarray(df_train["Data"].values.tolist())
    trainy = df_train["N"]

    testX = np.asarray(df_test["Data"].values.tolist())
    testy = df_test["N"]


    trainy = to_categorical(trainy,2) 
    testy = to_categorical(testy,2)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)

    return trainX, trainy, testX, testy

In [7]:
# fit and evaluate a model




def evaluate_model(trainX, trainy, testX, testy, param):

  verbose, epochs, batch_size = 0, 100, 32
  timesteps, features, outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]


  model = Sequential()
  model.add(Conv1D(filters=32, kernel_size=10, activation='relu', 
                   input_shape=(timesteps,features), name='Layer_1'))
  model.add(Conv1D(filters=32, kernel_size=10, activation='relu', name = 'Layer_2'))
  model.add(Dropout(0.5))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Flatten())
  model.add(Dense(100, activation='relu'))
  model.add(Dense(outputs, activation='softmax'))
  import time
  start = time.time()
  model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
  model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
  end = time.time()
  print("Training time: " + str(end - start))

  # evaluate model
  start = time.time()
  _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=verbose)
  end = time.time()
  print("Evaluation time: " + str(end - start))

  start = time.time()
  y_pred_test = model.predict(testX)
  end = time.time()
  print("Prediction time: " + str(end - start))

  max_y_pred_test = np.argmax(y_pred_test, axis=1)
  max_y_test = np.argmax(testy, axis=1)
  spec, sens, prec = show_confusion_matrix(max_y_test, max_y_pred_test)

  return accuracy, spec, sens, prec

In [8]:
data = [["S01R01", "S01R02","S02R01", "S02R02","S03R01","S05R01", "S05R02","S06R01","S07R01","S08R01","S09R01"]]

In [12]:
def run_experiment(repeats=10): #Multi parametro
	# load data
	fig, ax = plt.subplots()
	for d in data:
		print("##################")
		print("Paziente" + str(d))
		params = [32]
		results_acc = list()
		for p in params:
			print("> Filters = " + str(p))
			trainX, trainy, testX, testy = load_dataset(d)
			accs = list()
			specs = list()
			senss = list()
			precs = list()
		
			for r in range(repeats):
				acc, spec, sens, prec = evaluate_model(trainX, trainy, testX, testy, p)
				acc = acc * 100.0
				spec = spec * 100.0
				sens = sens * 100.0
				prec = prec * 100.0
				accs.append(acc)
				specs.append(spec)
				senss.append(sens)
				precs.append(prec)
			
				print('>#%d: acc %.3f' % (r+1, acc))
				print('>#%d: spec %.3f' % (r+1, spec))
				print('>#%d: sens %.3f' % (r+1, sens))
				print('>#%d: prec %.3f' % (r+1, prec))

			results_acc.append(mean(accs))

			print('Accuracy: %.3f%% (+/-%.3f)' % (mean(accs), std(accs)))
			print('Specificity:  %.3f%% (+/-%.3f)' % (mean(specs), std(specs)))
			print('Sensitivity:  %.3f%% (+/-%.3f)' % (mean(senss), std(senss)))
			print('Precision:  %.3f%% (+/-%.3f)' % (mean(precs), std(precs)))
		plt.plot(params, results_acc, label = str(d))
	plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
	plt.show()
	
	

In [10]:
def run_experiment_sensspec(repeats=10):
	# load data
  results_sens = list()
  results_spec = list()
  for d in data:
    print("##################")
    print("Paziente" + str(d))
    trainX, trainy, testX, testy = load_dataset(d)
    accs = list()
    specs = list()
    senss = list()
  
    for r in range(repeats):
      acc, spec, sens = evaluate_model(trainX, trainy, testX, testy, 32)
      acc = acc * 100.0
      spec = spec * 100.0
      sens = sens * 100.0
      accs.append(acc)
      specs.append(spec)
      senss.append(sens)
    
      print('>#%d: acc %.3f' % (r+1, acc))
      print('>#%d: spec %.3f' % (r+1, spec))
      print('>#%d: Recal %.3f' % (r+1, sens))

    results_spec.append(mean(spec))
    results_sens.append(mean(sens))

    print('Accuracy: %.3f%% (+/-%.3f)' % (mean(accs), std(accs)))
    print('Specificity:  %.3f%% (+/-%.3f)' % (mean(specs), std(specs)))
    print('Sensitivity:  %.3f%% (+/-%.3f)' % (mean(senss), std(senss)))

  fig, ax = plt.subplots()

  ax.scatter(results_sens, results_spec)
  ax.set_xlabel('Sensitivity')
  ax.set_ylabel('Specificity')

  for i, txt in enumerate(data):
    ax.annotate(txt, (results_sens[i], results_spec[i]))

In [13]:
# run the experiment
run_experiment()
#run_experiment_sensspec()

##################
Paziente['S01R01', 'S01R02', 'S02R01', 'S02R02', 'S03R01', 'S05R01', 'S05R02', 'S06R01', 'S07R01', 'S08R01', 'S09R01']
> Filters = 32
Fog events in dataset: 107142
Not Fog events in dataset: 672084
(8522, 128, 9) (8522, 2) (3651, 128, 9) (3651, 2)
Training time: 484.05036544799805
Evaluation time: 0.8470971584320068
Prediction time: 0.6986227035522461
>#1: acc 85.593
>#1: spec 99.968
>#1: sens 0.000
>#1: prec 0.000


KeyboardInterrupt: ignored

Error in callback <function flush_figures at 0x7f8b1d01ae60> (for post_execute):


KeyboardInterrupt: ignored