In [None]:
# Mount Google Drive inside the Colab runtime; the dataset and result paths
# used by this notebook live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Let's import all the packages that we may need:

import sys 
import numpy as np # linear algebra
from scipy.stats import randint
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv), data manipulation as in SQL
import matplotlib.pyplot as plt # this is used for the plot the graph 
import seaborn as sns # used for plot interactive graph. 
from sklearn.model_selection import train_test_split # to split the data into two parts
from sklearn.preprocessing import StandardScaler # for normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline # pipeline making
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics # for the check the error and accuracy of the model
from sklearn.metrics import mean_squared_error,r2_score
from math import sqrt
from pandas import concat
from sklearn.preprocessing import LabelEncoder

## for Deep-learning:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import SGD 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
import itertools
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Dropout

In [None]:
# Load the two simulator recordings. In both files the 'Elapsed time' column
# is parsed as a date column, renamed to 'dt' and used as the frame index.

def _read_stisim_csv(csv_path):
    """Read one STISIM CSV export, indexed by its parsed 'Elapsed time' column.

    The strings 'nan' and '?' are treated as missing values.
    """
    return pd.read_csv(
        csv_path,
        sep=',',
        parse_dates={'dt': ['Elapsed time']},
        infer_datetime_format=True,
        low_memory=False,
        na_values=['nan', '?'],
        index_col='dt',
    )


df = _read_stisim_csv('/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/data/STISIMData_Overtaking.csv')
df2 = _read_stisim_csv('/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/data/STISIMData_Turnings.csv')

# Review dtypes and non-null counts of the overtaking data.
df.info()

In [None]:
# Column dtypes and non-null counts for the turnings dataset.
df2.info()

In [None]:
# Put both frames in the same fixed column layout, with 'Target' as the last
# column. reindex keeps only the listed columns (a listed name that is missing
# from a frame would appear as an all-NaN column).
new_order = [
    "Steering wheel angle", "Gas pedal", "speed", "Brake pedal", "Clutch pedal",
    "Long Dist", "Lat Pos", "Throttle input", "Brake pedal force",
    "Left turn", "Right turn", "Gear", "RPM", "Hand wheel torque",
    "Target",
]
df, df2 = (frame.reindex(columns=new_order) for frame in (df, df2))

In [None]:
# Preview the first rows of the overtaking dataset.
df.head()

In [None]:
# Preview the first rows of the turnings dataset.
df2.head()

In [None]:
# Summary statistics of the overtaking dataset.
df.describe()

In [None]:
# (rows, columns) of the overtaking dataset.
df.shape


In [None]:
# (rows, columns) of the turnings dataset.
df2.shape

In [None]:
# Column names of the overtaking dataset.
df.columns

In [None]:
# Column names of the turnings dataset.
df2.columns

In [None]:
# Number of missing values per column in the overtaking dataset.
df.isnull().sum()

In [None]:
# Number of missing values per column in the turnings dataset (this cell
# previously repeated the check on `df`, so `df2` was never inspected).
df2.isnull().sum()

In [None]:
# Draw every column of the overtaking data in its own panel of a 6x3 grid,
# save the figure to the results folder and display it.
plt.style.use('fivethirtyeight')
series_plot_options = dict(
    subplots=True,
    layout=(6, 3),
    figsize=(22, 22),
    fontsize=10,
    linewidth=2,
    sharex=False,
    title='Visualization of the original Time Series',
)
df.plot(**series_plot_options)
plt.savefig('/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/results/Overtaking_time_series.png')
plt.show()

In [None]:
# Spearman rank correlation between all columns of the overtaking data,
# rendered as an annotated heatmap, saved to the results folder and displayed.
corr_matrix = df.corr(method='spearman')
f, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(
    corr_matrix,
    ax=ax,
    cmap='coolwarm',
    annot=True,
    fmt='.2f',
    annot_kws={"size": 10},
    linewidth=0.4,
)
# Same font size for the tick labels of both axes.
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=10)
plt.savefig('/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/results/Overtaking_correlation_matrix.png')
plt.show()

In [None]:
# Build the model inputs: separate features from labels, scale the features
# and fold every row into a (timesteps, features-per-step) matrix.
values = df.values    # overtaking recording -> training data
values2 = df2.values  # turnings recording   -> test data

# Number of input columns. The last column ('Target') is the label and must
# never be part of the inputs.
n_features = 14
# Number of timesteps each row is folded into. It has to divide n_features
# exactly; for 14 features the valid values are 1, 2, 7 and 14.
# (The previous value, 5, only fitted because the 'Target' column was being
# included among the inputs, which leaked the label into the model.)
n_timesteps = 7
if n_features % n_timesteps != 0:
    raise ValueError(
        f'n_timesteps ({n_timesteps}) must divide n_features ({n_features}) exactly'
    )
print(df.shape)
print(df.head())

# Split into train and test sets, then into inputs and labels.
train = values
test = values2
train_features, train_y = train[:, :n_features], train[:, -1]
test_features, test_y = test[:, :n_features], test[:, -1]

# Normalise the features to [0, 1]. The scaler is fitted on the training data
# only and then reused on the test data, so no test statistics leak into
# training and both sets share the same scale.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(train_features)
scaled2 = scaler.transform(test_features)

# Number of input values per sample.
n_obs = n_features
train_X, test_X = scaled, scaled2
print(train_X.shape, len(train_X), train_y.shape)

# Reshape input to be 3D [samples, timesteps, features].
features_per_step = n_obs // n_timesteps
train_X = train_X.reshape((train_X.shape[0], n_timesteps, features_per_step))
test_X = test_X.reshape((test_X.shape[0], n_timesteps, features_per_step))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

In [None]:
from sklearn.preprocessing import LabelBinarizer

# Binarise the class labels. The encoder learns the classes from the training
# labels and the same encoder is then applied to the test labels.
lb = LabelBinarizer()
train_y_transformed = lb.fit(train_y).transform(train_y)
test_y_transformed = lb.transform(test_y)

# Show the first 5 original labels next to their binarised rows.
for preview in (train_y[:5], train_y_transformed[:5]):
    print(preview)

# Check the container types of the inputs and of the encoded labels.
for container in (train_X, train_y_transformed):
    print(type(container))

# Check the dimensions of every array that is used from here on.
for array in (train_X, train_y, train_y_transformed,
              test_X, test_y, test_y_transformed):
    print(array.shape)

In [None]:
# Number of output units: one per column of the binarised test labels.
num_clases = test_y_transformed.shape[1]
# Size of the second axis of the input array, kept as a 1-tuple.
input_shape = (train_X.shape[1],)
# Report the number of classes.
print('Numero de clases:\n', num_clases)

In [None]:
# Stratified hold-out: 20% of the training samples are set aside for
# validation, keeping the class proportions in both parts.
from sklearn.model_selection import train_test_split

# random_state is fixed (same value as the notebook's seed, 10) because this
# split runs before the global seeds are set; without it every run would
# train and validate on a different partition.
train2_X, Validation_X, train_y_transformed2, Validation_y = train_test_split(
    train_X,
    train_y_transformed,
    test_size=0.2,
    stratify=train_y_transformed,
    random_state=10,
)

In [None]:
from keras.callbacks import ModelCheckpoint
from keras.utils import set_random_seed
from numpy.random import seed

# Option 2: model checkpointing. After every epoch the model is saved only if
# 'val_loss' improved, so the file always holds the best model seen so far.
# (This callback does not stop training early.)
# The path is absolute, like every other path in this notebook; the previous
# relative path "content/sample_data/best" resolved against the working
# directory instead of /content/sample_data.
checkpoint = ModelCheckpoint("/content/sample_data/best", monitor='val_loss', verbose=1, save_best_only=True)

# Fix the random seeds (NumPy and Keras) so weight initialisation and dropout
# are repeatable between runs.
semilla = 10
seed(semilla)
set_random_seed(semilla)

In [None]:
# Import the function directly: importing the `class_weight` module would
# shadow the `class_weight` dictionary that is passed to model.fit.
from sklearn.utils.class_weight import compute_class_weight

# Alternative 1 (kept for reference, not used for training): hand-picked weights.
peso_clase0 = 1.
peso_clase1 = 2.  # one class-1 sample counts as 2 class-0 samples
peso_clase2 = 4.  # one class-2 sample counts as 4 class-0 samples
manual_class_weight = {0: peso_clase0, 1: peso_clase1, 2: peso_clase2}

# Alternative 2 (the one used): weights derived from the class frequencies of
# the training labels. train_y must be an array.
pesos = compute_class_weight('balanced', classes=np.unique(train_y), y=train_y)
print(pesos)

# Build the dictionary expected by Keras from the balanced weights: class 0
# gets its weight plus one, classes 1 and 2 get three times their weight, all
# truncated to integers.
# NOTE(review): the keys assume exactly three classes encoded as 0, 1 and 2 —
# confirm against the values of the 'Target' column.
class_weight = {
    0: int(pesos[0] + 1),
    1: int(pesos[1] * 3),
    2: int(pesos[2] * 3),
}
print(class_weight)

In [None]:
# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
num_hidden_neurons = 128  # units of the LSTM layer and of the hidden Dense layer
epochs = 100              # number of training epochs
dropout = 0.5             # dropout rate applied to the LSTM output
batch_size = 72
activation = 'relu'       # activation of the hidden Dense layer

# ---------------------------------------------------------------------------
# Model: LSTM -> Dropout -> Dense -> Dense(softmax) with one output unit per
# class, trained with categorical cross-entropy and the Adam optimiser.
# ---------------------------------------------------------------------------
model = Sequential()
model.add(LSTM(num_hidden_neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dropout(dropout))
model.add(Dense(num_hidden_neurons, activation=activation))
model.add(Dense(num_clases, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['mse', 'accuracy'])

# Tag that identifies this configuration in the names of the result files.
cadena = str(epochs) + str(num_hidden_neurons) + activation + str(n_timesteps)
results_prefix = '/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/results/Fase1/Overtaking' + cadena

# Fit the network, validating after every epoch on the hold-out split.
history = model.fit(
    train2_X,
    train_y_transformed2,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[checkpoint],
    validation_data=(Validation_X, Validation_y),
    verbose=2,
    shuffle=False,
    validation_freq=1,
    class_weight=class_weight,
)

# 0-based index of the epoch with the lowest validation loss (first one on
# ties). The history lists are indexed with this value directly: the previous
# `final_epoch-1` reported the epoch *before* the best one and wrapped around
# to the last epoch when the best epoch was the first.
final_epoch = int(np.argmin(history.history['val_loss']))
print('Best epoch:', final_epoch + 1)
for metric_name in ('loss', 'val_loss', 'accuracy', 'val_accuracy'):
    print(metric_name + ':', history.history[metric_name][final_epoch])

# Learning curves (train vs. validation) for accuracy and loss, each saved to
# the results folder and displayed.
for metric_name, legend_loc in (('accuracy', 'upper left'), ('loss', 'upper right')):
    plt.plot(history.history[metric_name])
    plt.plot(history.history['val_' + metric_name])
    plt.title('model ' + metric_name)
    plt.ylabel(metric_name)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.savefig(results_prefix + '_' + metric_name + '.png')
    plt.show()

# Save the per-epoch loss and accuracy to text files.
for file_suffix, metric_name in (
    ('_historicoTrainLoss.txt', 'loss'),
    ('_historicoValLoss.txt', 'val_loss'),
    ('_historicoTrainAcc.txt', 'accuracy'),
    ('_historicoValAcc.txt', 'val_accuracy'),
):
    np.savetxt(results_prefix + file_suffix, history.history[metric_name])

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Evaluate the model on the full training set and on the test set.
# NOTE(review): this uses the weights left by the last training epoch; the
# best checkpoint saved during training is not reloaded here — confirm that
# this is intended.
# return_dict=True returns the results keyed by metric name. Reading them by
# position was error-prone: the model is compiled with metrics ['mse',
# 'accuracy'], so index 1 is the MSE and index 2 the accuracy, and the
# previous report printed them under swapped labels.
train_results = model.evaluate(train_X, train_y_transformed, verbose=1, return_dict=True)
test_results = model.evaluate(test_X, test_y_transformed, verbose=1, return_dict=True)
print(f"Train results - Loss: {train_results['loss']} - Accuracy: {train_results['accuracy']} - MSE: {train_results['mse']}")
print(f"Test results - Loss: {test_results['loss']} - Accuracy: {test_results['accuracy']} - MSE: {test_results['mse']}")

# Raw predictions: one score per class for every test sample.
raw_testPred = model.predict(test_X)
print(raw_testPred[:5])
# Predicted class: index of the highest score.
# NOTE(review): these indices are compared with the original labels below,
# which assumes the labels are encoded as 0, 1, 2 — confirm.
testPred = np.argmax(raw_testPred, axis=1)
class_labels = ['Cont', 'izq', 'dch']

# Confusion matrix and other metrics.
cm = confusion_matrix(test_y, testPred)
print('Matriz de confusión')
print(cm)

# Draw the confusion matrix as a heatmap, save it and display it.
dataframe = pd.DataFrame(cm)
sns.heatmap(cm, xticklabels=class_labels, yticklabels=class_labels, annot=True, linewidths=0.1, fmt='d', cmap='YlGnBu')
plt.title("Confusion Matrix")
plt.ylabel("True Class")
plt.xlabel("Predicted Class")
# Adjust the layout once title and axis labels are in place.
plt.tight_layout()
plt.savefig('/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/results/Fase1/Overtaking' + cadena + '_confusionMatrix.png')
plt.show()

print('Classification Report')
print(classification_report(test_y, testPred))

In [None]:
# Collect the predicted classes in a single-column table ('Predictions') and
# write it to a CSV file, without the row index.
output_data = pd.DataFrame({'Predictions': testPred})
output_data.to_csv(
    '/content/drive/MyDrive/UNIVERSIDAD/CUARTO/TFG/fase2/SinProcesar/Overtaking' + cadena + '.csv',
    index=False,
)