# Extracting driver embeddings

In [1]:
import os
import math
import scipy as sp
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sklearn
import torch
import torch.nn as nn
import torch.optim as optim
import model_wrappers

In [2]:
# Record whether a CUDA device is usable; the flag is forwarded to the
# encoder through the hyperparameter dictionary.
cuda = torch.cuda.is_available()
if cuda:
    print("Using CUDA...")

# Index of the GPU handed to the encoder alongside the `cuda` flag.
gpu = 0

In [None]:
# Load the raw recordings into a DataFrame. The path below is a placeholder
# and has to be replaced with the real CSV location before running.
data = pd.read_csv("data file path ")

In [None]:
# Display the (rows, columns) shape of the loaded frame.
data.shape

In [None]:
# Give the ".1"-suffixed coolant column a meaningful name.
# NOTE(review): presumably a duplicated CSV header that pandas de-duplicated
# with a ".1" suffix -- confirm against the raw file.
column_fixes = {'Engine_coolant_temperature.1': 'transmission_oil_temperature'}
data.rename(columns=column_fixes, inplace=True)
data.head()

In [None]:
# Remove the bookkeeping columns and preview the first five rows.
unused_columns = ["Time(s)", "Class", "PathOrder"]
data = data.drop(columns=unused_columns)
data.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Standardize every column of the frame in place (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the full data set, before the
# train/test split performed further down.
scaler = StandardScaler()
all_columns = data.columns
data[all_columns] = scaler.fit_transform(data[all_columns])
data.head()

In [15]:
# Split the frame into a feature matrix (first 51 columns) and a label
# vector (column at position 52).
# NOTE(review): the column at position 51 is used by neither -- confirm that
# skipping it is intentional.
feature_columns = slice(0, 51)
label_column = 52
m_features = data.iloc[:, feature_columns].values
m_labels = data.iloc[:, label_column].values

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation.
# The split is seeded so that it is identical on every run: the encoder
# representations are cached with np.save / np.load further down, and a
# fresh random split would leave the cached features misaligned with the
# labels produced here.
X_train, X_test, y_train, y_test = train_test_split(
    m_features, m_labels, test_size=0.3, random_state=0
)

In [None]:
# Report the feature/label shapes of both splits.
train_shapes = (X_train.shape, y_train.shape)
test_shapes = (X_test.shape, y_test.shape)
print('Training data shape : ', *train_shapes)
print('Testing data shape : ', *test_shapes)

In [18]:
# Collect the distinct (sorted) labels seen in the training split.
classes = np.unique(y_train)
nClasses = classes.shape[0]
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)

Total number of outputs :  10
Output classes :  ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J']


In [19]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Map the class labels to integer codes.
# The encoder is fitted on the training labels only and then *applied* to the
# test labels. Re-fitting on the test split (fit_transform) could assign
# different codes to the same class whenever the two splits do not contain
# exactly the same set of labels.
labelencoder_X_1 = LabelEncoder()
y_train = labelencoder_X_1.fit_transform(y_train)
# Raises ValueError if the test split contains a label unseen during fit.
y_test = labelencoder_X_1.transform(y_test)

In [20]:
# Convert both label vectors to float32 arrays.
label_dtype = np.float32
y_train = np.asarray(y_train).astype(label_dtype)
y_test = np.asarray(y_test).astype(label_dtype)

In [21]:
#added for a test
#X_train = np.transpose(np.array(X_train))[2].reshape(1, 1, -1)
#X_test= np.transpose(np.array(X_test))[2].reshape(1, 1, -1)

In [None]:
# Add a trailing length-1 axis so each sample becomes a (51, 1) array.
# NOTE(review): presumably (channels, time steps) as expected by the encoder
# ("in_channels" is 51) -- confirm against model_wrappers.
encoder_input_shape = (-1, 51, 1)
X_train = X_train.reshape(encoder_input_shape)
X_test = X_test.reshape(encoder_input_shape)
X_train.shape, X_test.shape

In [23]:
# Set to True to train a new model; when False the saved encoder is loaded
# from `model` instead.
training = False

# Prefix to path to the saved model (placeholder -- replace with a real path).
# Note that `model` holds a path string, not a model object.
model = 'model path'

In [24]:
# Keyword arguments applied to the encoder through set_params().
# The original literal listed "out_channels" twice (160, then 64). Python
# keeps only the last occurrence, so 64 was the value actually in effect and
# is the one retained here.
# NOTE(review): the feature-reshape cell further down assumes a width of 160;
# confirm which width the saved encoder / cached representations use.
hyperparameters = {
    "batch_size": 10,
    "channels": 30,
    "compared_length": None,
    "depth": 10,
    "nb_steps": 500,
    "in_channels": 51,
    "kernel_size": 3,
    "penalty": None,
    "early_stopping": None,
    "lr": 0.001,
    "nb_random_samples": 10,
    "negative_penalty": 1,
    "out_channels": 64,
    "reduced_size": 80,
    "cuda": cuda,
    "gpu": gpu,
}


In [None]:
# Build the encoder wrapper with its defaults, then apply the hyperparameter
# dictionary defined above.
encoder_yearly = model_wrappers.CausalCNNEncoderClassifier()
encoder_yearly.set_params(**hyperparameters)

In [None]:
# Either restore a previously saved encoder or train one from scratch and
# persist it under the `model` path prefix.
if not training:
    encoder_yearly.load_encoder(model)
else:
    encoder_yearly.fit_encoder(X_train, save_memory=True, verbose=True)
    encoder_yearly.save_encoder(model)

In [27]:
# Set to True to compute the representations with the encoder and save them;
# when False they are loaded from the two paths below instead.

compute_representations = False
# Placeholder locations of the cached train / test representations.
storage_train_day = 'train representation path '
storage_test_day = 'test representation path'


In [None]:
# Reuse cached representations when available; otherwise encode both splits
# (window size 1) and cache each result right after it is computed.
if not compute_representations:
    train_features_day = np.load(storage_train_day)
    test_features_day = np.load(storage_test_day)
else:
    train_features_day = encoder_yearly.encode_window(X_train, 1)
    np.save(storage_train_day, train_features_day)
    test_features_day = encoder_yearly.encode_window(X_test, 1)
    np.save(storage_test_day, test_features_day)

In [None]:
# Display the shapes of the train / test representation arrays.
train_features_day.shape, test_features_day.shape

In [None]:
# Flatten every sample's representation into one row.
# The row count is pinned to the number of samples and the width is inferred,
# instead of hard-coding the embedding size (160 or 64): a hard-coded width
# that does not match the encoder's output either fails or silently changes
# the number of rows, which misaligns the features with the labels.
train_features_day = train_features_day.reshape(train_features_day.shape[0], -1)
test_features_day = test_features_day.reshape(test_features_day.shape[0], -1)
train_features_day.shape, test_features_day.shape

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Fit a default-parameter SVC on the learned representations and score it on
# the held-out representations.
svc = SVC().fit(train_features_day, y_train)
valid_prediction = svc.predict(test_features_day)
embedding_accuracy = accuracy_score(y_test, valid_prediction)
print("validation accuracy : ", embedding_accuracy)

In [None]:
# Collapse the raw inputs to 2-D (samples, 51) so they can be passed straight
# to scikit-learn estimators.
flat_shape = (-1, 51)
X_train = X_train.reshape(flat_shape)
X_test = X_test.reshape(flat_shape)
X_train.shape, X_test.shape

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Baseline: the same default-parameter SVC, trained on the raw features
# rather than on the encoder representations.
svc = SVC().fit(X_train, y_train)
valid_prediction = svc.predict(X_test)
raw_accuracy = accuracy_score(y_test, valid_prediction)
print("validation accuracy : ", raw_accuracy)

In [None]:
import matplotlib.pyplot as plt
import scikitplot as skplt
# Normalized confusion matrix for the predictions currently stored in
# `valid_prediction` (produced by an SVC in the cells above, not a K-NN model).

skplt.metrics.plot_confusion_matrix(y_test, valid_prediction, normalize=True, cmap='GnBu')
plt.show()

In [56]:
#np.save("/Users/mozhi/Desktop/X_train.npy", X_train)
#np.save("/Users/mozhi/Desktop/X_test.npy", X_test)
#np.save("/Users/mozhi/Desktop/y_test.npy", y_test)
#np.save("/Users/mozhi/Desktop/y_train.npy", y_train)
#np.save("/Users/mozhi/Desktop/result.npy", valid_prediction)


In [None]:
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=True,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are labelled "True label" and columns
        "Predicted label" on the plot.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, default True
        When True every row is divided by its sum before plotting, so the
        colours, the printed matrix and the cell annotations all show
        per-class fractions. Rows that sum to zero are left as zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.

    The (possibly normalized) matrix is also printed to stdout. The function
    draws on the current figure and does not call ``plt.show()``.
    """
    cm = np.asarray(cm)

    # Normalize BEFORE drawing so the image and the text annotations are
    # based on the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Guard against classes with no samples (0 / 0 would give NaN).
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        cm = cm.astype('float') / safe_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Two decimals for fractional values, plain integers for raw counts.
    fmt = '.2f' if np.issubdtype(cm.dtype, np.floating) else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Confusion matrix of the latest predictions against the held-out labels.
cnf_matrix = confusion_matrix(y_true=y_test, y_pred=valid_prediction)
np.set_printoptions(precision=2)

# Render the normalized matrix on a fresh figure.
# (Pass normalize=False with a matching title to plot the raw counts instead.)
plt.figure()
plot_confusion_matrix(
    cnf_matrix,
    classes=classes,
    normalize=True,
    title='Normalized confusion matrix',
)
plt.show()

In [None]:
# Draw a random subset of the test representations (and their labels) for
# the t-SNE plot. Rows are sampled WITHOUT replacement so no point appears
# twice, and the sample size is capped at the number of available rows.
n_rows = test_features_day.shape[0]
sample_size = min(20000, n_rows)
idx = np.random.choice(n_rows, size=sample_size, replace=False)
rand = test_features_day[idx, :]

truth = y_test[idx,]

In [None]:
from sklearn.manifold import TSNE
import seaborn as sns

# Project the sampled representations to two dimensions with t-SNE.
s = TSNE(n_components=2, perplexity=150, random_state=0, n_iter=3000)
embedding_2d = s.fit_transform(rand)

# Attach the true label as a third column and wrap everything in a frame.
tdata = np.vstack((embedding_2d.T, truth)).T
tdf2 = pd.DataFrame(tdata, columns=["dim1", "dim2", "label"])

# Scatter plot of the 2-D projection, coloured by label.
grid = sns.FacetGrid(tdf2, hue='label', height=6)
grid.map(plt.scatter, "dim1", "dim2")
grid.add_legend()
plt.show()