In [None]:
import csv
import sys
import pandas as pd
import os
import glob
import itertools
import numpy as np
from collections import Counter
import math
import matplotlib.pyplot as plt
from google.colab import drive
# Mount Google Drive at /content/drive so the trace files under "My Drive"
# can be read by the cells below. force_remount=True is passed so the mount
# is redone even when the cell is executed again in the same session.
drive.mount('/content/drive',force_remount=True)

In [None]:
# Locate the trace file on the mounted Drive and load it into `df`.
path = '/content/drive/My Drive/AmericanPics/Test_traces/'
all_files = glob.glob(os.path.join(path, "ssdtrace_expanded_FULL.csv0_deathtime_added.csv"))
if not all_files:
    # Fail with a clear message instead of an opaque IndexError on all_files[0]
    # (typically Drive is not mounted or the file name changed).
    raise FileNotFoundError("No trace file found under " + path)
f = all_files[0]  # Change the file name as required
print("File " + str(f))

# The first two rows are skipped, the file has no header row, and every '-1'
# entry is read as NaN (missing value).
df = pd.read_csv(f,skiprows =2,header=None,na_values=['-1'], index_col=False,low_memory=False)
cols = ['IO','device_major','device_minor','cpu_id','timestamp','io_sequence','process_id','default_action','lba','unknown','IO_size','deathtime']
df.columns = cols
lba_list = df['lba'].tolist()
deathtime_list = df['deathtime'].tolist()

# Use the pandas reductions for the summary: they skip NaN, whereas the
# builtin min()/max() over a list containing NaN are order dependent and
# Counter() treats every NaN as a separate key.
print("Min LBA in the dataset :", df['lba'].min())
print("Max LBA in the dataset :", df['lba'].max())
print("Number of IO Accesses :",len(df))
print("Number of unique LBAs : ",df['lba'].nunique())


In [None]:
# Show the dtype pandas inferred for each of the 12 named trace columns.
df.dtypes

In [None]:
# Drop unnecessary columns

# The columns are removed by NAME rather than by position, and without
# inplace mutation. Re-running this cell is therefore a no-op; a positional
# drop would remove the remaining feature columns on a second run.
# Columns kept: timestamp, lba, IO_size, deathtime.
unused_columns = ['IO', 'device_major', 'device_minor', 'cpu_id',
                  'io_sequence', 'process_id', 'default_action', 'unknown']

print("Before drop: {}".format(df.columns))
df = df.drop(columns=unused_columns, errors='ignore')
print("After drop: {}".format(df.columns))

In [None]:
# Keep only the rows in which every column has a value, then display the
# per-column count of missing entries that are left.
df = df.dropna(axis=0, how='any')
df.isna().sum()

In [None]:
# Preview the last five rows of the cleaned frame.
df.tail(5)

In [None]:
# Number of distinct deathtime values in the frame. Series.nunique() counts
# them in vectorised code instead of building a Counter over every row.
print(df['deathtime'].nunique())

In [None]:
# Re-check the column dtypes of the frame at this point of the pipeline.
df.dtypes

In [None]:
# Put the trace in chronological order, then renumber the rows 0..n-1 so the
# index matches the sorted order (resetting before sorting would leave the
# index scrambled). A stable sort keeps requests that share a timestamp in
# their original relative order.
df = df.sort_values(by='timestamp', kind='mergesort').reset_index(drop=True)


In [None]:
# Class boundaries for deathtime: one np.percentile value per class, taken at
# interval, 2*interval, ..., 100 (i.e. 10th, 20th, ..., 100th percentile for
# num_classes = 10).
num_classes = 10
interval = 100/num_classes
deathtime_list = df['deathtime'].tolist()
deathtime_range_list = [
    np.percentile(deathtime_list, interval * (rank + 1))
    for rank in range(num_classes)
]


In [None]:
# Label every request with the percentile boundary that is numerically
# closest to its deathtime. This is the same rule as
# min(deathtime_range_list, key=lambda x: abs(x - dt)) applied row by row, but
# computed with numpy instead of a Python-level loop over the whole trace.
boundaries = np.asarray(deathtime_range_list, dtype=float)
deathtimes = np.asarray(deathtime_list, dtype=float)

# Work in chunks so the temporary (rows x boundaries) distance matrix stays
# small even for very long traces.
chunk_rows = 1000000
nearest_idx = np.empty(len(deathtimes), dtype=np.intp)
for start in range(0, len(deathtimes), chunk_rows):
  stop = start + chunk_rows
  distances = np.abs(deathtimes[start:stop, None] - boundaries[None, :])
  # argmin returns the FIRST minimum, matching the tie-break of builtin min().
  nearest_idx[start:stop] = distances.argmin(axis=1)

deathtime_class = boundaries[nearest_idx].tolist()
print(len(deathtime_class))
print(len(deathtime_list))
df['dt_class'] = deathtime_class

In [None]:
# Split every LBA into its leading digits (lba_high) and its last
# `last_n_digits` decimal digits (lba_low).
#
# The split is done arithmetically rather than by slicing str(lba). String
# slicing breaks when the column is float-typed ('123456789.0' -> '789.0')
# and when an LBA has last_n_digits digits or fewer (int('') raises).
# For non-negative integers both methods give the same result.
last_n_digits = 5
lba_list = df['lba'].tolist()

digit_base = 10 ** last_n_digits
# NaN rows were removed by the earlier dropna(), so the cast is safe.
# assumes LBAs are non-negative whole numbers - TODO confirm
lba_as_int = df['lba'].astype('int64')
lba_high = (lba_as_int // digit_base).tolist()
lba_low = (lba_as_int % digit_base).tolist()

print(len(lba_high))
print(len(lba_low))
df['lba_high'] = lba_high
df['lba_low'] = lba_low

In [None]:
# Min-max normalise the LBA features and deathtime into [0, 1].
# Each result is stored in a new '<column>_norm' column; the split cell below
# reads lba_norm, lba_low_norm, lba_high_norm and deathtime_norm, so all four
# source columns have to be listed here.

from sklearn import preprocessing

cols_to_normalize = ['lba', 'lba_low', 'lba_high', 'deathtime']
for column in cols_to_normalize:
  new_column = column + '_norm'
  col_min = df[column].min()
  col_range = df[column].max() - col_min
  if col_range == 0:
    # Constant column: avoid 0/0 (NaN) and map every value to 0.
    df[new_column] = 0.0
  else:
    df[new_column] = (df[column] - col_min) / col_range
  

In [None]:
# Renumber the rows 0..n-1; drop=True discards the previous index instead of
# keeping it as a column.
df = df.reset_index(drop=True)

In [None]:
# Map every distinct dt_class value to a contiguous integer id (0..k-1) and
# store it in 'Output_Class'. The values are sorted first, so the id reflects
# the deathtime order of the classes and is reproducible, rather than
# depending on which class happens to appear first in the trace.
a = sorted(df['dt_class'].unique().tolist())
operation_id_map = {}
# `i` is deliberately a plain loop variable: it stays defined at notebook
# scope after the loop (equal to the last class id), as it did before.
for i, class_value in enumerate(a):
  operation_id_map[class_value] = i
df['Output_Class'] = df['dt_class'].map(operation_id_map)


In [None]:
# Check the dtypes again now that derived columns (e.g. dt_class, lba_high,
# lba_low, deathtime_norm, Output_Class) have been added to the frame.
df.dtypes

In [None]:
# Split into train and test sets.
# The frame is ordered by timestamp, so this is a chronological split: the
# first 75% of the rows are used for training, the remaining 25% for testing.
import math

training_pt_1 = math.floor((len(df)*0.75))

train_df = df.iloc[:training_pt_1]
# The test part starts AT the split point. Starting at training_pt_1 + 1
# would leave the row at the split point in neither set.
test_df = df.iloc[training_pt_1:]
assert len(train_df) + len(test_df) == len(df)

lba_train = train_df['lba_norm'].tolist()
lba_test = test_df['lba_norm'].tolist()

death_time_norm_train = train_df['deathtime_norm'].tolist()
death_time_norm_test = test_df['deathtime_norm'].tolist()

lba_low_train = train_df['lba_low_norm'].tolist()
lba_low_test = test_df['lba_low_norm'].tolist()

lba_high_train = train_df['lba_high_norm'].tolist()
lba_high_test = test_df['lba_high_norm'].tolist()

# NOTE: despite the *_norm variable names, IO_size is taken as-is (it is not
# min-max normalised anywhere above).
size_norm_train = train_df['IO_size'].tolist()
size_norm_test = test_df['IO_size'].tolist()

output_train = train_df['Output_Class'].tolist()
output_test = test_df['Output_Class'].tolist()


In [None]:
def _as_column(values):
  """Return `values` as a 2-D numpy array with a single column, shape (n, 1)."""
  return np.array(values).reshape(-1, 1)

# Every feature becomes an (n, 1) column array, the layout create_dataset2
# indexes into (dataset[..., 0]).
lba_train = _as_column(lba_train)
lba_test = _as_column(lba_test)

death_time_norm_train = _as_column(death_time_norm_train)
death_time_norm_test = _as_column(death_time_norm_test)

lba_low_train = _as_column(lba_low_train)
lba_low_test = _as_column(lba_low_test)

lba_high_train = _as_column(lba_high_train)
lba_high_test = _as_column(lba_high_test)

# Built from the IO_size lists. Building them from lba_low_* (a copy-paste
# slip) made the "size" feature a duplicate of lba_low.
size_train = _as_column(size_norm_train)
size_test = _as_column(size_norm_test)

output_train = _as_column(output_train)
output_test = _as_column(output_test)

In [None]:
def create_dataset2(dataset, window_size):
    """Cut a series into (input window, following window) training pairs.

    For every start position ``i`` the input is ``dataset[i : i + window_size, 0]``
    and the target is the next ``window_size`` values,
    ``dataset[i + window_size : i + 2 * window_size, 0]``.

    Args:
        dataset: 2-D numpy array; only column 0 is read.
        window_size: length of both the input and the target window.

    Returns:
        Tuple ``(dataX, dataY)`` of numpy arrays, each of shape
        ``(num_samples, window_size)`` with
        ``num_samples = len(dataset) - 2 * window_size + 1``. When the series
        is too short for a single pair, both arrays have shape
        ``(0, window_size)``.
    """
    # "+ 1" so the final complete pair (whose target ends on the last row) is
    # included; without it the last valid window is silently dropped.
    num_samples = len(dataset) - 2 * window_size + 1

    dataX, dataY = [], []
    for i in range(max(num_samples, 0)):
        dataX.append(dataset[i:(i + window_size), 0])
        dataY.append(dataset[(i + window_size):(i + 2 * window_size), 0])

    if not dataX:
        # Keep the result 2-D so downstream reshapes work on an empty split.
        empty = np.empty((0, max(window_size, 0)), dtype=np.asarray(dataset).dtype)
        return empty, empty.copy()
    return np.array(dataX), np.array(dataY)

# Window length, used for both the input and the target sequences.
lstm_num_timesteps = 32

# For every feature build the (input window, following window) pairs,
# separately for the train and the test part.
X_train_lba, y_train_lba = create_dataset2(lba_train, lstm_num_timesteps)
X_test_lba, y_test_lba = create_dataset2(lba_test, lstm_num_timesteps)

X_train_deathtime, y_train_deathtime = create_dataset2(death_time_norm_train, lstm_num_timesteps)
X_test_deathtime, y_test_deathtime = create_dataset2(death_time_norm_test, lstm_num_timesteps)

X_train_lba_low, y_train_lba_low = create_dataset2(lba_low_train, lstm_num_timesteps)
X_test_lba_low, y_test_lba_low = create_dataset2(lba_low_test, lstm_num_timesteps)

X_train_lba_high, y_train_lba_high = create_dataset2(lba_high_train, lstm_num_timesteps)
X_test_lba_high, y_test_lba_high = create_dataset2(lba_high_test, lstm_num_timesteps)

# The size inputs get their own train/test names like every other feature.
# Assigning both results to one `X_size` left X_train_size undefined, although
# the training cell reads it.
X_train_size, y_train_size = create_dataset2(size_train, lstm_num_timesteps)
X_test_size, y_test_size = create_dataset2(size_test, lstm_num_timesteps)

X_train_output, y_train_output = create_dataset2(output_train, lstm_num_timesteps)
X_test_output, y_test_output = create_dataset2(output_test, lstm_num_timesteps)


In [None]:
lstm_num_features = 1
lstm_predict_sequences = True
# create_dataset2 produces target windows that are lstm_num_timesteps long,
# so the number of predicted steps has to be that same value; a different
# hard-coded number makes every reshape below fail.
lstm_num_predictions = lstm_num_timesteps

def _to_sequence_target(y):
  """Reshape targets to (samples, lstm_num_predictions, lstm_num_features)."""
  return np.reshape(y, (y.shape[0], lstm_num_predictions, lstm_num_features))

# Add the trailing feature axis to every target array.
# (The former y_*_response reshapes are gone: no response arrays are created
# anywhere above, so those lines raised NameError.)
y_train_lba = _to_sequence_target(y_train_lba)
y_test_lba = _to_sequence_target(y_test_lba)

y_train_deathtime = _to_sequence_target(y_train_deathtime)
y_test_deathtime = _to_sequence_target(y_test_deathtime)

y_train_lba_high = _to_sequence_target(y_train_lba_high)
y_test_lba_high = _to_sequence_target(y_test_lba_high)

y_train_lba_low = _to_sequence_target(y_train_lba_low)
y_test_lba_low = _to_sequence_target(y_test_lba_low)

y_train_size = _to_sequence_target(y_train_size)
y_test_size = _to_sequence_target(y_test_size)

y_train_output = _to_sequence_target(y_train_output)
y_test_output = _to_sequence_target(y_test_output)


In [None]:
from math import sqrt
import keras 
import tensorflow as tf
from numpy import split
from numpy import array
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
from keras.layers import LSTM
from keras.callbacks import EarlyStopping,TensorBoard
from keras.layers import Dense, Embedding
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import Concatenate, Dense, LSTM, Input, concatenate , Dot
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Activation, Embedding, Dropout, TimeDistributed, Reshape

# Length of every input sequence. It has to equal the window length used to
# build the X_* arrays, so it is derived from lstm_num_timesteps instead of
# being hard-coded separately.
maxlen = lstm_num_timesteps
hidden_size = 500

# Embedding table sizes of the two features that feed the model.
vocabulary_3 = df['deathtime_norm'].nunique()
vocabulary_6 = df['Output_Class'].nunique()

# Only the branches that are actually concatenated below are built. To use a
# further feature, add an Input + Embedding pair for it here, append it to the
# concatenate() list and to the Model inputs, and pass the matching X_* array
# in the same position to fit()/predict().
inputC = Input(shape=(maxlen,), dtype='float64')  # window of deathtime_norm
inputF = Input(shape=(maxlen,), dtype='float64')  # window of Output_Class ids

# NOTE(review): Embedding looks values up as integer indices, while
# deathtime_norm is a float in [0, 1] - presumably almost every value maps to
# index 0. Confirm this branch carries the intended information.
z = Embedding(input_dim=vocabulary_3, output_dim=hidden_size)(inputC)
v = Embedding(input_dim=vocabulary_6, output_dim=hidden_size)(inputF)

# combine the output of the two branches along the feature axis
combined = concatenate([z, v])

lstm1 = LSTM(hidden_size, return_sequences=True)(combined)
lstm2 = LSTM(hidden_size, return_sequences=True)(lstm1)

# create classification output: one softmax over the classes per timestep
output = TimeDistributed(Dense(units=vocabulary_6, activation='softmax'), name='output')(lstm2)

model = Model([inputC, inputF], [output])  # combining all into a Keras model

# The targets are integer class ids (sparse labels), so the accuracy metric
# has to be the sparse variant; 'categorical_accuracy' expects one-hot targets
# and reports a meaningless number here.
model.compile(optimizer='rmsprop',
              loss={'output': 'sparse_categorical_crossentropy'},
              metrics={'output': 'sparse_categorical_accuracy'})
model.summary()


In [None]:
import time
# 75 is the epoch count the fit() call was actually using; it now lives in
# num_epochs so the setting and the call cannot drift apart again.
num_epochs = 75
batch_size = 64
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5, verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=1, save_best_only=True) # save best model


print('Train...')
start_time = time.time()

# The model has two inputs (deathtime window, Output_Class window), so exactly
# those two arrays are passed, in the order of the Model inputs.
train_inputs = [X_train_deathtime, X_train_output]
# Validation data is (inputs, targets): the X_test_* input windows, not the
# y_* targets and no training arrays.
# NOTE(review): the test split doubles as the validation set for early
# stopping - consider holding out a separate validation slice.
valid = ([X_test_deathtime, X_test_output], [y_test_output])

model.fit(train_inputs, [y_train_output],
          verbose=1, epochs=num_epochs, batch_size=batch_size,
          callbacks=[monitor, checkpointer], validation_data=valid)
print("Training time (s):", time.time() - start_time)




In [None]:
pred1 = model.predict([(y_test_lba,y_test_size,y_train_lba_low,y_test_lba_high,y_test_deathtime,y_test_output)],verbose =1 )
pred_1 = pred1[:,i,:]
pred1 = np.argmax(pred_1, axis=1)
Counter(pred1)

In [None]:
counter = 0
correct = 0
while(counter < len(pred1)):
  og_label = output_test[counter]
  model_op = pred1[counter]
  counter = counter + 1
  if (og_label == model_op):
       correct = correct + 1
print("Accuracy",correct/len(pred1))