In [32]:
from google.colab import drive
drive.mount('/content/mydrive')

Drive already mounted at /content/mydrive; to attempt to forcibly remount, call drive.mount("/content/mydrive", force_remount=True).


In [33]:
#change the current directory to the path where the given file is residing
import os
os.chdir('/content/mydrive/My Drive/Colab Notebooks')
os.getcwd()

'/content/mydrive/My Drive/Colab Notebooks'

In [34]:
import warnings
warnings.filterwarnings('ignore')

In [35]:
import pandas as pd
import numpy as np

In [36]:
#raw data
raw_x_train =  pd.read_pickle('raw200_x_train.pkl')
raw_y_train =  np.array(pd.read_pickle('raw200_y_train.pkl'))

raw_x_test = pd.read_pickle('raw200_x_test.pkl')
raw_y_test = np.array(pd.read_pickle('raw200_y_test.pkl'))

raw_x_val = pd.read_pickle('raw200_x_val.pkl')
raw_y_val = np.array(pd.read_pickle('raw200_y_val.pkl'))

In [37]:
#accuracy score dataframe
accuracy_df = pd.DataFrame([], columns = ['Train_Precision', 'Train_Recall','Train_Auc', 'Train_Accuracy','Val_Precision', 
                                          'Val_Recall', 'Val_F1','Val_Auc', 'Val_Accuracy','Test_Precision', 'Test_Recall', 
                                          'Test_F1', 'Test_Auc', 'Test_Accuracy'])
accuracy_df.head()

Unnamed: 0,Train_Precision,Train_Recall,Train_Auc,Train_Accuracy,Val_Precision,Val_Recall,Val_F1,Val_Auc,Val_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Auc,Test_Accuracy


In [38]:
print('maximum_length of train data:', raw_x_train.str.split(' ').map(lambda x: len(x)).max())
print('maximum_length of validation data:', raw_x_val.str.split(' ').map(lambda x: len(x)).max())
print('maximum_length of test data:', raw_x_test.str.split(' ').map(lambda x: len(x)).max())

maximum_length of train data: 204
maximum_length of validation data: 214
maximum_length of test data: 208


In [39]:
max_len = 214

!!!!!!!!!!! IMPORTANT NOTE !!!!!!!!!!      
       
The below set of codes can be done only for scratch models and not for pretrained ones.           

For pretrained models, we need to use the respective tokenizers.

<< START OF THE CODE >>

In [40]:
#now collect all the tokens from the data file
dist_list = []

for i in raw_x_train:
  dist_list.extend(i.split(' '))

for i in raw_x_val:
  dist_list.extend(i.split(' '))

for i in raw_x_test:
  dist_list.extend(i.split(' '))

In [41]:
#remove the duplicates
final_dist_list = list(set(dist_list))

In [42]:
word_dict = {}
index1 = 0

#create index for the above extracted words
for word in final_dist_list:
  index1 += 1
  word_dict[word] = index1

In [43]:
word_dict

{'verified': 1,
 'sound': 2,
 'exit': 3,
 'automation': 4,
 'suspect': 5,
 'therefore': 6,
 'interactive': 7,
 'window': 8,
 'desk': 9,
 'shutdown': 10,
 'echo': 11,
 'prompt': 12,
 'directory': 13,
 'tool': 14,
 'indra': 15,
 'incoming': 16,
 'subject': 17,
 'believe': 18,
 'touch': 19,
 'queue': 20,
 'ee': 21,
 'thought': 22,
 'bit': 23,
 'eb': 24,
 'rectify': 25,
 'dot': 26,
 'ticketing': 27,
 'sherlock': 28,
 'forgot': 29,
 'create': 30,
 'normally': 31,
 'periodic': 32,
 'operation': 33,
 'retrieve': 34,
 'ad': 35,
 'common': 36,
 'bob': 37,
 'sale': 38,
 'nc': 39,
 'full': 40,
 'macro': 41,
 'previously': 42,
 'typing': 43,
 'computer': 44,
 'station': 45,
 'rad': 46,
 'connect': 47,
 'network': 48,
 'exchange': 49,
 'bottom': 50,
 'reschedule': 51,
 'nearly': 52,
 'fault': 53,
 'publish': 54,
 'identify': 55,
 'pe': 56,
 'ear': 57,
 'dock': 58,
 'image': 59,
 'bi': 60,
 'exist': 61,
 'territory': 62,
 'compliance': 63,
 'toll': 64,
 'ticket': 65,
 'loop': 66,
 'permanent': 67,
 

In [44]:
def process_input(x):
  final_value = []
  split_x = x.split(' ')
  for word in split_x:
    value = word_dict[word]
    final_value.append(value)
  return final_value

In [45]:
input_text = raw_x_train.apply(process_input)
val_text = raw_x_val.apply(process_input)
test_text = raw_x_test.apply(process_input)

In [46]:
vocab_size = len(word_dict) + 1

<<< END OF THE CODE >>>

In [47]:
#now use sequence padding to post append zeros inorder to equalize the sentence structure for all the records
from tensorflow.keras.preprocessing.sequence import pad_sequences
input_final = pad_sequences(maxlen = max_len, sequences = input_text, padding="post")
val_final =  pad_sequences(maxlen = max_len, sequences = val_text, padding = "post")
test_final = pad_sequences(maxlen = max_len, sequences = test_text, padding = "post")

When the input is heavily imbalanced. 

=> We have to use class weights, which increase the loss value inversely proportional to the frequency of the class.

=> Using bias_intializer will make the model converge faster. This is done by taking log(corresponding class/ total - corresponding class)

=> Applying weighted metrics in the model evaluation(train, validation and test)



In [48]:
#calculating initial bias
total_count = raw_x_train.shape[0]

groups = pd.DataFrame(np.argmax(raw_y_train, axis=1), columns = ['Assignment group'])
group_count = groups['Assignment group'].value_counts().reset_index()
group_count.sort_values('index', inplace=True)

In [49]:
group_count

Unnamed: 0,index,Assignment group
0,0,2978
4,1,183
5,2,168
3,3,191
1,4,531
2,5,201


In [50]:
group_count['denominator'] = total_count - group_count['Assignment group'] 
group_count['initial bias'] = np.log(group_count['Assignment group'] / group_count['denominator'])
initial_bias = np.array(group_count['initial bias'])

In [51]:
initial_bias

array([ 0.84909038, -3.1016664 , -3.1908682 , -3.0569111 , -1.94698571,
       -3.00341414])

In [52]:
import numpy as np
from tensorflow import keras
from sklearn.utils.class_weight import compute_class_weight

y_integers = np.argmax(raw_y_train, axis=1)
class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
d_class_weights = dict(enumerate(class_weights))

y_integers = np.argmax(raw_y_val, axis=1)
class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
d_class_vweights = dict(enumerate(class_weights))

y_integers = np.argmax(raw_y_test, axis=1)
class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
d_class_tweights = dict(enumerate(class_weights))

In [53]:
d_class_weights

{0: 0.23796731587194986,
 1: 3.8724954462659382,
 2: 4.218253968253968,
 3: 3.7102966841186737,
 4: 1.3345888261142498,
 5: 3.525704809286899}

In [54]:
from tensorflow.keras.metrics import Precision, Recall, AUC, CategoricalAccuracy

METRICS = [      
      Precision(name='precision'),
      Recall(name='recall'),
      AUC(name='auc'),
      CategoricalAccuracy(name='accuracy')
]

In [135]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Flatten, Dense
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

model = Sequential()          #Defining a sequential layer
model.add(Embedding(input_dim = vocab_size, output_dim = 300, input_length = max_len ))
model.add(Bidirectional(LSTM(units=100, dropout=0.2))) #LSTM return sequences
model.add(Flatten())

#!!!! IMPORTANT !!!!
#can test with both giving initial bias and without giving initial bias

model.add(Dense(6, activation='softmax', bias_initializer = tf.keras.initializers.Constant(initial_bias)))
# model.add(Dense(44, activation='softmax'))


In [136]:
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_18 (Embedding)     (None, 214, 300)          518700    
_________________________________________________________________
bidirectional_17 (Bidirectio (None, 200)               320800    
_________________________________________________________________
flatten_14 (Flatten)         (None, 200)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 6)                 1206      
Total params: 840,706
Trainable params: 840,706
Non-trainable params: 0
_________________________________________________________________


In [137]:
model.compile(optimizer="Adam", loss="categorical_crossentropy", weighted_metrics=[METRICS])

###For validation and test we need to add samplewise weights 

In [138]:
#training dataset
y_train_trans = np.argmax(raw_y_train, axis=1)

train_weight = []
#calculate class weight for each sample in validation data
for i in y_train_trans:
  train_weight.append(d_class_weights[i])

train_weights = np.array(train_weight)

##validation dataset
y_val_trans = np.argmax(raw_y_val, axis=1)

val_weight = []
#calculate class weight for each sample in validation data
for i in y_val_trans:
  val_weight.append(d_class_vweights[i])

val_weights = np.array(val_weight)

#test dataset
y_test_trans = np.argmax(raw_y_test, axis=1)

test_weight = []
#calculate class weight for each sample in validation data
for i in y_test_trans:
  test_weight.append(d_class_tweights[i])

test_weights = np.array(test_weight)

In [139]:
checkpoint = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto') 
reduceLoss = ReduceLROnPlateau(monitor='val_loss', factor=0.2,patience=2, min_lr=0.0001)

In [140]:
history = model.fit(input_final, raw_y_train, batch_size=32, class_weight = d_class_weights, epochs=8,callbacks=[checkpoint,reduceLoss],validation_data =(val_final, raw_y_val, val_weights), verbose=1)

Epoch 1/8
Epoch 00001: val_accuracy improved from -inf to 0.60643, saving model to model-001-0.606431.h5
Epoch 2/8
Epoch 00002: val_accuracy improved from 0.60643 to 0.61117, saving model to model-002-0.611171.h5
Epoch 3/8
Epoch 00003: val_accuracy improved from 0.61117 to 0.69532, saving model to model-003-0.695323.h5
Epoch 4/8
Epoch 00004: val_accuracy did not improve from 0.69532
Epoch 5/8
Epoch 00005: val_accuracy did not improve from 0.69532
Epoch 6/8
Epoch 00006: val_accuracy did not improve from 0.69532
Epoch 7/8
Epoch 00007: val_accuracy did not improve from 0.69532
Epoch 8/8
Epoch 00008: val_accuracy did not improve from 0.69532


In [141]:
model.load_weights('model-003-0.695323.h5')

In [142]:
# import matplotlib.pyplot as plt
# import matplotlib as mpl

# mpl.rcParams['figure.figsize'] = (10, 7)
# colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# def plot_metrics(history):
#   metrics =  ['loss', 'auc', 'precision', 'recall']
#   for n, metric in enumerate(metrics):
#     name = metric.replace("_"," ").capitalize()
#     plt.subplot(2,2,n+1)
#     plt.plot(history.epoch,  history.history[metric], color=colors[0], label='Train')
#     plt.plot(history.epoch, history.history['val_'+metric],
#              color=colors[0], linestyle="--", label='Val')
#     plt.xlabel('Epoch')
#     plt.ylabel(name)
#     if metric == 'loss':
#       plt.ylim([0, plt.ylim()[1]])
#     elif metric == 'auc':
#       plt.ylim([0.8,1])
#     else:
#       plt.ylim([0,1])

#     plt.legend()


# plot_metrics(history)

In [143]:
from sklearn.metrics import f1_score

In [144]:
def update_accuracy_df(prec, recall, auc, accur, f1, model_name, dataset_name):
  accuracy_df.loc[model_name, dataset_name + '_Precision'] = prec * 100
  accuracy_df.loc[model_name, dataset_name + '_Recall'] = recall * 100
  accuracy_df.loc[model_name, dataset_name + '_Auc'] = auc * 100
  accuracy_df.loc[model_name, dataset_name + '_Accuracy'] = accur * 100
  if dataset_name != 'Train':
    accuracy_df.loc[model_name, dataset_name + '_F1'] = f1 * 100

Train_WPrecision, Train_WRecall, Train_WAuc, Train_WAccuracy = model.evaluate(input_final, raw_y_train, sample_weight = train_weights)[1:]
update_accuracy_df(Train_WPrecision, Train_WRecall, Train_WAuc, Train_WAccuracy, ' ', 'LSTM', 'Train' )

Val_WPrecision, Val_WRecall, Val_WAuc, Val_WAccuracy = model.evaluate(val_final, raw_y_val, sample_weight = val_weights)[1:]
predict_val = model.predict(val_final)
Val_WF1 = f1_score(np.argmax(raw_y_val, axis=1), np.argmax(predict_val, axis=1), average='weighted')
update_accuracy_df(Val_WPrecision, Val_WRecall, Val_WAuc, Val_WAccuracy, Val_WF1, 'LSTM', 'Val' )

Test_WPrecision, Test_WRecall, Test_WAuc, Test_WAccuracy = model.evaluate(test_final, raw_y_test, sample_weight = test_weights)[1:]
predict_test = model.predict(test_final)
Test_WF1 = f1_score(np.argmax(raw_y_test, axis=1), np.argmax(predict_test, axis=1), average='weighted')
update_accuracy_df(Test_WPrecision, Test_WRecall, Test_WAuc, Test_WAccuracy, Test_WF1, 'LSTM', 'Test' )



Repeating the same procedure for balanced dataset where class_weights are not required

In [145]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Flatten, Dense
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

model = Sequential()          #Defining a sequential layer
model.add(Embedding(input_dim = vocab_size, output_dim = 300, input_length = max_len ))
model.add(Bidirectional(LSTM(units=100, dropout=0.2))) #LSTM return sequences
model.add(Flatten())

#!!!! IMPORTANT !!!!
#can test with both giving initial bias and without giving initial bias

# model.add(Dense(44, activation='softmax', bias_initializer = tf.keras.initializers.Constant(initial_bias)))
model.add(Dense(6, activation='softmax'))

model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_19 (Embedding)     (None, 214, 300)          518700    
_________________________________________________________________
bidirectional_18 (Bidirectio (None, 200)               320800    
_________________________________________________________________
flatten_15 (Flatten)         (None, 200)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 6)                 1206      
Total params: 840,706
Trainable params: 840,706
Non-trainable params: 0
_________________________________________________________________


In [146]:
model.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=[METRICS])

checkpoint = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto') 
reduceLoss = ReduceLROnPlateau(monitor='val_loss', factor=0.2,patience=2, min_lr=0.0001)

history = model.fit(input_final, raw_y_train, batch_size=32,callbacks=[checkpoint,reduceLoss], epochs=8, validation_data =(val_final, raw_y_val), verbose=1)

# plot_metrics(history)

Epoch 1/8
Epoch 00001: val_accuracy improved from -inf to 0.83080, saving model to model-001-0.830799.h5
Epoch 2/8
Epoch 00002: val_accuracy improved from 0.83080 to 0.84981, saving model to model-002-0.849810.h5
Epoch 3/8
Epoch 00003: val_accuracy did not improve from 0.84981
Epoch 4/8
Epoch 00004: val_accuracy improved from 0.84981 to 0.85551, saving model to model-004-0.855513.h5
Epoch 5/8
Epoch 00005: val_accuracy did not improve from 0.85551
Epoch 6/8
Epoch 00006: val_accuracy did not improve from 0.85551
Epoch 7/8
Epoch 00007: val_accuracy did not improve from 0.85551
Epoch 8/8
Epoch 00008: val_accuracy did not improve from 0.85551


In [147]:
model.load_weights('model-004-0.855513.h5')

In [148]:
Train_WPrecision, Train_WRecall, Train_WAuc, Train_WAccuracy = model.evaluate(input_final, raw_y_train)[1:]
update_accuracy_df(Train_WPrecision, Train_WRecall, Train_WAuc, Train_WAccuracy, ' ', 'Noweight_LSTM', 'Train' )

Val_WPrecision, Val_WRecall, Val_WAuc, Val_WAccuracy = model.evaluate(val_final, raw_y_val)[1:]
predict_val = model.predict(val_final)
Val_WF1 = f1_score(np.argmax(raw_y_val, axis=1), np.argmax(predict_val, axis=1), average='weighted')
update_accuracy_df(Val_WPrecision, Val_WRecall, Val_WAuc, Val_WAccuracy, Val_WF1, 'Noweight_LSTM', 'Val' )

Test_WPrecision, Test_WRecall, Test_WAuc, Test_WAccuracy = model.evaluate(test_final, raw_y_test)[1:]
predict_test = model.predict(test_final)
Test_WF1 = f1_score(np.argmax(raw_y_test, axis=1), np.argmax(predict_test, axis=1), average='weighted')
update_accuracy_df(Test_WPrecision, Test_WRecall, Test_WAuc, Test_WAccuracy, Test_WF1, 'Noweight_LSTM', 'Test' )



SVM

In [149]:
#combining train, validation and text to create a vocabulary
combined_data = pd.concat([raw_x_train, raw_x_val])
combined_data_final = pd.concat([combined_data, raw_x_test])

combined_data_final.shape

(5251,)

In [150]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [151]:
Tfidf_vect = TfidfVectorizer(max_features=7000)
Tfidf_vect.fit(combined_data_final)

Train_X_Tfidf = Tfidf_vect.transform(raw_x_train)
Val_X_Tfidf = Tfidf_vect.transform(raw_x_val)
Test_X_Tfidf = Tfidf_vect.transform(raw_x_test)

In [152]:
print(Tfidf_vect.vocabulary_)



In [153]:
print(Train_X_Tfidf)

  (0, 1640)	0.4789835996486904
  (0, 1275)	0.43320059604353695
  (0, 1061)	0.403486512155157
  (0, 205)	0.6481593857717204
  (1, 1595)	0.17575507669189713
  (1, 1153)	0.1958293545252801
  (1, 1061)	0.23880879369757899
  (1, 990)	0.21177901790722292
  (1, 975)	0.1893253669186749
  (1, 958)	0.2862992930907054
  (1, 955)	0.3294707405366598
  (1, 917)	0.17792633057439886
  (1, 852)	0.17664655267837107
  (1, 849)	0.16058198241696794
  (1, 766)	0.18813424327854092
  (1, 643)	0.21810249875146984
  (1, 638)	0.2395659741178729
  (1, 616)	0.28863689229788975
  (1, 547)	0.24714093228990805
  (1, 373)	0.20282285268481742
  (1, 281)	0.20625600896716523
  (1, 232)	0.17811228808066296
  (1, 3)	0.3372280174373562
  (2, 1654)	0.485953354288012
  (2, 1640)	0.35572301701046494
  :	:
  (4247, 276)	0.16614040054204307
  (4247, 232)	0.12545828342677484
  (4247, 12)	0.09792353256598829
  (4248, 1705)	0.3089910622968385
  (4248, 1038)	0.34219020391329397
  (4248, 1017)	0.3886858382256509
  (4248, 990)	0.46788

In [154]:
train_y = np.argmax(raw_y_train, axis=1)
val_y = np.argmax(raw_y_val, axis=1)
test_y = np.argmax(raw_y_test, axis=1)

In [155]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier

SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto', class_weight='balanced', random_state=100)
SVM.fit(Train_X_Tfidf,train_y)

# predict the labels on validation dataset

train_pred_SVM = SVM.predict(Train_X_Tfidf)
val_pred_SVM = SVM.predict(Val_X_Tfidf)
test_pred_SVM = SVM.predict(Test_X_Tfidf)

# # Use accuracy_score function to get the accuracy
# print("SVM Accuracy Score -> ", f1_score(predictions_SVM, val_y, average='weighted')*100)

In [156]:

Train_WPrecision = precision_score(train_pred_SVM, train_y, average='weighted',sample_weight = train_weights)
Train_WRecall = recall_score(train_pred_SVM, train_y, average='weighted',sample_weight = train_weights)
# Train_WAuc = roc_auc_score(train_pred_SVM, train_y, average='weighted' )
Train_WAuc = 1
Train_WAccuracy = accuracy_score(train_pred_SVM, train_y,sample_weight = train_weights)
Train_WF1 = f1_score(train_pred_SVM, train_y, average='weighted',sample_weight = train_weights)

Val_WPrecision = precision_score(val_pred_SVM, val_y, average='weighted',sample_weight = val_weights)
Val_WRecall = recall_score(val_pred_SVM, val_y, average='weighted',sample_weight = val_weights)
# Val_WAuc = roc_auc_score(val_pred_SVM, val_y, average='weighted')
Val_WAuc = 1
Val_WAccuracy = accuracy_score(val_pred_SVM, val_y,sample_weight = val_weights)
Val_WF1 = f1_score(val_pred_SVM, val_y, average='weighted',sample_weight = val_weights)

Test_WPrecision = precision_score(test_pred_SVM, test_y, average='weighted',sample_weight = test_weights)
Test_WRecall = recall_score(test_pred_SVM, test_y, average='weighted',sample_weight = test_weights)
# Test_WAuc = roc_auc_score(test_pred_SVM, test_y, average='weighted')
Test_WAuc = 1
Test_WAccuracy = accuracy_score(test_pred_SVM, test_y,sample_weight = test_weights)
Test_WF1 = f1_score(test_pred_SVM, test_y, average='weighted',sample_weight = test_weights)

update_accuracy_df(Train_WPrecision, Train_WRecall, Train_WAuc, Train_WAccuracy, Train_WF1, 'SVM', 'Train' )
update_accuracy_df(Val_WPrecision, Val_WRecall, Val_WAuc, Val_WAccuracy, Val_WF1, 'SVM', 'Val' )
update_accuracy_df(Test_WPrecision, Test_WRecall, Test_WAuc, Test_WAccuracy, Test_WF1, 'SVM', 'Test' )


In [157]:
# Classifier - Algorithm - SVM
# fit the training dataset on the classifier

SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto', random_state = 50)
SVM.fit(Train_X_Tfidf,train_y)

# predict the labels on validation dataset

train_pred_SVM = SVM.predict(Train_X_Tfidf)
val_pred_SVM = SVM.predict(Val_X_Tfidf)
test_pred_SVM = SVM.predict(Test_X_Tfidf)

# # Use accuracy_score function to get the accuracy
# print("SVM Accuracy Score -> ", f1_score(predictions_SVM, val_y, average='weighted')*100)

In [158]:
Train_WPrecision = precision_score(train_pred_SVM, train_y, average='weighted')
Train_WRecall = recall_score(train_pred_SVM, train_y, average='weighted')
# Train_WAuc = roc_auc_score(train_pred_SVM, train_y, average='weighted' )
Train_WAuc = 1
Train_WAccuracy = accuracy_score(train_pred_SVM, train_y)
Train_WF1 = f1_score(train_pred_SVM, train_y, average='weighted')

Val_WPrecision = precision_score(val_pred_SVM, val_y, average='weighted')
Val_WRecall = recall_score(val_pred_SVM, val_y, average='weighted')
# Val_WAuc = roc_auc_score(val_pred_SVM, val_y, average='weighted')
Val_WAuc = 1
Val_WAccuracy = accuracy_score(val_pred_SVM, val_y)
Val_WF1 = f1_score(val_pred_SVM, val_y, average='weighted')

Test_WPrecision = precision_score(test_pred_SVM, test_y, average='weighted')
Test_WRecall = recall_score(test_pred_SVM, test_y, average='weighted')
# Test_WAuc = roc_auc_score(test_pred_SVM, test_y, average='weighted')
Test_WAuc = 1
Test_WAccuracy = accuracy_score(test_pred_SVM, test_y)
Test_WF1 = f1_score(test_pred_SVM, test_y, average='weighted')

update_accuracy_df(Train_WPrecision, Train_WRecall, Train_WAuc, Train_WAccuracy, Train_WF1, 'Noweight_SVM', 'Train' )
update_accuracy_df(Val_WPrecision, Val_WRecall, Val_WAuc, Val_WAccuracy, Val_WF1, 'Noweight_SVM', 'Val' )
update_accuracy_df(Test_WPrecision, Test_WRecall, Test_WAuc, Test_WAccuracy, Test_WF1, 'Noweight_SVM', 'Test' )

In [159]:
accuracy_df

Unnamed: 0,Train_Precision,Train_Recall,Train_Auc,Train_Accuracy,Val_Precision,Val_Recall,Val_F1,Val_Auc,Val_Accuracy,Test_Precision,Test_Recall,Test_F1,Test_Auc,Test_Accuracy
LSTM,84.5375,80.5474,98.1361,83.3313,71.6984,64.2457,77.1129,92.1959,69.5323,74.2737,66.1951,77.8682,94.4795,71.4673
Noweight_LSTM,93.2025,91.5804,99.4652,92.6388,86.2745,83.6502,82.7235,97.2644,85.5513,87.1739,84.778,82.9562,97.8949,85.8351
SVM,90.9212,86.2812,100.0,86.2812,68.9698,62.365,62.3466,100.0,62.365,73.4655,70.6768,70.1633,100.0,70.6768
Noweight_SVM,95.4168,89.5108,100.0,89.5108,92.548,84.2205,87.6566,100.0,84.2205,95.7585,86.0465,89.909,100.0,86.0465


In [160]:
accuracy_df.to_excel('report.xlsx')