<a href="https://colab.research.google.com/github/Deepak345/answer_evaluator/blob/master/ans_eval(average).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Work from the project directory: later cells open q1.csv and vectors.txt by relative path.
%cd project/ 

/content/project


In [0]:
# Remove the logs/ directory left by earlier runs; the CNN and LSTM training
# functions below write their TensorBoard logs under logs/.
%rm -rf logs/

In [0]:
# importing for data exploration and analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# classical model - random forest
from sklearn.ensemble import RandomForestClassifier

# deep learning model - cnn, lstm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D,LSTM

# importing for training and test split
from sklearn.model_selection import train_test_split

# Tensorboard for visualisation
from tensorflow.keras.callbacks import TensorBoard
from time import time

In [0]:
# Read the graded answers and prepare them in one chain:
#   1. cast 'marks' to int, because the models treat the marks as integer classes,
#   2. keep only the rows that actually have an 'ans' value,
#   3. renumber the index from 0 so it is contiguous again after the filtering.
data = (
    pd.read_csv("q1.csv")
    .astype({'marks': int})
    .loc[lambda frame: frame['ans'].notna()]
    .reset_index(drop=True)
)

In [5]:
import pickle
# vectors.txt holds pickled data despite its extension, hence the binary read mode.
with open('vectors.txt', 'rb') as vector_file:
    pad_vectors = pickle.load(vector_file)

# Show the dimensions of the loaded vectors as the cell output.
pad_vectors.shape

(67, 124, 400)

In [0]:
# Padded sequence length used for the first layer of the LSTM model.
# Taken from the second axis of the loaded vectors instead of being hard-coded
# (it was the literal 124), so it cannot drift out of sync with vectors.txt.
pad_length = pad_vectors.shape[1]

In [0]:
def train_rfc(x_train, x_test, y_train, y_test, n):
    """Fit a random forest on the flattened vectors and return test accuracy in percent.

    Args:
        x_train: 3-D array of training vectors (its shape is unpacked into three axes).
        x_test: 3-D array of test vectors.
        y_train: class labels for ``x_train``.
        y_test: class labels for ``x_test``.
        n: training fraction of the current split; not used here, accepted so the
            signature matches the other ``train_*`` functions.

    Returns:
        The classifier's score on the test set, multiplied by 100.
    """
    # The forest accepts at most 2-D input, so the last two axes of every
    # sample are merged into a single feature axis.
    train_count, train_rows, train_cols = x_train.shape
    flat_train = x_train.reshape((train_count, train_rows * train_cols))

    test_count, test_rows, test_cols = x_test.shape
    flat_test = x_test.reshape((test_count, test_rows * test_cols))

    # Both the tree count and the seed are fixed, so the same split always
    # produces the same score.
    forest = RandomForestClassifier(n_estimators=26, random_state=200)
    fitted_forest = forest.fit(flat_train, y_train)
    return fitted_forest.score(flat_test, y_test) * 100

In [0]:
def train_cnn(x_train, x_test, y_train, y_test, n):
    """Train a small 1-D convolutional classifier and return test accuracy in percent.

    The stack is: dropout -> Conv1D(64 filters, kernel 4, relu) -> max-pooling (8)
    -> flatten -> Dense(60, relu) -> Dense(6, softmax). No input shape is declared,
    so the model is built from the data on the first ``fit`` call.

    Args:
        x_train: training inputs passed straight to ``fit``.
        x_test: test inputs passed straight to ``evaluate``.
        y_train: training labels (class ids, as required by the sparse
            categorical cross-entropy loss).
        y_test: test labels.
        n: training fraction of the current split; only used to name the
            TensorBoard log directory.

    Returns:
        The accuracy metric reported by ``evaluate`` on the test set, times 100.
    """
    layer_stack = [
        Dropout(0.2),
        Conv1D(64, 4, activation='relu'),
        MaxPooling1D(pool_size=8),
        Flatten(),
        Dense(60, activation='relu'),
        Dense(6, activation='softmax'),
    ]
    model_cnn = keras.Sequential(layer_stack)
    model_cnn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    # Each call logs to its own timestamped directory: logs/<n>/cnn/<time>.
    log_dir = 'logs/{}/cnn/{}'.format(n, time())
    tb_callback = TensorBoard(log_dir=log_dir)

    model_cnn.fit(
        x_train,
        y_train,
        epochs=10,
        verbose=1,
        callbacks=[tb_callback],
        shuffle=False,
    )

    # evaluate() returns [loss, accuracy] for the metrics compiled above.
    test_metrics = model_cnn.evaluate(x_test, y_test)
    return test_metrics[1] * 100

In [0]:
def train_lstm(x_train, x_test, y_train, y_test, n):
    """Train a Conv1D + LSTM classifier and return test accuracy in percent.

    The stack is: Conv1D (kernel 40, no activation) -> LSTM(96, dropout 0.2)
    -> Dense(24, sigmoid) -> Dense(6, softmax).

    The input shape is read from ``x_train`` itself, so the function no longer
    depends on a notebook-level ``pad_length`` variable or on a hard-coded
    feature size of 400. For inputs that worked before (sequence length equal
    to ``pad_length``, 400 features) the model is the same.

    Args:
        x_train: 3-D training inputs, indexed as (samples, sequence, features).
            The sequence length must be at least the Conv1D kernel size (40).
        x_test: test inputs with the same per-sample shape.
        y_train: training labels (class ids, as required by the sparse
            categorical cross-entropy loss).
        y_test: test labels.
        n: training fraction of the current split; only used to name the
            TensorBoard log directory.

    Returns:
        The accuracy metric reported by ``evaluate`` on the test set, times 100.
    """
    # int() also handles shape entries that are not plain Python ints.
    seq_len = int(x_train.shape[1])
    feature_dim = int(x_train.shape[2])

    model_lstm = keras.Sequential()

    # As in the original design, the number of convolution filters equals the
    # sequence length.
    model_lstm.add(Conv1D(input_shape=(seq_len, feature_dim), filters=seq_len, kernel_size=40))
    model_lstm.add(LSTM(96, dropout=0.2))
    model_lstm.add(Dense(24, activation='sigmoid'))
    model_lstm.add(Dense(6, activation='softmax'))

    model_lstm.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Each call logs to its own timestamped directory: logs/<n>/lstm/<time>.
    tensorboard = TensorBoard(log_dir='logs/{}/lstm/{}'.format(n, time()))

    # The History object returned by fit() was never used, so it is not kept.
    model_lstm.fit(x_train, y_train, epochs=15, verbose=1, callbacks=[tensorboard], shuffle=False)

    # evaluate() returns [loss, accuracy] for the metrics compiled above.
    results = model_lstm.evaluate(x_test, y_test)
    return results[1] * 100

In [10]:
%%time

# One inner list per training round; each inner list holds one accuracy
# (in percent) per training fraction, in the order of train_fractions.
global_acc_rfc_list = []
global_acc_cnn_list = []
global_acc_lstm_list = []

# Fractions of the data used for training: 0.2, 0.3, ..., 0.8.
train_fractions = [round(step * 0.1, 1) for step in range(2, 9)]

for training_round in range(30):
    print("TRAINING ROUND:",training_round)
    print("===============================================================================================\n===============================================================================================")
    acc_rfc_list = []
    acc_cnn_list = []
    acc_lstm_list = []
    for n in train_fractions:
        print("Training on:",n,"\n")

        # Two Keras models are built per split (420 over the whole run). Keras keeps
        # global state for every model it creates, so release it before building
        # the next pair to stop memory use growing over the run. This happens
        # before the tensors below are created, so they are never invalidated.
        keras.backend.clear_session()

        # Split into training and test sets; the test share is the complement of n.
        # NOTE(review): random_state is fixed, so every round sees the same split
        # for a given fraction. Rounds differ only through whatever randomness
        # the training functions have themselves.
        x_train, x_test, y_train, y_test = train_test_split(pad_vectors, data['marks'], test_size=round(1-n,1), random_state=22)

        # Random forest on the raw arrays.
        rfc_acc = train_rfc(x_train, x_test, y_train, y_test, n)
        acc_rfc_list.append(rfc_acc)

        # The deep-learning models are fed float32 tensors.
        x_train_dl = tf.convert_to_tensor(x_train, np.float32)
        y_train_dl = tf.convert_to_tensor(y_train, np.float32)

        x_test_dl = tf.convert_to_tensor(x_test, np.float32)
        y_test_dl = tf.convert_to_tensor(y_test, np.float32)

        print("CNN Training--------------------------------------------",n,"\n")    
        cnn_acc = train_cnn(x_train_dl, x_test_dl, y_train_dl, y_test_dl, n)
        acc_cnn_list.append(cnn_acc)

        print("\nLSTM Training--------------------------------------------",n,"\n")
        lstm_acc = train_lstm(x_train_dl, x_test_dl, y_train_dl, y_test_dl, n)
        acc_lstm_list.append(lstm_acc)

        print("\n-------------------------------------------------------------------------------------------")
        print("-------------------------------------------------------------------------------------------")
        print("-------------------------------------------------------------------------------------------\n\n\n\n")

    # Store this round's per-fraction accuracies for each model.
    global_acc_rfc_list.append(acc_rfc_list)
    global_acc_cnn_list.append(acc_cnn_list)
    global_acc_lstm_list.append(acc_lstm_list)

    print("===============================================================================================\n===============================================================================================\n===============================================================================================\n===============================================================================================\n\n\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

LSTM Training-------------------------------------------- 0.6 

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15

-------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------




Training on: 0.7 

CNN Training-------------------------------------------- 0.7 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

LSTM Training-------------------------------------------- 0.7 

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch

In [0]:
# Collapse the per-round tables into one mean accuracy per training fraction
# (axis 0 runs over the training rounds).
rfc_accuracy, cnn_accuracy, lstm_accuracy = (
    np.average(round_table, axis=0)
    for round_table in (global_acc_rfc_list, global_acc_cnn_list, global_acc_lstm_list)
)

In [12]:
# Assemble the averaged accuracies into one table:
# one row per training fraction, one column per model.
acc = pd.DataFrame(
    {'RFC': rfc_accuracy, 'CNN': cnn_accuracy, 'LSTM': lstm_accuracy},
    index=['0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8'],
)
acc

Unnamed: 0,RFC,CNN,LSTM
0.2,35.185185,45.679012,45.185186
0.3,44.680851,44.822695,52.907801
0.4,39.02439,49.756098,51.951221
0.5,52.941176,56.274511,54.803923
0.6,51.851852,53.703705,54.444446
0.7,57.142857,54.285716,63.174605
0.8,42.857143,62.142858,74.761905


In [13]:
# Dump the raw per-round accuracies, one labelled line per model.
for label, per_round in (
    ("RFC: ", global_acc_rfc_list),
    ("CNN: ", global_acc_cnn_list),
    ("LSTM: ", global_acc_lstm_list),
):
    print(label, per_round)

RFC:  [[35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.680851063829785, 39.02439024390244, 52.94117647058824, 51.85185185185185, 57.14285714285714, 42.857142857142854], [35.18518518518518, 44.68085106382