# RNN Edu
Vinitra Swamy, Madeline Wu, Wilton Wu

In [1]:
import pandas as pd
import numpy as np
import json
from sklearn import model_selection
from sklearn import metrics 

filename = 'skill_builder_data_corrected.csv'
df = pd.read_csv(filename, encoding='ISO-8859-1', low_memory=False)
df = df[(df['original'] == 1) & (df['attempt_count'] == 1) & ~(df['skill_name'].isnull())]

In [4]:
pd.set_option('display.max_columns', 500)
df.head()

Unnamed: 0,order_id,assignment_id,user_id,assistment_id,problem_id,original,correct,attempt_count,ms_first_response,tutor_mode,answer_type,sequence_id,student_class_id,position,type,base_sequence_id,skill_id,skill_name,teacher_id,school_id,hint_count,hint_total,overlap_time,template_id,answer_id,answer_text,first_action,bottom_hint,opportunity,opportunity_original
0,33022537,277618,64525,33139,51424,1,1,1,32454,tutor,algebra,5948,13241,126,MasterySection,5948,1.0,Box and Whisker,22763,73,0,3,32454,30799,,26,0,,1,1.0
1,33022709,277618,64525,33150,51435,1,1,1,4922,tutor,algebra,5948,13241,126,MasterySection,5948,1.0,Box and Whisker,22763,73,0,3,4922,30799,,55,0,,2,2.0
3,35450295,220674,70363,33110,51395,1,1,1,4859,tutor,algebra,5948,11816,22,MasterySection,5948,1.0,Box and Whisker,22763,73,0,3,4859,30059,,41,0,,2,2.0
5,35450555,220674,70363,33172,51457,1,1,1,16031,tutor,algebra,5948,11816,22,MasterySection,5948,1.0,Box and Whisker,22763,73,0,4,16031,30060,,12,0,,4,4.0
6,35450573,220674,70363,33174,51459,1,1,1,15047,tutor,algebra,5948,11816,22,MasterySection,5948,1.0,Box and Whisker,22763,73,0,4,15047,30060,,6,0,,5,5.0


In [3]:
def generate_datasets():
    users_list = df['user_id'].unique()
    skill_list = df['skill_name'].unique()

    # 不用原来的skill_id，因为有多skill的情况。
    # 把所有不同skill_name都当成一个单独skill
    # 但是这种处理方法还是不对！
    '''
        For the skill builder dataset, different skills for the same data record 
        are in different rows. This means if a student answers a multi skill 
        question, this record is duplicated several times, and each duplication 
        is tagged with one of the multi skills.    
    '''
    skill_dict = dict(zip(skill_list, np.arange(len(skill_list), dtype='int32') + 1))
    response_list = []
    skill_list = []
    assistment_list = []
    
    counter = 0
    for user in users_list:
        # 只取一个学生的
        sub_df = df[df['user_id'] == user]
        if len(sub_df) > 100:
            # 每个学生只取前100次回答
            first_hundred = sub_df.iloc[0:100]
            response_df = pd.DataFrame(index=[counter], columns=['student_id']+['r'+str(i) for i in range(100)])
            skill_df = pd.DataFrame(index=[counter], columns=['student_id']+['s'+str(i) for i in range(100)])
            assistment_df = pd.DataFrame(index=[counter], columns=['student_id']+['a'+str(i) for i in range(100)])
            
            # 第一格都是user_id
            response_df.iloc[0, 0] = first_hundred.iloc[0]['user_id']
            skill_df.iloc[0, 0] = first_hundred.iloc[0]['user_id']
            assistment_df.iloc[0, 0] = first_hundred.iloc[0]['user_id']
            
            # 用到 user_id, correct, skill_name, assistment_id 这几个列
            for i in range(100):
                response_df.iloc[0, i+1] = first_hundred.iloc[i]['correct']
                skill_df.iloc[0, i+1] = skill_dict[first_hundred.iloc[i]['skill_name']]
                assistment_df.iloc[0, i+1] = first_hundred.iloc[i]['assistment_id']
            counter += 1
            response_list.append(response_df)
            skill_list.append(skill_df)
            assistment_list.append(assistment_df)
    
    response_df = pd.concat(response_list)
    skill_df = pd.concat(skill_list)
    assistment_df = pd.concat(assistment_list)
    
    return skill_dict, response_df, skill_df, assistment_df

# 这样写很好。明确上一步结果是这4个东西！
skill_dict, response_df, skill_df, assistment_df = generate_datasets()

with open('skill_dict.json', 'w', encoding='utf-8') as f:
    to_dump_dict = {}
    for key, value in skill_dict.items():
        to_dump_dict[key] = str(value)
    json.dump(to_dump_dict, f)
response_df.to_csv('correct.tsv', sep='\t')
skill_df.to_csv('skill.tsv', sep='\t')
assistment_df.to_csv('assistment_id.tsv', sep='\t')
print('Done')

Done


In [3]:
response_df = pd.read_csv('correct.tsv', sep='\t').drop('Unnamed: 0', axis=1)
skill_df = pd.read_csv('skill.tsv', sep='\t').drop('Unnamed: 0', axis=1)
assistment_df = pd.read_csv('assistment_id.tsv', sep='\t').drop('Unnamed: 0', axis=1)
skill_dict = {}
with open('skill_dict.json', 'r', encoding='utf-8') as f:
    loaded = json.load(f)
    for k, v in loaded.items():
        skill_dict[k] = int(v)

skill_num = len(skill_dict) + 1 # including 0

def one_hot(skill_matrix, vocab_size):
    '''
    params:
        skill_matrix: 2-D matrix (student, skills)
        vocal_size: size of the vocabulary
    returns:
        a ndarray with a shape like (student, sequence_len, vocab_size)
    '''
    seq_len = skill_matrix.shape[1]
    result = np.zeros((skill_matrix.shape[0], seq_len, vocab_size))
    for i in range(skill_matrix.shape[0]):
        result[i, np.arange(seq_len), skill_matrix[i]] = 1.
    return result

def dkt_one_hot(skill_matrix, response_matrix, vocab_size):
    seq_len = skill_matrix.shape[1]
    skill_response_array = np.zeros((skill_matrix.shape[0], seq_len, 2 * vocab_size))
    for i in range(skill_matrix.shape[0]):
        skill_response_array[i, np.arange(seq_len), 2 * skill_matrix[i] + response_matrix[i]] = 1.
    return skill_response_array

def preprocess(skill_df, response_df, skill_num):
    skill_matrix = skill_df.iloc[:, 1:].values
    response_array = response_df.iloc[:, 1:].values
    skill_array = one_hot(skill_matrix, skill_num)
    skill_response_array = dkt_one_hot(skill_matrix, response_array, skill_num)
    return skill_array, response_array, skill_response_array
    

skill_array, response_array, skill_response_array = preprocess(skill_df, response_df, skill_num)

In [5]:
import keras
from keras.layers import Input, Dense, LSTM, TimeDistributed, Lambda, multiply
from keras.models import Model
from keras.optimizers import RMSprop, Adam
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K

def build_skill2skill_model(input_shape, lstm_dim=32, dropout=0.0):
    input = Input(shape=input_shape, name='input_skills')
    lstm = LSTM(lstm_dim, 
                return_sequences=True, 
                dropout=dropout,
                name='lstm_layer')(input)
    output = TimeDistributed(Dense(input_shape[-1], activation='softmax'), name='probability')(lstm)
    model = Model(inputs=[input], outputs=[output])
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.0)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

def reduce_dim(x):
    x = K.max(x, axis=-1, keepdims=True)
    return x

def build_dkt_model(input_shape, lstm_dim=32, dropout=0.0):
    input_skills = Input(shape=input_shape, name='input_skills')
    lstm = LSTM(lstm_dim, 
                return_sequences=True, 
                dropout=dropout,
                name='lstm_layer')(input_skills)
    dense = TimeDistributed(Dense(int(input_shape[-1]/2), activation='sigmoid'), name='probability_for_each')(lstm)
    
    skill_next = Input(shape=(input_shape[0], int(input_shape[1]/2)), name='next_skill_tested')
    merged = multiply([dense, skill_next], name='multiply')
    reduced = Lambda(reduce_dim, output_shape=(input_shape[0], 1), name='reduce_dim')(merged)
    
    model = Model(inputs=[input_skills, skill_next], outputs=[reduced])
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

print('skill2skill')
skill2skill_model = build_skill2skill_model((99, skill_num), lstm_dim=64)

print('dkt')
dkt_model = build_dkt_model((99, 2 * skill_num), lstm_dim=64)
    

skill2skill
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_skills (InputLayer)    (None, 99, 111)           0         
_________________________________________________________________
lstm_layer (LSTM)            (None, 99, 64)            45056     
_________________________________________________________________
probability (TimeDistributed (None, 99, 111)           7215      
Total params: 52,271
Trainable params: 52,271
Non-trainable params: 0
_________________________________________________________________
dkt
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_skills (InputLayer)        (None, 99, 222)       0                                            
___________________________________________________________________________________

In [6]:
# train skill2skill
skill2skill_model.fit(skill_array[:, 0:-1], 
                      skill_array[:, 1:],
                      epochs=20, 
                      batch_size=32, 
                      shuffle=True,
                      validation_split=0.2)

Train on 467 samples, validate on 117 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x112e2b710>

In [7]:
dkt_model.fit([skill_response_array[:, 0:-1], skill_array[:, 1:]],
              response_array[:, 1:, np.newaxis],
              epochs=20, 
              batch_size=32, 
              shuffle=True,
              validation_split=0.2)

Train on 467 samples, validate on 117 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x12e577cf8>

# Question 1
#### What were the 5 most common and 5 least common skills in this dataset? What percentage of responses are associated with the most common skill?

The 5 most common skills were: 87: Equation Solving Two or Fewer Steps, 30: Conversion of Fraction Decimals Percents, 71: Addition and Subtraction Fractions, 68: Addition and Subtraction Integers, 33: Ordering Fractions. 

The 5 least common skills were: 28: Reading a Ruler or Scale, 102: Recognize Quadratic Pattern, 98: Finding Slope from Ordered Pairs, 96: Finding Slope From Situation, 99: Distributive Property.

87 (Equation Solving Two or Fewer Steps) was the most common skill.

5.77% of the responses corresponded with this skill.

In [8]:
sorted_df = df.groupby(by=['skill_name']).count()

most = sorted_df.sort_values(by='order_id', ascending=[False]).index[0:5]
print("5 most common skills are:", [str(skill_dict[skill]) + ": " + skill for skill in most])

least = sorted_df.sort_values(by='order_id', ascending=[True]).index[0:5]
print("5 least common skills are:", [str(skill_dict[skill]) + ": " + skill for skill in least])

5 most common skills are: ['87: Equation Solving Two or Fewer Steps', '30: Conversion of Fraction Decimals Percents', '71: Addition and Subtraction Fractions', '68: Addition and Subtraction Integers', '33: Ordering Fractions']
5 least common skills are: ['28: Reading a Ruler or Scale', '102: Recognize Quadratic Pattern', '98: Finding Slope from Ordered Pairs', '96: Finding Slope From Situation', '99: Distributive Property']


In [9]:
most_df = sorted_df.sort_values(by='order_id', ascending=[False])
total = most_df.ix[:,0].sum()
most_common_skill = most_df.iloc[0,0]

most_common_skill / total

0.057708219855885375

# Question 2
#### Train the sequence prediction model using a randomly selected 70% (training set) of students' data and predict on the remaining 30% (test set). What was the overall accuracy of skill prediction in the test set? What were the top 5 hardest and easiest to predict skills? Describe the metric you chose to represent hard/easy prediction. 

In [10]:
X = skill_array[:, 0:-1]
y = skill_array[:, 1:]
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.7, test_size=0.3)

skill2skill_model.fit(X_train, 
                      y_train,
                      epochs=20, 
                      batch_size=32, 
                      shuffle=True,
                      validation_split=0.2)

Train on 326 samples, validate on 82 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x123eccc88>

In [11]:
predictions = skill2skill_model.predict(X_test)
one_hot_predictions = []
for i in np.arange(len(predictions)):
    one_hot_layer = []
    for j in np.arange(len(predictions[0])):
            index_of_max = np.argmax(predictions[i][j])
            one_hot_version = np.zeros(skill_num)
            one_hot_version[index_of_max] = 1
            one_hot_layer.append(one_hot_version)
    one_hot_predictions.append(one_hot_layer)

In [12]:
error_rate = np.count_nonzero(y_test - one_hot_predictions)/2/(y_test.shape[0] * y_test.shape[1])
1-error_rate

0.8323576675849403

The overall accuracy of the test set is 83.2%.

#### Identifying top 5 hardest/easiest to predict skills

In [13]:
import operator

one_hot_predictions - y_test

correct = {}
incorrect = {}

for i in range(len(one_hot_predictions)):
    for j in range(len(one_hot_predictions[0])):
        prediction = one_hot_predictions[i][j]
        actual = y_test[i][j]
        
        comparison = prediction - actual
        position = np.argmax(actual) + 1
        if 1 not in comparison and -1 not in comparison:
            if position in correct.keys():
                correct[position] += 1
            else:
                correct[position] = 1
        else:
            if position in incorrect.keys():
                incorrect[position] += 1
            else:
                incorrect[position] = 1
                


In [14]:
for i in np.arange(1, 112):
    if i not in correct.keys():
        correct[i] = 0
    if i not in incorrect.keys():
        incorrect[i] = 0
        
totals = [correct[i] + incorrect[i] for i in np.arange(1, 112)]
for i in range(1, len(correct)+1):
    if totals[i-1] != 0:
        correct[i] = correct[i] / totals[i-1]
        incorrect[i] = incorrect[i] / totals[i-1]
    else:
        if correct[i] != 0 or incorrect[i] != 0:
            print("something is incorrect lol")

sorted_correct = sorted(correct.items(), key=operator.itemgetter(1))
easiest_to_identify_skills = sorted_correct[-5:]
sorted_incorrect = sorted(incorrect.items(), key=operator.itemgetter(1))
hardest_to_identify_skills = sorted_incorrect[-5:]

easiest_to_identify_skills, hardest_to_identify_skills

([(47, 0.9225225225225225),
  (3, 0.9242579324462641),
  (31, 0.9281894576012223),
  (69, 0.9320388349514563),
  (2, 0.9785714285714285)],
 [(74, 1.0), (89, 1.0), (76, 1.0), (79, 1.0), (82, 1.0)])

We defined easiest to identify skills as the skills that had the highest proportion of accurate prediction. We decided not to use mere accurate prediction as a metric because it is unfair to skills that don't appear as often in the dataset. We defined hardest to identify skills analogously -- those that had the highest proportion in the cases of inaccurate prediction.

# Question 3
#### Modify parameters of the network to increase accuracy (e.g. number of hidden nodes, optimizer, number of RNN layers, number of epochs, creating a validation set and stopping training when the validation set accuracy decreases). What were your accuracy results with respect to the hyper parameters you tuned?

In [15]:
def build_betterskill2skill_model(input_shape, lstm_dim=32, dropout=0.0):
    input = Input(shape=input_shape)
    lstm = LSTM(lstm_dim, 
                return_sequences=True, 
                dropout=dropout)(input)
    output = TimeDistributed(Dense(input_shape[-1], activation='softmax'), name='probability')(lstm)
    model = Model(inputs=[input], outputs=[output])
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.0)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

print('betterskill2skill')
betterskill2skill_model = build_betterskill2skill_model((99, skill_num), lstm_dim=64)

betterskill2skill
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 99, 111)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 99, 64)            45056     
_________________________________________________________________
probability (TimeDistributed (None, 99, 111)           7215      
Total params: 52,271
Trainable params: 52,271
Non-trainable params: 0
_________________________________________________________________


In [16]:
betterskill2skill_model.fit(X_train, 
                            y_train,
                            epochs=200, 
                            batch_size=128, 
                            shuffle=True,
                            validation_split=0.2)

Train on 326 samples, validate on 82 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Ep

<keras.callbacks.History at 0x13102c0f0>

In [17]:
predictions = betterskill2skill_model.predict(X_test)
one_hot_predictions = []
for i in np.arange(len(predictions)):
    one_hot_layer = []
    for j in np.arange(len(predictions[0])):
            index_of_max = np.argmax(predictions[i][j])
            one_hot_version = np.zeros(skill_num)
            one_hot_version[index_of_max] = 1
            one_hot_layer.append(one_hot_version)
    one_hot_predictions.append(one_hot_layer)

In [18]:
error_rate = np.count_nonzero(y_test - one_hot_predictions)/2/(y_test.shape[0] * y_test.shape[1])
error_rate

0.17056932966023874

After trying many different permutations of different hidden layers, optimization functions, acitvation functions, batch sizes, and epoch sizes, many of which returned far less accurate predictions, we have created a small incremental improvement to the model by increasing batch size to 128 and epochs to 200.

# Question 4
#### Train a performance prediction model (DKT) using the same 70/30% split and report the accuracy and AUC of prediction on the 30%.

In [19]:
x = skill_response_array[:, 0:-1]
skill = skill_array[:, 1:]
response = response_array[:, 1:, np.newaxis]

x_train, x_test, skill_train, skill_test, response_train, response_test = model_selection.train_test_split(x, skill, response, train_size=0.7, test_size=0.3)

dkt_model.fit([x_train, skill_train],
              response_train,
              epochs=20, 
              batch_size=32, 
              shuffle=True,
              validation_split=0.2)


Train on 326 samples, validate on 82 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x12e4482b0>

In [20]:
dkt_predictions = dkt_model.predict([x_test, skill_test])
for i in np.arange(len(dkt_predictions)):
    for j in np.arange(len(dkt_predictions[0])):
        value = dkt_predictions[i][j][0]
        if value >= 0.5:
            dkt_predictions[i][j][0] = 1
        else:
            dkt_predictions[i][j][0] = 0


In [21]:
error_rate = np.count_nonzero(response_test - dkt_predictions)/2/(response_test.shape[0] * response_test.shape[1])
1-error_rate

0.930153810835629

In [22]:
from sklearn import metrics 

y_true = response_test.flatten()
y_score = dkt_predictions.flatten()
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score)
auc = metrics.auc(fpr, tpr)
auc

0.59891534742506813

We got an accuracy of 92.9% and an AUC of 0.6035.

# Question 5
#### Tune the hyper parameters of this model to improve accuracy and report your improvement with respect to the tuned parameters. Which lead to the most significant improvement?

In [23]:
x = skill_response_array[:, 0:-1]
skill = skill_array[:, 1:]
response = response_array[:, 1:, np.newaxis]

x_train, x_test, skill_train, skill_test, response_train, response_test = model_selection.train_test_split(x, skill, response, train_size=0.7, test_size=0.3)

dkt_model.fit([x_train, skill_train],
              response_train,
              epochs=30, 
              batch_size=38, 
              shuffle=True,
              validation_split=0.2)

dkt_predictions = dkt_model.predict([x_test, skill_test])
for i in np.arange(len(dkt_predictions)):
    for j in np.arange(len(dkt_predictions[0])):
        value = dkt_predictions[i][j][0]
        if value >= 0.5:
            dkt_predictions[i][j][0] = 1
        else:
            dkt_predictions[i][j][0] = 0

error_rate = np.count_nonzero(response_test - dkt_predictions)/2/(response_test.shape[0] * response_test.shape[1])
print("error rate:", error_rate)

Train on 326 samples, validate on 82 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
error rate: 0.06841138659320478


In [24]:
y_true = response_test.flatten()
y_score = dkt_predictions.flatten()
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score)
auc = metrics.auc(fpr, tpr)
print("auc:", auc)

auc: 0.606662474798


We tuned the hyperparameters by increasing epochs to 30 and batch size to 38.
We increased the number of epochs because the longer we train the model, the better the model is able to understand the underlying patterns. We increased the batch size because in the input, the model is able to find relationships between different students more effectively.