In [1]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Importing data sets to be used
training_data = pd.read_csv('combined_csv.csv')
test_data = pd.read_csv('exams.csv')

In [4]:
# Removing some columns that are not needed just yet. 
#training_data = training_data.drop(columns=['race/ethnicity'])
test_data = test_data.drop(columns=['race/ethnicity'])
test_data

Unnamed: 0,gender,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,bachelor's degree,standard,none,72,72,74
1,female,some college,standard,completed,69,90,88
2,female,master's degree,standard,none,90,95,93
3,male,associate's degree,free/reduced,none,47,57,44
4,male,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...
995,female,master's degree,standard,completed,88,99,95
996,male,high school,free/reduced,none,62,55,55
997,female,high school,free/reduced,completed,59,71,65
998,female,some college,standard,completed,68,78,77


In [5]:
# Encoding our dummy data
training_data = pd.get_dummies(training_data)
training_data = training_data.drop(columns=['gender_male', 'lunch_standard', 'test preparation course_none'])
training_data = training_data.rename(columns={"gender_female": "gender", "lunch_free/reduced":"lunch"})

test_data = pd.get_dummies(test_data)
test_data = test_data.drop(columns=['gender_male', 'lunch_standard', 'test preparation course_none'])
test_data = test_data.rename(columns={"gender_female": "gender", "lunch_free/reduced":"lunch"})
test_data

Unnamed: 0,math score,reading score,writing score,gender,parental level of education_associate's degree,parental level of education_bachelor's degree,parental level of education_high school,parental level of education_master's degree,parental level of education_some college,parental level of education_some high school,lunch,test preparation course_completed
0,72,72,74,1,0,1,0,0,0,0,0,0
1,69,90,88,1,0,0,0,0,1,0,0,1
2,90,95,93,1,0,0,0,1,0,0,0,0
3,47,57,44,0,1,0,0,0,0,0,1,0
4,76,78,75,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
995,88,99,95,1,0,0,0,1,0,0,0,1
996,62,55,55,0,0,0,1,0,0,0,1,0
997,59,71,65,1,0,0,1,0,0,0,1,1
998,68,78,77,1,0,0,0,0,1,0,0,1


In [6]:
# Establishing letter grade in the form of numeric encoded variables 
# One-hot encode integer labels
# 1 - A
# 2 - B
# 3 - C
# 4 - D
# 5 - F

# def parse_values(x):
#     if x > 90:
#        return "A"
#     elif x > 80:
#        return "B"
#     elif x > 70:
#        return "C"
#     elif x > 64:
#         return "D"
#     else:
#         return "F

def parse_values(x):
    if x > 90:
       return 1
    elif x > 80:
       return 2
    elif x > 70:
       return 3
    elif x > 64:
        return 4
    else:
        return 5
    
training_data['math grade'] = training_data['math score'].apply(parse_values)
training_data['reading grade'] = training_data['reading score'].apply(parse_values)
training_data['writing grade'] = training_data['writing score'].apply(parse_values)
test_data['math grade'] = training_data['math score'].apply(parse_values)
test_data['reading grade'] = training_data['reading score'].apply(parse_values)
test_data['writing grade'] = training_data['writing score'].apply(parse_values)
test_data

Unnamed: 0,math score,reading score,writing score,gender,parental level of education_associate's degree,parental level of education_bachelor's degree,parental level of education_high school,parental level of education_master's degree,parental level of education_some college,parental level of education_some high school,lunch,test preparation course_completed,math grade,reading grade,writing grade
0,72,72,74,1,0,1,0,0,0,0,0,0,4,5,5
1,69,90,88,1,0,0,0,0,1,0,0,1,5,5,5
2,90,95,93,1,0,0,0,1,0,0,0,0,5,5,5
3,47,57,44,0,1,0,0,0,0,0,1,0,5,5,5
4,76,78,75,0,0,0,0,0,1,0,0,0,5,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,88,99,95,1,0,0,0,1,0,0,0,1,5,5,5
996,62,55,55,0,0,0,1,0,0,0,1,0,4,3,3
997,59,71,65,1,0,0,1,0,0,0,1,1,4,4,4
998,68,78,77,1,0,0,0,0,1,0,0,1,5,3,3


In [7]:
# Segment X and y training data into different arrays

X_train = training_data[["gender", "lunch", "test preparation course_completed", "reading grade", "writing grade"]]
y_train = training_data["math grade"]

X_test = test_data[["gender", "lunch", "test preparation course_completed", "reading grade", "writing grade"]]
y_test = test_data["math grade"]
y_test

0      4
1      5
2      5
3      5
4      5
      ..
995    5
996    4
997    4
998    5
999    5
Name: math grade, Length: 1000, dtype: int64

In [8]:
# Convert the dataframe to a numpy array for Keras
X_train = X_train.values

In [9]:
y_train = to_categorical(y_train)
y_train[:10]

array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0.]], dtype=float32)

In [10]:
X_test = X_test.values
y_test = to_categorical(y_test)
y_test.shape

(1000, 6)

In [11]:
# Create an empty sequential model
model = Sequential()

In [12]:
# Add the first layer where the input dimensions are the 561 columns of the training data
model.add(Dense(100, activation='relu', input_dim=X_train.shape[1]))

In [13]:
# Add a second hidden layer
model.add(Dense(100, activation='relu'))

In [14]:
# The output layer has 13 columns that are one-hot encoded
y_train.shape

(5000, 6)

In [15]:
# Add output layer
model.add(Dense(y_train.shape[1], activation="softmax"))

In [16]:
# Compile the model using categorical_crossentropy for the loss function, the adam optimizer,
# and add accuracy to the training metrics
model.compile(loss="categorical_crossentropy",
              optimizer="adam", metrics=['accuracy'])

In [17]:
# Use the training data to fit (train) the model
model.fit(
    X_train,
    y_train,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Train on 5000 samples
Epoch 1/1000
5000/5000 - 0s - loss: 1.2202 - accuracy: 0.4656
Epoch 2/1000
5000/5000 - 0s - loss: 0.9817 - accuracy: 0.6008
Epoch 3/1000
5000/5000 - 0s - loss: 0.8553 - accuracy: 0.6428
Epoch 4/1000
5000/5000 - 0s - loss: 0.8203 - accuracy: 0.6500
Epoch 5/1000
5000/5000 - 0s - loss: 0.8005 - accuracy: 0.6598
Epoch 6/1000
5000/5000 - 0s - loss: 0.7973 - accuracy: 0.6606
Epoch 7/1000
5000/5000 - 0s - loss: 0.8011 - accuracy: 0.6632
Epoch 8/1000
5000/5000 - 0s - loss: 0.7963 - accuracy: 0.6686
Epoch 9/1000
5000/5000 - 0s - loss: 0.7911 - accuracy: 0.6632
Epoch 10/1000
5000/5000 - 0s - loss: 0.7921 - accuracy: 0.6672
Epoch 11/1000
5000/5000 - 0s - loss: 0.7893 - accuracy: 0.6682
Epoch 12/1000
5000/5000 - 0s - loss: 0.7918 - accuracy: 0.6682
Epoch 13/1000
5000/5000 - 0s - loss: 0.7885 - accuracy: 0.6616
Epoch 14/1000
5000/5000 - 0s - loss: 0.7943 - accuracy: 0.6662
Epoch 15/1000
5000/5000 - 0s - loss: 0.7912 - accuracy: 0.6648
Epoch 16/1000
5000/5000 - 0s - loss: 0.786

Epoch 131/1000
5000/5000 - 0s - loss: 0.7642 - accuracy: 0.6758
Epoch 132/1000
5000/5000 - 0s - loss: 0.7658 - accuracy: 0.6756
Epoch 133/1000
5000/5000 - 0s - loss: 0.7634 - accuracy: 0.6714
Epoch 134/1000
5000/5000 - 0s - loss: 0.7634 - accuracy: 0.6766
Epoch 135/1000
5000/5000 - 0s - loss: 0.7625 - accuracy: 0.6718
Epoch 136/1000
5000/5000 - 0s - loss: 0.7636 - accuracy: 0.6752
Epoch 137/1000
5000/5000 - 0s - loss: 0.7666 - accuracy: 0.6746
Epoch 138/1000
5000/5000 - 0s - loss: 0.7622 - accuracy: 0.6778
Epoch 139/1000
5000/5000 - 0s - loss: 0.7635 - accuracy: 0.6720
Epoch 140/1000
5000/5000 - 0s - loss: 0.7636 - accuracy: 0.6758
Epoch 141/1000
5000/5000 - 0s - loss: 0.7621 - accuracy: 0.6754
Epoch 142/1000
5000/5000 - 0s - loss: 0.7638 - accuracy: 0.6724
Epoch 143/1000
5000/5000 - 0s - loss: 0.7650 - accuracy: 0.6760
Epoch 144/1000
5000/5000 - 0s - loss: 0.7652 - accuracy: 0.6782
Epoch 145/1000
5000/5000 - 0s - loss: 0.7644 - accuracy: 0.6778
Epoch 146/1000
5000/5000 - 0s - loss: 0.

5000/5000 - 0s - loss: 0.7577 - accuracy: 0.6726
Epoch 260/1000
5000/5000 - 0s - loss: 0.7596 - accuracy: 0.6748
Epoch 261/1000
5000/5000 - 0s - loss: 0.7593 - accuracy: 0.6740
Epoch 262/1000
5000/5000 - 0s - loss: 0.7591 - accuracy: 0.6746
Epoch 263/1000
5000/5000 - 0s - loss: 0.7586 - accuracy: 0.6738
Epoch 264/1000
5000/5000 - 0s - loss: 0.7580 - accuracy: 0.6764
Epoch 265/1000
5000/5000 - 0s - loss: 0.7571 - accuracy: 0.6760
Epoch 266/1000
5000/5000 - 0s - loss: 0.7577 - accuracy: 0.6736
Epoch 267/1000
5000/5000 - 0s - loss: 0.7595 - accuracy: 0.6738
Epoch 268/1000
5000/5000 - 0s - loss: 0.7580 - accuracy: 0.6770
Epoch 269/1000
5000/5000 - 0s - loss: 0.7593 - accuracy: 0.6768
Epoch 270/1000
5000/5000 - 0s - loss: 0.7571 - accuracy: 0.6786
Epoch 271/1000
5000/5000 - 0s - loss: 0.7579 - accuracy: 0.6770
Epoch 272/1000
5000/5000 - 0s - loss: 0.7571 - accuracy: 0.6744
Epoch 273/1000
5000/5000 - 0s - loss: 0.7567 - accuracy: 0.6756
Epoch 274/1000
5000/5000 - 0s - loss: 0.7567 - accuracy

Epoch 388/1000
5000/5000 - 0s - loss: 0.7542 - accuracy: 0.6784
Epoch 389/1000
5000/5000 - 0s - loss: 0.7552 - accuracy: 0.6766
Epoch 390/1000
5000/5000 - 0s - loss: 0.7571 - accuracy: 0.6738
Epoch 391/1000
5000/5000 - 0s - loss: 0.7555 - accuracy: 0.6764
Epoch 392/1000
5000/5000 - 0s - loss: 0.7552 - accuracy: 0.6752
Epoch 393/1000
5000/5000 - 0s - loss: 0.7559 - accuracy: 0.6740
Epoch 394/1000
5000/5000 - 0s - loss: 0.7563 - accuracy: 0.6728
Epoch 395/1000
5000/5000 - 0s - loss: 0.7578 - accuracy: 0.6748
Epoch 396/1000
5000/5000 - 0s - loss: 0.7555 - accuracy: 0.6730
Epoch 397/1000
5000/5000 - 0s - loss: 0.7547 - accuracy: 0.6766
Epoch 398/1000
5000/5000 - 0s - loss: 0.7542 - accuracy: 0.6800
Epoch 399/1000
5000/5000 - 0s - loss: 0.7550 - accuracy: 0.6752
Epoch 400/1000
5000/5000 - 0s - loss: 0.7547 - accuracy: 0.6780
Epoch 401/1000
5000/5000 - 0s - loss: 0.7555 - accuracy: 0.6784
Epoch 402/1000
5000/5000 - 0s - loss: 0.7553 - accuracy: 0.6736
Epoch 403/1000
5000/5000 - 0s - loss: 0.

5000/5000 - 0s - loss: 0.7534 - accuracy: 0.6776
Epoch 517/1000
5000/5000 - 0s - loss: 0.7528 - accuracy: 0.6736
Epoch 518/1000
5000/5000 - 0s - loss: 0.7551 - accuracy: 0.6756
Epoch 519/1000
5000/5000 - 0s - loss: 0.7530 - accuracy: 0.6744
Epoch 520/1000
5000/5000 - 0s - loss: 0.7550 - accuracy: 0.6728
Epoch 521/1000
5000/5000 - 0s - loss: 0.7553 - accuracy: 0.6774
Epoch 522/1000
5000/5000 - 0s - loss: 0.7532 - accuracy: 0.6744
Epoch 523/1000
5000/5000 - 0s - loss: 0.7549 - accuracy: 0.6754
Epoch 524/1000
5000/5000 - 0s - loss: 0.7523 - accuracy: 0.6748
Epoch 525/1000
5000/5000 - 0s - loss: 0.7547 - accuracy: 0.6736
Epoch 526/1000
5000/5000 - 0s - loss: 0.7544 - accuracy: 0.6788
Epoch 527/1000
5000/5000 - 0s - loss: 0.7548 - accuracy: 0.6770
Epoch 528/1000
5000/5000 - 0s - loss: 0.7540 - accuracy: 0.6750
Epoch 529/1000
5000/5000 - 0s - loss: 0.7537 - accuracy: 0.6720
Epoch 530/1000
5000/5000 - 0s - loss: 0.7532 - accuracy: 0.6786
Epoch 531/1000
5000/5000 - 0s - loss: 0.7539 - accuracy

Epoch 645/1000
5000/5000 - 0s - loss: 0.7520 - accuracy: 0.6778
Epoch 646/1000
5000/5000 - 0s - loss: 0.7530 - accuracy: 0.6776
Epoch 647/1000
5000/5000 - 0s - loss: 0.7541 - accuracy: 0.6752
Epoch 648/1000
5000/5000 - 0s - loss: 0.7527 - accuracy: 0.6750
Epoch 649/1000
5000/5000 - 0s - loss: 0.7520 - accuracy: 0.6780
Epoch 650/1000
5000/5000 - 0s - loss: 0.7541 - accuracy: 0.6760
Epoch 651/1000
5000/5000 - 0s - loss: 0.7544 - accuracy: 0.6744
Epoch 652/1000
5000/5000 - 0s - loss: 0.7525 - accuracy: 0.6802
Epoch 653/1000
5000/5000 - 0s - loss: 0.7532 - accuracy: 0.6788
Epoch 654/1000
5000/5000 - 0s - loss: 0.7522 - accuracy: 0.6754
Epoch 655/1000
5000/5000 - 0s - loss: 0.7526 - accuracy: 0.6748
Epoch 656/1000
5000/5000 - 0s - loss: 0.7527 - accuracy: 0.6770
Epoch 657/1000
5000/5000 - 0s - loss: 0.7532 - accuracy: 0.6774
Epoch 658/1000
5000/5000 - 0s - loss: 0.7542 - accuracy: 0.6726
Epoch 659/1000
5000/5000 - 0s - loss: 0.7524 - accuracy: 0.6770
Epoch 660/1000
5000/5000 - 0s - loss: 0.

5000/5000 - 0s - loss: 0.7518 - accuracy: 0.6752
Epoch 774/1000
5000/5000 - 0s - loss: 0.7510 - accuracy: 0.6774
Epoch 775/1000
5000/5000 - 0s - loss: 0.7511 - accuracy: 0.6778
Epoch 776/1000
5000/5000 - 0s - loss: 0.7519 - accuracy: 0.6750
Epoch 777/1000
5000/5000 - 0s - loss: 0.7524 - accuracy: 0.6738
Epoch 778/1000
5000/5000 - 0s - loss: 0.7517 - accuracy: 0.6756
Epoch 779/1000
5000/5000 - 0s - loss: 0.7512 - accuracy: 0.6770
Epoch 780/1000
5000/5000 - 0s - loss: 0.7518 - accuracy: 0.6786
Epoch 781/1000
5000/5000 - 0s - loss: 0.7525 - accuracy: 0.6768
Epoch 782/1000
5000/5000 - 0s - loss: 0.7515 - accuracy: 0.6770
Epoch 783/1000
5000/5000 - 0s - loss: 0.7525 - accuracy: 0.6732
Epoch 784/1000
5000/5000 - 0s - loss: 0.7508 - accuracy: 0.6748
Epoch 785/1000
5000/5000 - 0s - loss: 0.7533 - accuracy: 0.6796
Epoch 786/1000
5000/5000 - 0s - loss: 0.7520 - accuracy: 0.6764
Epoch 787/1000
5000/5000 - 0s - loss: 0.7527 - accuracy: 0.6770
Epoch 788/1000
5000/5000 - 0s - loss: 0.7522 - accuracy

Epoch 902/1000
5000/5000 - 0s - loss: 0.7515 - accuracy: 0.6760
Epoch 903/1000
5000/5000 - 0s - loss: 0.7519 - accuracy: 0.6776
Epoch 904/1000
5000/5000 - 0s - loss: 0.7517 - accuracy: 0.6748
Epoch 905/1000
5000/5000 - 0s - loss: 0.7525 - accuracy: 0.6772
Epoch 906/1000
5000/5000 - 0s - loss: 0.7517 - accuracy: 0.6760
Epoch 907/1000
5000/5000 - 0s - loss: 0.7502 - accuracy: 0.6758
Epoch 908/1000
5000/5000 - 0s - loss: 0.7529 - accuracy: 0.6740
Epoch 909/1000
5000/5000 - 0s - loss: 0.7517 - accuracy: 0.6776
Epoch 910/1000
5000/5000 - 0s - loss: 0.7517 - accuracy: 0.6742
Epoch 911/1000
5000/5000 - 0s - loss: 0.7525 - accuracy: 0.6774
Epoch 912/1000
5000/5000 - 0s - loss: 0.7514 - accuracy: 0.6754
Epoch 913/1000
5000/5000 - 0s - loss: 0.7515 - accuracy: 0.6760
Epoch 914/1000
5000/5000 - 0s - loss: 0.7524 - accuracy: 0.6766
Epoch 915/1000
5000/5000 - 0s - loss: 0.7514 - accuracy: 0.6746
Epoch 916/1000
5000/5000 - 0s - loss: 0.7514 - accuracy: 0.6774
Epoch 917/1000
5000/5000 - 0s - loss: 0.

<tensorflow.python.keras.callbacks.History at 0x258de457438>

In [18]:
# Save the model
model.save("exam_model.h5")

In [19]:
# Load the model
from tensorflow.keras.models import load_model
model = load_model("exam_model.h5")

In [20]:
# Evaluate the model using the training data
model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1000/1000 - 0s - loss: 2.2767 - accuracy: 0.5180
Loss: 2.2766518421173094, Accuracy: 0.5180000066757202


In [21]:
# Grab just one data point to test with
test = np.expand_dims(X_test[66], axis=0)
test.shape

(1, 5)

In [22]:
# Make a prediction. The result should be 5 - STANDING
print(f"Predicted class: {model.predict_classes(test)}")

Predicted class: [5]
