# Import

In [2]:
import requests 
import numpy as np
import json
import pandas as pd
from sklearn.model_selection import train_test_split

# Data Loading

In [15]:
df = pd.read_csv('data/Student_performance_data.csv')

if 'StudentID' in df.columns:
    df.drop(columns=['StudentID'], inplace=True)

    df.to_csv('data/Student_performance_data.csv')

df.head()

Unnamed: 0,Age,Gender,Ethnicity,ParentalEducation,StudyTimeWeekly,Absences,Tutoring,ParentalSupport,Extracurricular,Sports,Music,Volunteering,GPA,GradeClass
0,17,1,0,2,19.833723,7,1,2,0,0,1,0,2.929196,2.0
1,18,0,0,1,15.408756,0,0,1,0,0,0,0,3.042915,1.0
2,15,0,2,3,4.21057,26,0,2,0,0,0,0,0.112602,4.0
3,17,1,0,3,10.028829,14,0,3,1,0,0,0,2.054218,3.0
4,17,1,0,2,4.672495,17,1,3,0,0,0,0,1.288061,4.0


# Set Label Class

In [4]:
class_names = ["A", "B", "C", "D", "F"]

# Data Preprocessing

Apply one-hot-encoding to categorical features

In [16]:
ethnicity_dict = {0: 'caucasian', 1: 'african_american', 2: 'asian', 3: 'other'}
df['Ethnicity'] = [ethnicity_dict[i] for i in df['Ethnicity']]

education_dict = {0: 'None', 1: 'HighSchool', 2: 'SomeCollege', 3: 'Bachelor', 4: 'Higher'}
df['ParentalEducation'] = [education_dict[i] for i in df['ParentalEducation']]

support_dict = {0: 'None', 1: 'Low', 2: 'Moderate', 3: 'High', 4: 'VeryHigh'}
df['ParentalSupport'] = [support_dict[i] for i in df['ParentalSupport']]

In [18]:
ethnicity = pd.get_dummies(df['Ethnicity'])
parental_education = pd.get_dummies(df['ParentalEducation'])
parental_support = pd.get_dummies(df['ParentalSupport'])

ethnicity.columns = [f'Ethnicity_{col}' for col in ethnicity.columns]
parental_education.columns = [f'ParentalEducation_{col}' for col in parental_education.columns]
parental_support.columns = [f'ParentalSupport{col}' for col in parental_support.columns]

df = pd.concat([df, ethnicity, parental_education, parental_support], axis=1)
df = df.drop(columns=['Ethnicity', 'ParentalEducation', 'ParentalSupport'])
df

Unnamed: 0,Age,Gender,StudyTimeWeekly,Absences,Tutoring,Extracurricular,Sports,Music,Volunteering,GPA,...,ParentalEducation_Bachelor,ParentalEducation_HighSchool,ParentalEducation_Higher,ParentalEducation_None,ParentalEducation_SomeCollege,ParentalSupportHigh,ParentalSupportLow,ParentalSupportModerate,ParentalSupportNone,ParentalSupportVeryHigh
0,17,1,19.833723,7,1,0,0,1,0,2.929196,...,0,0,0,0,1,0,0,1,0,0
1,18,0,15.408756,0,0,0,0,0,0,3.042915,...,0,1,0,0,0,0,1,0,0,0
2,15,0,4.210570,26,0,0,0,0,0,0.112602,...,1,0,0,0,0,0,0,1,0,0
3,17,1,10.028829,14,0,1,0,0,0,2.054218,...,1,0,0,0,0,1,0,0,0,0
4,17,1,4.672495,17,1,0,0,0,0,1.288061,...,0,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,18,1,10.680555,2,0,1,0,0,0,3.455509,...,1,0,0,0,0,0,0,0,0,1
2388,17,0,7.583217,4,1,0,1,0,0,3.279150,...,0,1,0,0,0,0,0,0,0,1
2389,16,1,6.805500,20,0,0,0,0,1,1.142333,...,0,0,0,0,1,0,0,1,0,0
2390,16,1,12.416653,17,0,0,1,1,0,1.803297,...,0,0,0,1,0,0,0,1,0,0


## Split into features and label

In [33]:
X = df.drop(columns=['GradeClass'])
y = df['GradeClass']

In [68]:
row = np.array(X.iloc[0].values)
label = y.iloc[0]

print('Test row: ', row, '\n')
print('Test label: ', label)

Test row:  Age                              17.000000
Gender                            1.000000
StudyTimeWeekly                  19.833723
Absences                          7.000000
Tutoring                          1.000000
Extracurricular                   0.000000
Sports                            0.000000
Music                             1.000000
Volunteering                      0.000000
GPA                               2.929196
Ethnicity_african_american        0.000000
Ethnicity_asian                   0.000000
Ethnicity_caucasian               1.000000
Ethnicity_other                   0.000000
ParentalEducation_Bachelor        0.000000
ParentalEducation_HighSchool      0.000000
ParentalEducation_Higher          0.000000
ParentalEducation_None            0.000000
ParentalEducation_SomeCollege     1.000000
ParentalSupportHigh               0.000000
ParentalSupportLow                0.000000
ParentalSupportModerate           1.000000
ParentalSupportNone               0.000000


In [69]:
split_row = [row[i:i+1] for i in range(24)]

(24,)
[array([17.]), array([1.]), array([19.83372281]), array([7.]), array([1.]), array([0.]), array([0.]), array([1.]), array([0.]), array([2.92919559]), array([0.]), array([0.]), array([1.]), array([0.]), array([0.]), array([0.]), array([0.]), array([0.]), array([1.]), array([0.]), array([0.]), array([1.]), array([0.]), array([0.])]


In [72]:
import tensorflow as tf

model = tf.keras.models.load_model('serving_model_dir/student-performance-model/1719417006')

prediction = model.predict(split_row)

prediction_index = np.argmax(prediction)
print(class_names[prediction_index])

prediction [[7.731647e-31 4.683761e-38 1.873593e-21 0.000000e+00 1.000000e+00]]
index:  4
F


# Make Prediction Request

In [None]:
json_data = json.dumps(
    {
        "instances": row.tolist()
    }
)

endpoint = "http://localhost:8080/v1/models/student-performance-model:predict"

response = requests.post(endpoint, data=json_data)
prediction = np.argmax(response.json()["predictions"][0]).numpy()
print(class_names[prediction])