In [10]:
import numpy as np # used for arrays & loading data
import tensorflow as tf # for building neural networks
from tensorflow.keras.models import Sequential  # model type that we will use
from tensorflow.keras.layers import Dense # we will use Dense layers
from tensorflow.keras.activations import linear, relu, sigmoid # some activation functions that we may use
from sklearn.preprocessing import StandardScaler # z-score normalization 

# suppress warnings
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

In [11]:
# loading all the training data
data = np.loadtxt('Student_performance_data.csv', delimiter=',', skiprows=1 )
print(data.shape)

(2392, 15)


In [12]:
X = data[:,:-1] # forming the input and output of the training data
y = data[:,-1]

y = np.expand_dims(y, axis=1) # make y 2D - the commands later will require it


# split into training , cross validation and test sets
from sklearn.model_selection import train_test_split
# TRAINING SET - 60%
X_train, X_temporary, y_train, y_temporary = train_test_split(X, y, test_size=0.40, random_state=1)

# the rest of 40% - CV SET(20%) and TEST SET(20%)
X_cv, X_test, y_cv, y_test = train_test_split(X_temporary, y_temporary, test_size=0.50, random_state=1)
del X_temporary, y_temporary

print(f"training input shape:{X_train.shape}")
print(f"training output shape:{y_train.shape}")
print(f"cv input shape:{X_cv.shape}")
print(f"cv output shape:{y_cv.shape}")
print(f"test input shape:{X_test.shape}")
print(f"test output shape:{y_test.shape}")

# applying z-score to all the training, cv and test data - adjust the data based on its distribution - Adam will converge faster
standard_scaler = StandardScaler()
X_train_scaled = standard_scaler.fit_transform(X_train)
X_cv_scaled = standard_scaler.transform(X_cv)
X_test_scaled = standard_scaler.transform(X_test)  # use transform, because we want the same z-score used for the training data

training input shape:(1435, 14)
training output shape:(1435, 1)
cv input shape:(478, 14)
cv output shape:(478, 1)
test input shape:(479, 14)
test output shape:(479, 1)


In [13]:
# we will use a Sequential model with Dense layers

model= Sequential(
    [
        tf.keras.Input(shape=(14,)), # input size (each song has 14 features)
        Dense(10,activation="relu", name="layer1"),   # usually, for multiclassification we use relu for all layers
        Dense(5,activation="linear", name="layer2"), # but for the last layer we use linear 
    ], name="multiclass_model"
)

In [14]:
# see details about the parameters and output of activation at every layer 
model.summary()

In [15]:
# define loss and optimizer of the Adam's algorithm
model.compile(
    # this is similar to gradient descent, but it is a much improved version
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), # multiclass loss
    optimizer=tf.keras.optimizers.Adam(0.01), # preimplemented optimizer
)

In [16]:
# train the model "epochs" times
model.fit(
    X_train_scaled,y_train,
    epochs=40,
)

Epoch 1/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227us/step - loss: 1.5115 
Epoch 2/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 710us/step - loss: 0.8994
Epoch 3/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - loss: 0.7712
Epoch 4/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - loss: 0.7260
Epoch 5/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 710us/step - loss: 0.6854
Epoch 6/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 503us/step - loss: 0.6323
Epoch 7/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - loss: 0.6372
Epoch 8/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - loss: 0.5986
Epoch 9/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - loss: 0.5471
Epoch 10/40
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - l

<keras.src.callbacks.history.History at 0x215925d0320>

In [17]:
y_prediction = model.predict(X_train_scaled)  # prediction on train dataset (output matrix, where each row has 11 elements - corresponding to the nr of classes)
print(y_prediction.shape)               

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 710us/step
(1435, 5)


In [18]:
y_prediction_classes=np.empty((1,1))

# since we did not use softmax to see the exact probability for every class, argmax will help us to choose the index of the greatest element on each row
print(y_prediction_classes.shape)
for i in range(y_prediction.shape[0]):
    max_element_index = np.argmax(y_prediction[i]) # this index represents the class predicted
    y_prediction_classes = np.concatenate((y_prediction_classes,[[max_element_index]]),axis=0)
y_prediction_classes = y_prediction_classes[1:]
print(y_prediction_classes)

(1, 1)
[[3.]
 [3.]
 [4.]
 ...
 [1.]
 [2.]
 [1.]]


In [19]:
error = np.mean(y_prediction_classes != y_train) 
print(f"Training Set Classification Error: {error}")

Training Set Classification Error: 0.11358885017421602


In [24]:
# do the exact same operations for CV dataset to see the CV error

y_prediction = model.predict(X_cv_scaled) 
print(y_prediction.shape) 

y_prediction_classes=np.empty((1,1))
for i in range(y_prediction.shape[0]):
    max_element_index = np.argmax(y_prediction[i]) # this index represents the class predicted
    y_prediction_classes = np.concatenate((y_prediction_classes,[[max_element_index]]),axis=0)
y_prediction_classes = y_prediction_classes[1:]

error = np.mean(y_prediction_classes != y_cv) 
print(f"CV Set Classification Error: {error}")

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
(478, 5)
CV Set Classification Error: 0.1506276150627615


In [25]:
# do the exact same operations for TEST dataset to see the TEST error

y_prediction = model.predict(X_test_scaled) 
print(y_prediction.shape) 

y_prediction_classes=np.empty((1,1))
for i in range(y_prediction.shape[0]):
    max_element_index = np.argmax(y_prediction[i]) # this index represents the class predicted
    y_prediction_classes = np.concatenate((y_prediction_classes,[[max_element_index]]),axis=0)
y_prediction_classes = y_prediction_classes[1:]

error = np.mean(y_prediction_classes != y_test) 
print(f"Test Set Classification Error: {error}")

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
(479, 5)
Test Set Classification Error: 0.19206680584551147


In [26]:
# user interaction
id = int(input("id :"))
age = int(input("age :"))
gender = int(input("gender (1/0) :"))
etnicity = int(input("etnicity :"))
parental_education = int(input("parental_education :"))

studytime = int(input("studytime :"))
absences = int(input("absences :"))
tutoring = int(input("tutoring (1/0) :"))
support = int(input("Parental support :"))
extra = int(input("extracurricular (1/0) :"))

sports = int(input("sports (1/0):"))
music = int(input("music (1/0) :"))
volunteer = int(input("volunteering (1/0) :"))
gpa = int(input("gpa :"))

x_user = np.array([[id,age,gender,etnicity,parental_education,studytime,absences,tutoring,support,extra,sports,music,volunteer,gpa]]) # we need a 2D array 
x_user_scaled = standard_scaler.transform(x_user)
y_predicted = model.predict(x_user)
max_element_index = np.argmax(y_predicted)

print(f"Estimated grade is {max_element_index}.")

id : 1001
age : 16
gender (1/0) : 1
etnicity : 0
parental_education : 4
studytime : 13
absences : 2
tutoring (1/0) : 1
Parental support : 1
extracurricular (1/0) : 1
sports (1/0): 1
music (1/0) : 1
volunteering (1/0) : 1
gpa : 4


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Estimated grade is 4.
