In [133]:
from tensorflow.keras.metrics import MeanSquaredError
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping

In [134]:
# Read both splits from disk.
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')

# Targets: the last column of the training frame.
Y_train = train_data.iloc[:, -1].to_numpy()

# Features are picked by position: column 4 onwards, minus the trailing target
# column in the training frame (the test frame is sliced to its last column).
X_train = train_data.iloc[:, 4:-1].to_numpy()
X_test = test_data.iloc[:, 4:].to_numpy()

# Bare last expression so the notebook renders the training frame.
train_data

Unnamed: 0,StudentID,Age,Gender,Ethnicity,ParentalEducation,StudyTimeWeekly,Absences,Tutoring,ParentalSupport,Extracurricular,Sports,Music,Volunteering,GPA
0,1643,18,1,Caucasian,Higher,18.118879,24,1,,1,1,0,0,1.396278
1,2753,16,1,Other,Some College,2.469271,19,0,Low,0,0,0,1,0.754751
2,2402,17,0,Asian,Bachelor,3.456401,25,0,,0,0,0,0,0.136166
3,3033,17,1,Caucasian,High School,15.838131,25,1,Moderate,0,0,0,0,1.172192
4,1991,16,0,Caucasian,,1.045373,19,1,Moderate,0,0,1,0,1.747356
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1908,2639,15,0,African American,,13.092424,13,1,Low,0,0,0,0,1.595627
1909,2096,16,0,Caucasian,Some College,2.159249,19,0,Very High,0,1,0,0,1.523945
1910,2131,16,1,Caucasian,Some College,3.572578,14,1,Moderate,1,1,0,0,2.051919
1911,2295,18,0,Other,Some College,4.402585,20,0,High,0,0,0,0,1.348997


In [135]:
def one_hard_encoding(data, classes=None):
    """One-hot encode a 1-D sequence of category labels.

    Parameters
    ----------
    data : iterable
        Category labels (e.g. one column of an object array). ``None`` and
        NaN-like values (anything that is not equal to itself) are all treated
        as one shared "missing" category.
    classes : iterable, optional
        Explicit category order to encode against; column ``j`` of the result
        corresponds to the ``j``-th distinct entry. Pass the categories seen in
        the training data here when encoding another split so both matrices
        share the same columns. When omitted, the categories are taken from
        ``data`` itself in sorted order with the missing category last.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data), n_classes)`` holding 0/1 values. A label
        that is absent from an explicitly supplied ``classes`` yields an
        all-zero row.
    """
    def _normalise(value):
        # NaN never compares equal to itself, so distinct NaN objects would
        # otherwise each become their own category; collapse them to None.
        if value is None:
            return None
        try:
            if value != value:
                return None
        except (TypeError, ValueError):
            pass
        return value

    labels = [_normalise(value) for value in data]

    if classes is None:
        unique = set(labels)
        present = [label for label in unique if label is not None]
        # Sort so the column layout is deterministic instead of following the
        # arbitrary iteration order of a set.
        try:
            ordered = sorted(present)
        except TypeError:
            # Labels of mixed, non-comparable types: fall back to a stable key.
            ordered = sorted(present, key=lambda c: (type(c).__name__, str(c)))
        if None in unique:
            ordered.append(None)
    else:
        # Keep the caller's order, dropping duplicates.
        ordered = list(dict.fromkeys(_normalise(c) for c in classes))

    class_dict = {clas: i for i, clas in enumerate(ordered)}
    ohe = np.zeros((len(labels), len(class_dict)))
    for row, label in enumerate(labels):
        col = class_dict.get(label)
        if col is not None:
            ohe[row, col] = 1
    return ohe
    

In [136]:
# One-hot encode the two categorical columns (positions 0 and 4) on the stacked
# train+test rows so that both splits share a single category-to-column
# mapping. Encoding each split on its own can produce a different column order
# (or a different number of columns when a category is missing from one split),
# which would misalign the features fed to the scaler and the model.
n_train_rows = len(X_train)
X_all = np.concatenate([X_train, X_test], axis=0)
X_all = np.concatenate(
    [
        one_hard_encoding(X_all[:, 0]),
        X_all[:, 1:4],
        one_hard_encoding(X_all[:, 4]),
        X_all[:, 5:],
    ],
    axis=1,
).astype(np.float32)

# Split back into the original train / test rows.
X_train, X_test = X_all[:n_train_rows], X_all[n_train_rows:]

In [137]:
# Standardise the features: the scaler's statistics are estimated from the
# training rows only, and the same fitted transform is then applied to both
# splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [138]:
class ANN(tf.keras.Model):
    """Feed-forward regression network.

    The network is a stack of ``Dense(relu) -> Dropout`` pairs, one pair per
    entry of ``hidden_layer_sizes``, followed by a single linear output unit.

    Parameters
    ----------
    hidden_layer_sizes : sequence of int
        Number of units in each hidden ``Dense`` layer, in order.
    p_keep : sequence of float
        Keep probability for the dropout that follows each hidden layer.
        Keras' ``Dropout`` expects the fraction to *drop*, so each value is
        converted to ``1 - p_keep``. At least one value per hidden layer is
        required; entries beyond the number of hidden layers are ignored.
        NOTE(review): ``main`` passes one more value than there are hidden
        layers -- confirm whether the first value was meant for dropout on
        the inputs.

    Raises
    ------
    ValueError
        If ``p_keep`` has fewer entries than ``hidden_layer_sizes``.
    """

    def __init__(self, hidden_layer_sizes, p_keep):
        super(ANN, self).__init__()

        layer_sizes = list(hidden_layer_sizes)
        keep_probs = list(p_keep)
        if len(keep_probs) < len(layer_sizes):
            raise ValueError(
                "p_keep needs one keep probability per hidden layer: got "
                f"{len(keep_probs)} for {len(layer_sizes)} hidden layers"
            )

        # Dropout(rate) takes the fraction of units to drop, not to keep.
        drop_rates = [1.0 - p for p in keep_probs[:len(layer_sizes)]]

        hidden_layers = []
        for units, rate in zip(layer_sizes, drop_rates):
            hidden_layers.append(tf.keras.layers.Dense(units, activation='relu'))
            hidden_layers.append(tf.keras.layers.Dropout(rate))

        # Drop rates actually applied, one per hidden layer.
        self.dropout_rates = drop_rates
        self.hidden_layers = hidden_layers
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=False):
        """Run the forward pass.

        ``training`` is forwarded to every hidden layer so that dropout is
        only active while training.
        """
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x, training=training)
        return self.output_layer(x)

    @staticmethod
    def r_squared(y_true, y_pred):
        """Coefficient of determination for the given targets / predictions.

        Both tensors are flattened first so that targets of shape ``(N,)`` and
        predictions of shape ``(N, 1)`` are compared element-wise instead of
        being broadcast against each other. When used as a Keras metric the
        value is computed on each batch and the batch values are averaged.
        """
        y_pred = tf.reshape(tf.convert_to_tensor(y_pred), [-1])
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
        residual = tf.reduce_sum(tf.square(y_true - y_pred))
        total = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
        # epsilon guards against division by zero for constant targets.
        return 1 - residual / (total + tf.keras.backend.epsilon())

    def fit_model(self, X, Y, lr=1e-2, epochs=1000, batch_sz=100, X_val=None, Y_val=None):
        """Compile the model with Adam / MSE and train with early stopping.

        Parameters
        ----------
        X, Y : array-like
            Training features and targets.
        lr : float
            Adam learning rate.
        epochs : int
            Upper bound on the number of epochs; early stopping may end
            training sooner.
        batch_sz : int
            Mini-batch size.
        X_val, Y_val : array-like, optional
            Validation split. When given, early stopping monitors ``val_loss``;
            when omitted, it monitors the training ``loss`` instead.

        Returns
        -------
        The history object returned by ``Model.fit``.

        Raises
        ------
        ValueError
            If only one of ``X_val`` / ``Y_val`` is supplied.
        """
        if (X_val is None) != (Y_val is None):
            raise ValueError("X_val and Y_val must be provided together")
        has_validation = X_val is not None

        # r_squared lives in the class namespace, so it must be referenced
        # through the class; a bare name is not visible inside a method.
        self.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                     loss='mse', metrics=['mae', ANN.r_squared])

        callbacks = [
            EarlyStopping(monitor='val_loss' if has_validation else 'loss',
                          patience=10, restore_best_weights=True)
        ]

        return self.fit(X, Y,
                        validation_data=(X_val, Y_val) if has_validation else None,
                        epochs=epochs,
                        batch_size=batch_sz,
                        callbacks=callbacks,
                        verbose=1)


In [139]:
# Filled in by main() so that later cells can use the predictions.
submission = None


def main():
    """Train an ANN on the prepared training data and predict the test set.

    Builds the network, compiles it with SGD / MSE (tracking MAE), fits it on
    ``X_train`` / ``Y_train`` and stores the test predictions in the
    module-level ``submission`` frame under the ``GPA`` column.

    NOTE(review): ``fit`` is called without ``epochs`` or ``batch_size``, so
    the Keras defaults apply, and ``ANN.fit_model`` (Adam + early stopping) is
    not used here -- confirm this is intended.
    """
    global submission

    model = ANN([4000, 4000], [0.8, 0.5, 0.5])
    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    model.fit(X_train, Y_train)

    # The model emits one value per row; flatten to a 1-D column.
    gpa_pred = model.predict(X_test)
    submission = pd.DataFrame({'GPA': gpa_pred.reshape(len(gpa_pred))})


if __name__ == '__main__':
    main()


[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 47ms/step - loss: 1.0339 - mae: 0.8028
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
