## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Collects one error figure per model family; turned into a summary table at the end.
results = dict()

In [None]:
# Load the processed dataset, then separate the regression target from the predictors.
df = pd.read_csv('../data/processed_data/student-mat.csv')
y = df['GPA']                   # target column
x = df.drop(columns=['GPA'])    # every other column is a candidate feature

In [None]:
# Drop the first remaining column by position. Rebuilding from `df` (instead of
# slicing `x` in place) keeps this cell idempotent: re-running it no longer
# strips one more column on every execution.
# NOTE(review): presumably the dropped column is a saved row index - confirm against the CSV.
x = df.drop(columns=['GPA']).iloc[:, 1:]

In [None]:
# Quick sanity check on the dimensions of the feature matrix and the target.
shape_report = f"The shape of the input features is: {x.shape}\nThe shape of target parameters is: {y.shape}"
print(shape_report)

In [None]:
# Preview the first five feature rows.
x.head(n=5)

In [None]:
# Preview the first five target values.
y.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(x, y, test_size=0.3, random_state=42)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Report the dimensions of each split, one line per array.
print(
    f"The training set input feature has size: {x_train.shape}",
    f"The training set output parameter has size: {y_train.shape}",
    f"The test set input feature has the shape: {x_test.shape}",
    f"The test set output parameter has the shape: {y_test.shape}",
    sep="\n",
)

## The Linear Regression Approach

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error

First we build a prototype model

In [None]:
# Baseline: ordinary least squares on the raw features.
# `fit` returns the estimator itself, so the trailing expression shows the same repr.
model = LinearRegression().fit(x_train, y_train)
model

In [None]:
# Predict on both splits first, then report the two errors side by side.
yhat_train, yhat_test = model.predict(x_train), model.predict(x_test)

print(f"Training MSE: {mean_squared_error(y_train, yhat_train)}")
print(f"Testing MSE: {mean_squared_error(y_test, yhat_test)}")

## Polynomial Regression Approach

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Sweep polynomial degrees 1-4 and record (train MSE, test MSE) per degree.
# Degree 1 is plain linear regression, so the baseline is measured here instead
# of being pasted in as a literal from an earlier run.
J = {}
for i in range(1, 5):
    poly_reg = PolynomialFeatures(degree = i)
    # Fit the feature expansion on the training split only and reuse it for the
    # test split. PolynomialFeatures only learns the number of input columns, so
    # the numbers do not change, but test data should never be passed to fit.
    x_poly_train = poly_reg.fit_transform(x_train)
    x_poly_test = poly_reg.transform(x_test)

    # A fresh estimator per degree keeps the sweep independent of whatever the
    # shared `model` variable happens to hold when this cell is run.
    poly_model = LinearRegression()
    poly_model.fit(x_poly_train, y_train)

    yhat_train = poly_model.predict(x_poly_train)
    yhat_test = poly_model.predict(x_poly_test)

    train_error = mean_squared_error(y_train, yhat_train)
    test_error = mean_squared_error(y_test, yhat_test)

    print("="*20)
    print(f"Polynomial degree: {i}")
    print(f"Training MSE: {train_error}")
    print(f"Testing MSE: {test_error}")

    J[str(i)] = (train_error, test_error)

In [None]:
# J maps degree -> (train MSE, test MSE). Plot one labelled line per split so
# the figure can be read on its own.
degrees = list(J.keys())
train_errors, test_errors = zip(*J.values())

plt.plot(degrees, train_errors, marker='o', label='Training MSE')
plt.plot(degrees, test_errors, marker='o', label='Testing MSE')
plt.xlabel('Polynomial degree')
plt.ylabel('MSE')
plt.title('Polynomial regression error by degree')
plt.legend();

Degree 1 clearly has the lowest test error; higher-degree models overfit the training data (high variance), so their test error increases.

In [None]:
# Degree-2 polynomial features combined with a very strong L2 penalty.
ridge = Ridge(alpha = 10e6)  # note: 10e6 == 1e7

poly_reg = PolynomialFeatures(degree = 2)
# Fit the expansion on the training split; only transform the test split.
x_poly_train = poly_reg.fit_transform(x_train)
x_poly_test = poly_reg.transform(x_test)
ridge.fit(x_poly_train, y_train)

# scikit-learn's convention is (y_true, y_pred); MSE is symmetric, so the value is unchanged.
mean_squared_error(y_test, ridge.predict(x_poly_test))

This is the lowest error that we can get from a polynomial regression model.

In [None]:
# L2-penalised linear regression on the raw features.
model = Ridge(alpha = 10)
model = model.fit(x_train, y_train)  # fit returns the same estimator
yhat_test = model.predict(x_test)
ridge_report = f"Testing MSE: {mean_squared_error(y_test, yhat_test)}"
print(ridge_report)

This is the best we can do with a linear regression model using Ridge (L2) regularization.

In [None]:
# L1-penalised linear regression on the raw features.
model = Lasso(alpha = 10)
model = model.fit(x_train, y_train)  # fit returns the same estimator
yhat_test = model.predict(x_test)
lasso_report = f"Testing MSE: {mean_squared_error(y_test, yhat_test)}"
print(lasso_report)

In [None]:
# Combined L1/L2-penalised linear regression on the raw features.
model = ElasticNet(alpha = 10)
model = model.fit(x_train, y_train)  # fit returns the same estimator
yhat_test = model.predict(x_test)
elastic_report = f"Testing MSE: {mean_squared_error(y_test, yhat_test)}"
print(elastic_report)

In [None]:
# Record the regression family's error for the final comparison.
# NOTE(review): this literal was typed in by hand and is not derived from any
# variable in the notebook - confirm which model and run produced it.
results.update({'Regression': 11.237178423746085})

## Neural Networks Approach

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.activations import linear, relu, sigmoid, softmax
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy, MeanSquaredError

In [None]:
def build_models():
    """Create the three candidate dense networks, without regularisation.

    Every network is a stack of ReLU hidden layers followed by a single
    linear output unit. The models are returned uncompiled and unfitted.

    Returns:
        list: ``[model_1, model_2, model_3]`` as Keras ``Sequential`` objects.
    """
    tf.random.set_seed(20)

    # Hidden-layer widths for each candidate, in the order they are returned.
    hidden_layouts = {
        'model_1': (25, 15),
        'model_2': (20, 12, 12, 20),
        'model_3': (32, 16, 8, 4, 12),
    }

    model_list = []
    for label, widths in hidden_layouts.items():
        stack = [Dense(units, activation = 'relu') for units in widths]
        stack.append(Dense(1, activation = 'linear'))
        model_list.append(Sequential(stack, name=label))

    return model_list

We test the architectures above and choose the one that gives the lowest error.

In [None]:
# Train every candidate architecture and collect its train / test MSE.
# The errors are the plain mean squared error (no division by 2), so the values
# printed as "MSE" match their label and are directly comparable with the
# regression section.
# NOTE(review): the candidates are compared on the test split; a separate
# validation split would avoid selecting a model on test data.
nn_train_mses = []
nn_test_mses = []

# Build the models
nn_models = build_models()

for model in nn_models:

    # Setup the loss and optimizer
    model.compile(
        loss = 'mse',
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1),
    )

    print(f"Training {model.name}...")

    # Train the model
    model.fit(
        x_train, y_train,
        epochs = 300,
        verbose = 0,
    )

    print("Done!\n")

    # Record the training MSE
    yhat_train = model.predict(x_train)
    nn_train_mses.append(mean_squared_error(y_train, yhat_train))

    # Record the test MSE
    yhat_test = model.predict(x_test)
    nn_test_mses.append(mean_squared_error(y_test, yhat_test))

# Print one summary line per model, numbered from 1
print("RESULTS:")
for model_num, (train_mse, test_mse) in enumerate(zip(nn_train_mses, nn_test_mses), start=1):
    print(
        f"Model {model_num}: Training MSE: {train_mse:.2f}, " +
        f"Test MSE: {test_mse:.2f}"
    )

### Trying Regularizers

In [None]:
from tensorflow.keras.regularizers import L2
def build_models(lambda_):
    """Create the three candidate dense networks with L2 kernel regularisation.

    Every layer, including the linear output unit, gets its own
    ``L2(lambda_)`` kernel regulariser. The models are returned uncompiled
    and unfitted.

    Args:
        lambda_: Regularisation factor passed to ``L2`` for each layer.

    Returns:
        list: ``[model_1, model_2, model_3]`` as Keras ``Sequential`` objects.
    """
    tf.random.set_seed(20)

    # Hidden-layer widths for each candidate, in the order they are returned.
    hidden_layouts = {
        'model_1': (25, 15),
        'model_2': (20, 12, 12, 20),
        'model_3': (32, 16, 8, 4, 12),
    }

    model_list = []
    for label, widths in hidden_layouts.items():
        stack = [
            Dense(units, activation = 'relu', kernel_regularizer=L2(lambda_))
            for units in widths
        ]
        stack.append(Dense(1, activation = 'linear', kernel_regularizer=L2(lambda_)))
        model_list.append(Sequential(stack, name=label))

    return model_list

In [None]:
# Train every L2-regularised candidate architecture and collect its train / test MSE.
# The errors are the plain mean squared error (no division by 2), so the values
# printed as "MSE" match their label and are directly comparable with the
# regression section.
nn_train_mses = []
nn_test_mses = []

# Build the models with a fixed regularisation factor
nn_models = build_models(0.01)

for model in nn_models:

    # Setup the loss and optimizer
    model.compile(
        loss = 'mse',
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1),
    )

    print(f"Training {model.name}...")

    # Train the model
    model.fit(
        x_train, y_train,
        epochs = 300,
        verbose = 0,
    )

    print("Done!\n")

    # Record the training MSE
    yhat_train = model.predict(x_train)
    nn_train_mses.append(mean_squared_error(y_train, yhat_train))

    # Record the test MSE
    yhat_test = model.predict(x_test)
    nn_test_mses.append(mean_squared_error(y_test, yhat_test))

# Print one summary line per model, numbered from 1
print("RESULTS:")
for model_num, (train_mse, test_mse) in enumerate(zip(nn_train_mses, nn_test_mses), start=1):
    print(
        f"Model {model_num}: Training MSE: {train_mse:.2f}, " +
        f"Test MSE: {test_mse:.2f}"
    )

We see that model 3 performs best, so we use this architecture from here on.

In [None]:
def build_model(lambda_):
    """Build the 32-16-8-4-12-1 dense network with L2 kernel regularisation.

    Args:
        lambda_: Regularisation factor passed to ``L2`` for every layer.

    Returns:
        An uncompiled Keras ``Sequential`` model named ``'model_3'``.
    """
    hidden_widths = (32, 16, 8, 4, 12)

    stack = [
        Dense(units, activation = 'relu', kernel_regularizer=L2(lambda_))
        for units in hidden_widths
    ]
    # Single linear unit for the regression output.
    stack.append(Dense(1, activation = 'linear', kernel_regularizer=L2(lambda_)))

    return Sequential(stack, name='model_3')

In [None]:
# Sweep the L2 factor over 0.002, 0.006, ..., 0.098 and record train / test MSE
# for each value. The errors are the plain mean squared error (no division by
# 2), so the values printed as "MSE" match their label.
nn_train_mses = []
nn_test_mses = []
lambdas = []

for i in range(1, 50, 2):
    lambda_ = 0.001*2*i
    lambdas.append(lambda_)
    model = build_model(lambda_)

    model.compile(
        loss = 'mse',
        optimizer = Adam(learning_rate=0.1),
    )
    print(f"Training for lambda = {lambda_}...")

    model.fit(
        x_train, y_train,
        epochs = 300,
        verbose = 0,
    )

    print("Done!\n")

    yhat_train = model.predict(x_train)
    nn_train_mses.append(mean_squared_error(y_train, yhat_train))

    yhat_test = model.predict(x_test)
    nn_test_mses.append(mean_squared_error(y_test, yhat_test))

# The three lists are index-aligned, so walk them together instead of keeping a counter.
print("RESULT:")
for lambda_, train_mse, test_mse in zip(lambdas, nn_train_mses, nn_test_mses):
    print(
        f"Lambda = {lambda_}:\nTraining MSE: {train_mse:.2f}, " +
        f"Test MSE: {test_mse:.2f}"
    )

From the above iterations, we choose the lambda value 0.026. (Note: the next cell sets `lambda_ = 0.066`; one of the two values needs to be corrected.)

In [None]:
# Final network: same layer widths as the sweep above, with a fixed L2 factor.
# NOTE(review): confirm this value is the one selected from the sweep.
lambda_ = 0.066

final_layers = [
    Dense(units, activation = 'relu', kernel_regularizer=L2(lambda_))
    for units in (32, 16, 8, 4, 12)
]
# Single linear unit for the regression output.
final_layers.append(Dense(1, activation = 'linear', kernel_regularizer=L2(lambda_)))

model = Sequential(final_layers, name='model_fin')

In [None]:
# Compile with the same loss and optimiser settings as the sweeps, then train silently.
final_optimizer = Adam(learning_rate=0.1)
model.compile(loss = 'mse', optimizer = final_optimizer)

model.fit(
    x_train, y_train,
    epochs = 300,
    verbose = 0,
)

In [None]:
# Report the plain mean squared error (not MSE/2) so the neural-network figure
# is on the same scale as the regression section's MSE.
yhat_train = model.predict(x_train)
print(f"The training set error is: {mean_squared_error(y_train, yhat_train)}")

yhat_test = model.predict(x_test)
print(f"The testing set error is: {mean_squared_error(y_test, yhat_test)}")

In [None]:
# Record the network's test error from the live predictions instead of a number
# pasted from an earlier run. The plain MSE is stored (no division by 2) so the
# final comparison uses the same metric the regression section reports.
results['NN'] = float(mean_squared_error(y_test, yhat_test))

## Decision Tree Approach

In [None]:
from xgboost import XGBRegressor

In [None]:
# Sweep XGBoost's `gamma` over 0.002, 0.006, ..., 0.098 and record
# (train MSE, test MSE) keyed by the loop value i.
# NOTE(review): the test split doubles as the early-stopping eval set, so the
# recorded test errors are optimistic; a separate validation split would avoid this.
J = {}
for i in range(1, 50, 2):
    lambda_ = 0.001*2*i  # handed to XGBoost as `gamma`
    # early_stopping_rounds is a constructor argument in current XGBoost;
    # passing it to fit() is deprecated and rejected by newer releases.
    model = XGBRegressor(n_estimators = 500, learning_rate = 0.1, verbosity = 1, random_state = 0,
                         gamma = lambda_, early_stopping_rounds = 10)
    model.fit(x_train, y_train, eval_set=[(x_test, y_test)])

    yhat_train = model.predict(x_train)
    yhat_test = model.predict(x_test)

    # scikit-learn's convention is (y_true, y_pred); MSE is symmetric, so values are unchanged.
    train_error = mean_squared_error(y_train, yhat_train)
    test_error = mean_squared_error(y_test, yhat_test)

    J[str(i)] = (train_error, test_error)

# One labelled line per split so the figure can be read on its own.
train_errors, test_errors = zip(*J.values())
plt.plot(list(J.keys()), train_errors, label='Training MSE')
plt.plot(list(J.keys()), test_errors, label='Testing MSE')
plt.xlabel('i  (gamma = 0.002 * i)')
plt.ylabel('MSE')
plt.legend();

The best result is at i = 31, i.e. gamma = 0.062.

In [None]:
# Refit XGBoost with the gamma value at i = 31 of the sweep grid.
lambda_ = 0.001*2*31  # handed to XGBoost as `gamma`
# early_stopping_rounds is a constructor argument in current XGBoost;
# passing it to fit() is deprecated and rejected by newer releases.
model = XGBRegressor(n_estimators = 500, learning_rate = 0.1, verbosity = 1, random_state = 0,
                     gamma = lambda_, early_stopping_rounds = 10)
model.fit(x_train, y_train, eval_set=[(x_test, y_test)])

yhat_train = model.predict(x_train)
yhat_test = model.predict(x_test)

In [None]:
# Report the plain mean squared error (not MSE/2) so the XGBoost figure is on
# the same scale as the regression section's MSE.
yhat_train = model.predict(x_train)
print(f"The training set error is: {mean_squared_error(y_train, yhat_train)}")

yhat_test = model.predict(x_test)
print(f"The testing set error is: {mean_squared_error(y_test, yhat_test)}")

In [None]:
# Record XGBoost's test error from the live predictions instead of a number
# pasted from an earlier run. The plain MSE is stored (no division by 2) so it
# is on the same scale as the regression section's MSE.
results['XGB'] = float(mean_squared_error(y_test, yhat_test))

# One row per model family, in insertion order.
data = {'Model': list(results.keys()), 'Error': list(results.values())}
# NOTE: this rebinds `df`, which held the raw dataset until now; the plotting
# cell below reads the summary table through this name.
df = pd.DataFrame(data)
df

In [None]:
import seaborn as sns
# Pass the frame by keyword: positional `data` is only accepted by newer
# seaborn releases, while `data=` works on old and new versions alike.
sns.barplot(data = df, x = 'Model', y = 'Error')