In [None]:
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt 
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Download the working dataset from its Google Drive share link.
# The file is referred to as "working_data.pkl", but it is parsed as CSV here.
share_link = 'https://drive.google.com/file/d/1-dXLFHTFpFQi6agd_UqkKl76BFgx5HdP/view?usp=sharing'
file_id = share_link.split('/')[-2]  # the id is the second-to-last path segment
loc = 'https://drive.google.com/uc?export=download&id=' + file_id
print(loc)
working_df = pd.read_csv(loc)
working_df = working_df.fillna("")  # missing cells become empty strings

# Download the debug dataset ("debug_data.pkl", also parsed as CSV) the same way.
share_link = 'https://drive.google.com/file/d/1-fDWeNN8UG3eZtQiwjPRxED7AoomPeeG/view?usp=sharing'
file_id = share_link.split('/')[-2]
loc = 'https://drive.google.com/uc?export=download&id=' + file_id
print(loc)
debug_df = pd.read_csv(loc)
debug_df = debug_df.fillna("")

https://drive.google.com/uc?export=download&id=1-dXLFHTFpFQi6agd_UqkKl76BFgx5HdP
https://drive.google.com/uc?export=download&id=1-fDWeNN8UG3eZtQiwjPRxED7AoomPeeG


In [None]:
# Preview the first five rows of the working data (head() defaults to n=5).
working_df.head()

Unnamed: 0,Year,Term,Subject,Number,Course Title,Sched Type,Primary Instructor,GPA
0,2015,Fall,LAS,101,Freshman Seminar,DIS,"Hoffman, Ruth A",2.875417
1,2011,Summer,MATH,124,Finite Mathematics,LCD,"Arroyo, Aisha H",2.904286
2,2016,Fall,MATH,417,Intro to Abstract Algebra,,"Tramel, Rebecca",3.284074
3,2017,Spring,ACCY,302,Decision Making for Atg,,"Fanning, Kirsten B",3.381765
4,2020,Spring,ECE,120,Introduction to Computing,LEC,"Aggarwal, Anu",3.595


In [None]:
# Preview the first five rows of the debug data (head() defaults to n=5).
debug_df.head()

Unnamed: 0,Year,Term,Subject,Number,Course Title,Sched Type,Primary Instructor,GPA
0,2020,Spring,ACCY,503,Managerial Accounting A,ONL,"Hecht, Gary W",3.644779
1,2015,Fall,BUS,101,Business Prof Responsibility,LCD,"DeBrock, Lawrence M",3.308148
2,2018,Spring,ME,310,Fundamentals of Fluid Dynamics,,"Juarez, Gabriel",3.212
3,2014,Spring,MATH,285,Intro Differential Equations,LEC,"Baryshnikov, Juliy",2.960678
4,2015,Spring,LAST,170,Introduction to Latin America,DIS,"Kuyumjian, Marcelo B",3.679259


In [None]:
# Columns holding strings; each is replaced by an ordinal integer code.
categorical_cols = ['Term', 'Subject', 'Course Title', 'Sched Type', 'Primary Instructor']

enc = OrdinalEncoder()
# Encode into a copy so working_df keeps the original string values.
encoded = working_df.copy()
encoded[categorical_cols] = enc.fit_transform(working_df[categorical_cols])

In [None]:
# Convert the encoded frame to a plain NumPy array (rows x columns) for PyTorch.
data = encoded.to_numpy()
data.shape

(10000, 8)

In [None]:
import torch
# Seed the split so the train/val/test partition is identical on every run;
# an unseeded split makes the reported scores impossible to reproduce.
# The three lengths must sum to len(data), otherwise random_split raises.
split_generator = torch.Generator().manual_seed(0)
train_data, val_data, test_data = torch.utils.data.random_split(
    data, [7000, 2000, 1000], generator=split_generator)

In [None]:
import torch.nn as nn
class MLP(nn.Module):
    """Single-hidden-layer regressor.

    Architecture: Linear(feature_dim, 10) -> ReLU -> Dropout -> Linear(10, 1).

    Args:
        feature_dim: number of input features per sample.
        dropout: drop probability applied to the hidden activations.
    """

    def __init__(self, feature_dim, dropout=0.3):
        super().__init__()
        hidden_units = 10
        self.layer1 = nn.Linear(feature_dim, hidden_units)
        self.act = nn.ReLU()
        self.drop = nn.Dropout(dropout)
        self.layer2 = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Map a batch of feature rows to one prediction per row (last dim is 1)."""
        hidden = self.layer1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.layer2(hidden)


In [None]:
import torch.optim as optim
import numpy as np

# DataLoader settings: the training loader shuffles, the evaluation loaders do not.
batch_size = 2000
params = dict(batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
eval_params = dict(params, shuffle=False)

train_loader = torch.utils.data.DataLoader(train_data, **params)
val_loader = torch.utils.data.DataLoader(val_data, **eval_params)
test_loader = torch.utils.data.DataLoader(test_data, **eval_params)

# Every row is [features..., target], so the model input width is row length minus one.
n_features = train_data[0].shape[0] - 1
model = MLP(n_features, dropout=0.5)

optimizer = optim.Adam(model.parameters(), lr=0.01)
# optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=5e-4)

save = 'best.pt'    # path where train() stores the best checkpoint
mse = nn.MSELoss()  # loss function used by train()

**Batch size 1000, Dropout 0.3:**


1.   Best performance using Adam as optimizer with lr = 0.01 : 0.18 MSE on test data
2.   Using SGD with lr = 0.01 does not converge.

**Batch size 2000, Dropout 0.3:**


1.   Best performance using Adam as optimizer with lr = 0.01 : 0.15 MSE on test data
2.   Using SGD with lr = 0.01 does not converge.

**Batch size 2000, Dropout 0.5:**


1.   Best performance using Adam as optimizer with lr = 0.01 : 0.17 MSE on test data

**Full training without batch sampling, Dropout 0.3:**
1.   Best performance using Adam as the optimizer with lr = 0.01: 5.93 MSE on test data. Training converges very slowly.

In [None]:
from sklearn.metrics import mean_squared_error

def evaluate(model, eval_loader):
    """Return the mean squared error of `model` over every batch in `eval_loader`.

    Each batch is expected to be a 2-D tensor whose last column is the target
    and whose remaining columns are the features.

    Args:
        model: module mapping a float feature batch to one prediction per row.
        eval_loader: iterable yielding 2-D batches laid out as described above.

    Returns:
        The MSE between all targets and all predictions, as computed by
        sklearn's `mean_squared_error`.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()  # disable dropout while scoring
    predicted = []
    truth = []
    with torch.no_grad():
        for batch in eval_loader:
            # All columns except the last are features; this matches how the
            # model's input width is derived (row length minus one).
            x = batch[:, :-1].float()
            y = batch[:, -1].float()

            # Flatten the (batch, 1) output so predictions and targets are both 1-D.
            yhat = model(x).reshape(-1)

            predicted.append(yhat.cpu().numpy())
            truth.append(y.cpu().numpy())

    # sklearn's signature is (y_true, y_pred).
    return mean_squared_error(np.concatenate(truth), np.concatenate(predicted))

In [None]:
def train(model, train_loader, optimizer, save, epochs=100, eval_every=5):
  """Train `model` and checkpoint it whenever the validation score improves.

  Each batch is expected to be a 2-D tensor whose last column is the target
  and whose remaining columns are the features. The function relies on the
  module-level `mse` loss, `val_loader` and `evaluate` defined elsewhere in
  this notebook.

  Args:
    model: the network to optimise (updated in place).
    train_loader: iterable yielding 2-D training batches.
    optimizer: optimizer built over `model.parameters()`.
    save: path the best-scoring model is written to with `torch.save`.
    epochs: number of passes over `train_loader` (default 100).
    eval_every: validate on epochs where `epoch % eval_every == 0`;
      must be a positive integer (default 5).
  """
  best_score = np.inf
  for epoch in range(epochs):
    model.train()
    train_loss = 0.0

    for batch in train_loader:
      x = batch[:, :-1].float()
      y = batch[:, -1].float()

      # Clear gradients left over from the previous step; without this they
      # accumulate across batches and every update uses a stale sum.
      optimizer.zero_grad()

      # The model emits shape (batch, 1) while the target is (batch,).
      # Dropping the trailing dim stops MSELoss from broadcasting the pair
      # to (batch, batch) and optimising the wrong objective.
      yhat = model(x).squeeze(-1)

      loss = mse(yhat, y)
      loss.backward()
      optimizer.step()
      train_loss += loss.item()

    mean_loss = train_loss / len(train_loader)
    if epoch % eval_every == 0:
      curr_score = evaluate(model, val_loader)

      print('| epoch {:3d} | loss {:4.2f} | curr_score {:4.2f}'.format(epoch + 1, mean_loss, curr_score))
      if curr_score < best_score:
        # Keep only the model with the lowest validation MSE seen so far.
        best_score = curr_score
        with open(save, 'wb') as f:
          torch.save(model, f)
    else:
      print('| epoch {:3d} | loss {:4.2f} '.format(epoch + 1, mean_loss))


In [None]:
# Fit the model; train() writes the best validation checkpoint to `save`.
train(model, train_loader, optimizer, save)

# Reload that checkpoint so the test score reflects the best model,
# not the weights from the final epoch.
with open(save, 'rb') as checkpoint:
    model = torch.load(checkpoint)

test_score = evaluate(model, test_loader)
print('test_score {:4.2f}'.format(test_score))

| epoch   1 | loss 34841.82 | curr_score 2988.93
| epoch   2 | loss 10801.89 
| epoch   3 | loss 4094.20 
| epoch   4 | loss 2549.74 
| epoch   5 | loss 742.17 
| epoch   6 | loss 68.67 | curr_score 20.40
| epoch   7 | loss 26.74 
| epoch   8 | loss 23.40 
| epoch   9 | loss 17.81 
| epoch  10 | loss 16.27 
| epoch  11 | loss 16.24 | curr_score 16.41
| epoch  12 | loss 16.45 
| epoch  13 | loss 16.63 
| epoch  14 | loss 16.81 
| epoch  15 | loss 16.97 
| epoch  16 | loss 17.05 | curr_score 17.14
| epoch  17 | loss 17.14 
| epoch  18 | loss 17.16 
| epoch  19 | loss 17.13 
| epoch  20 | loss 17.08 
| epoch  21 | loss 16.95 | curr_score 16.90
| epoch  22 | loss 16.81 
| epoch  23 | loss 16.59 
| epoch  24 | loss 16.35 
| epoch  25 | loss 16.09 
| epoch  26 | loss 15.76 | curr_score 15.55
| epoch  27 | loss 15.37 
| epoch  28 | loss 15.00 
| epoch  29 | loss 14.59 
| epoch  30 | loss 14.17 
| epoch  31 | loss 13.72 | curr_score 13.47
| epoch  32 | loss 13.29 
| epoch  33 | loss 12.84 
| e