In [29]:
from copy import deepcopy
from pathlib import Path

import pandas as pd
import numpy as np

import torch
import torch.nn as nn

from src.dataset_loader import get_np_dataset
from src.grades_dataset import TensorDatasetGpu
from src.models import GradePredictionModel

In [30]:
path = Path(r'C:\Python\projects\CGPAPrediction\data\final_grade_prediction_dataset')

In [31]:
emb_threshold = 25

data, y_data, emb_dims, one_hot_dims = get_np_dataset(emb_threshold, dataset_path=path)
y_val = y_data['val'].numpy()
y_test = y_data['test'].numpy()

In [32]:
inputs_description = {'Educational Program / Major': 'embedding', 'Payment Type': 'one-hot',
                      'Funding Type': 'one-hot', 'Language of Instruction': 'one-hot',
                      'Institute': 'one-hot', 'Study Mode': 'one-hot', 'Course': 'embedding',
                      'Instructor Full Name': 'embedding', 'Instructor Department': 'embedding',
                      'Student Gender': 'one-hot', 'Instructor Gender': 'one-hot', 'Rating': 'number',
                      'Midterm 1': 'number', 'Midterm 2': 'number'}

In [53]:
inputs_description

{'Educational Program / Major': ('embedding', (58, 7)),
 'Payment Type': ('one-hot', 2),
 'Funding Type': ('one-hot', 9),
 'Language of Instruction': ('one-hot', 2),
 'Institute': ('one-hot', 13),
 'Study Mode': ('one-hot', 7),
 'Course': ('embedding', (502, 22)),
 'Instructor Full Name': ('embedding', (443, 21)),
 'Instructor Department': ('embedding', (73, 8)),
 'Student Gender': ('one-hot', 2),
 'Instructor Gender': ('one-hot', 2),
 'Rating': ('number',),
 'Midterm 1': ('number',),
 'Midterm 2': ('number',)}

In [58]:
{k: v for k, v in inputs_description.items() if v[0] == 'number'}

{'Rating': ('number',), 'Midterm 1': ('number',), 'Midterm 2': ('number',)}

In [33]:
for col, input_type in inputs_description.items():
    if input_type == 'embedding':
        inputs_description[col] = ('embedding', (emb_dims[col]))
    elif input_type == 'one-hot':
        inputs_description[col] = ('one-hot', one_hot_dims[col])
    else:
        inputs_description[col] = (inputs_description[col],)

other_semesters_inputs_description = inputs_description.copy()
other_semesters_inputs_description['Final Grade'] = ('number',)

In [34]:
loss_weights = 1 / torch.from_numpy(pd.Series(y_data['train']).value_counts().sort_index().values)
loss_weights = loss_weights / loss_weights.sum() * loss_weights.size(0)

In [35]:
loss_weights

tensor([1.8108, 0.5129, 0.6763])

In [36]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("✅ Using device:", device)

✅ Using device: cuda


In [37]:
train_dataset = TensorDatasetGpu(data['train'], y_data['train'], device)
x_val = TensorDatasetGpu.move_to_device(data['val'], device)
x_test = TensorDatasetGpu.move_to_device(data['test'], device)
loss_weights = loss_weights.to(device)

In [38]:
def show_device(x):
    if torch.is_tensor(x):
        print(x.device)
    elif isinstance(x, dict):
        for k, v in x.items():
            if torch.is_tensor(v):
                print(f"{k}: {v.device}")

In [42]:
params = {
    'lstm_hidden': 64,
    'attn_hidden': 64,
    'l1': 64,
    'l2': 32,
    'num_classes': 3,
    'dropout': 0.0,
    'num_lstm_layers': 2,
}

best_model = None
best_f1 = 0
learning_data = []

In [59]:
inputs_description


{'Educational Program / Major': ('embedding', (58, 7)),
 'Payment Type': ('one-hot', 2),
 'Funding Type': ('one-hot', 9),
 'Language of Instruction': ('one-hot', 2),
 'Institute': ('one-hot', 13),
 'Study Mode': ('one-hot', 7),
 'Course': ('embedding', (502, 22)),
 'Instructor Full Name': ('embedding', (443, 21)),
 'Instructor Department': ('embedding', (73, 8)),
 'Student Gender': ('one-hot', 2),
 'Instructor Gender': ('one-hot', 2),
 'Rating': ('number',),
 'Midterm 1': ('number',),
 'Midterm 2': ('number',)}

In [43]:
from src.utils import evaluate_model

batch_size = 64

model = GradePredictionModel(inputs_description, params).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss(reduction='none')

epochs_without_improvements = 0
max_epochs_without_improvements = 20
early_stop_count = 0
max_early_stop_count = 50
lr_step = 0.4

epochs = 100
for i in range(epochs):
    for x, y in train_dataset.iter_batches(batch_size):
        optimizer.zero_grad()
        preds = model(**x)
        loss = loss_fn(preds, y)
        weights = loss_weights[y]
        loss = (loss * weights).mean()

        loss.backward()
        optimizer.step()

    print(f"Epoch: {i}")
    print(f"Learning rate: {optimizer.param_groups[0]['lr']}")
    print("Train:")
    model.eval()
    with torch.no_grad():
        train_preds = model(**train_dataset.x_gpu).argmax(dim=1).cpu().numpy()
        train_true = train_dataset.y_cpu.numpy()
        evaluate_model(train_true, train_preds, learning_data)
        print("Val:")
        f1weighted = evaluate_model(y_val, model(**x_val).argmax(dim=1).cpu().numpy(), learning_data)
        if f1weighted > best_f1:
            early_stop_count = 0
            epochs_without_improvements = 0
            best_f1 = f1weighted
            best_model = deepcopy(model)
        else:
            early_stop_count += 1
            if early_stop_count == max_early_stop_count:
                break
            epochs_without_improvements += 1
            print(f"Epochs without improvements - {epochs_without_improvements}")
            if epochs_without_improvements == max_epochs_without_improvements:
                epochs_without_improvements = 0
                optimizer.param_groups[0]['lr'] *= lr_step
                print(f"Current LR - {optimizer.param_groups[0]['lr']}")
                model = deepcopy(best_model)
                model.lstm.flatten_parameters()
    model.train()

Epoch: 0
Learning rate: 0.0001
Train:
  Metrics:
  Accuracy:       0.4443
  Precision (w):  0.4908
  Recall (w):     0.4443
  Precision (per class):  ['0.2915', '0.5803', '0.4472']
  Recall (per class):  ['0.3905', '0.2318', '0.7446']
  F1 (macro):     0.4080
  F1 (weighted):  0.4162

Confusion Matrix:
     0     1     2
0  562   301   576
1  933  1178  2970
2  433   551  2869
Val:
  Metrics:
  Accuracy:       0.4332
  Precision (w):  0.4809
  Recall (w):     0.4332
  Precision (per class):  ['0.3134', '0.5754', '0.4207']
  Recall (per class):  ['0.3808', '0.2446', '0.7244']
  F1 (macro):     0.4065
  F1 (weighted):  0.4094

Confusion Matrix:
     0    1    2
0  131   79  134
1  193  271  644
2   94  121  565
Epoch: 1
Learning rate: 0.0001
Train:
  Metrics:
  Accuracy:       0.4520
  Precision (w):  0.5338
  Recall (w):     0.4520
  Precision (per class):  ['0.2955', '0.6196', '0.5097']
  Recall (per class):  ['0.6852', '0.1295', '0.7903']
  F1 (macro):     0.4156
  F1 (weighted):  0.3

In [46]:
from pathlib import Path
PROJECT_ROOT = Path(r'C:\Python\projects\CGPAPrediction')

In [47]:
torch.save(model.state_dict(), PROJECT_ROOT / "outputs/models/model_weights.pth")

In [48]:
def serialize(l):
    res = {}
    for i, e in enumerate(l):
        ne = {}
        for k, v in e.items():
            if isinstance(v, np.ndarray):
                ne[k] = v.tolist()
            else:
                ne[k] = v
        res[i + 1] = ne
    return res

In [49]:
learning_data

[{'model': 'Unnamed',
  'accuracy': 0.4443266171792153,
  'precision_w': 0.49080596251399394,
  'recall_w': 0.4443266171792153,
  'precision_per_class': array([0.29149378, 0.58029557, 0.44723305]),
  'recall_per_class': array([0.39054899, 0.23184413, 0.74461459]),
  'f1_macro': 0.40798984669293303,
  'f1_weighted': 0.4161719027494398},
 {'model': 'Unnamed',
  'accuracy': 0.43324372759856633,
  'precision_w': 0.4809436521700903,
  'recall_w': 0.43324372759856633,
  'precision_per_class': array([0.31339713, 0.57537155, 0.42069993]),
  'recall_per_class': array([0.38081395, 0.24458484, 0.72435897]),
  'f1_macro': 0.4064509692075184,
  'f1_weighted': 0.40939615615305214},
 {'model': 'Unnamed',
  'accuracy': 0.45203894726694305,
  'precision_w': 0.5338100006977559,
  'recall_w': 0.45203894726694305,
  'precision_per_class': array([0.29547498, 0.61958569, 0.50970874]),
  'recall_per_class': array([0.68519805, 0.12950207, 0.79029328]),
  'f1_macro': 0.4156155249664375,
  'f1_weighted': 0.3924

In [60]:
learning_train_data = [learning_data[i] for i in range(0, len(learning_data), 2)]
learning_test_data = [learning_data[i] for i in range(1, len(learning_data), 2)]
import json

with open(PROJECT_ROOT / "outputs/learning_data/lstm_attention_train1.json", "w") as f:
    json.dump(serialize(learning_train_data), f, indent=4)

with open(PROJECT_ROOT / "outputs/learning_data/lstm_attention_test1.json", "w") as f:
    json.dump(serialize(learning_test_data), f, indent=4)

In [52]:
from torchinfo import summary

example_input = {
    **x_test
}

summary(model, input_data=example_input, depth=4)

Layer (type:depth-idx)                             Output Shape              Param #
OptimizedGradePredictionModel                      [2212, 3]                 --
├─Embedding: 1-1                                   [2212, 7]                 406
├─Embedding: 1-2                                   [2212, 22]                11,044
├─Embedding: 1-3                                   [2212, 21]                9,303
├─Embedding: 1-4                                   [2212, 8]                 584
├─Embedding: 1-5                                   [2212, 4, 10, 7]          (recursive)
├─Embedding: 1-6                                   [2212, 4, 10, 22]         (recursive)
├─Embedding: 1-7                                   [2212, 4, 10, 21]         (recursive)
├─Embedding: 1-8                                   [2212, 4, 10, 8]          (recursive)
├─OptimizedConditionalAttentionPooling: 1-9        [2212, 4, 64]             --
│    └─Linear: 2-1                                 [2212, 64]         

In [45]:
model.eval()
with torch.no_grad():
    print("Best Train:")
    train_preds = best_model(**train_dataset.x_gpu).argmax(dim=1).cpu().numpy()
    train_true = train_dataset.y_cpu.numpy()
    evaluate_model(train_true, train_preds, learning_data)
    print("Best Val:")
    evaluate_model(y_val, model(**x_val).argmax(dim=1).cpu().numpy(), learning_data)
    print("Best Test:")
    evaluate_model(y_test, model(**x_test).argmax(dim=1).cpu().numpy(), learning_data)

Best Train:


  result = _VF.lstm(


  Metrics:
  Accuracy:       0.9529
  Precision (w):  0.9543
  Recall (w):     0.9529
  Precision (per class):  ['0.9185', '0.9392', '0.9877']
  Recall (per class):  ['0.9944', '0.9663', '0.9195']
  F1 (macro):     0.9533
  F1 (weighted):  0.9528

Confusion Matrix:
      0     1     2
0  1431     8     0
1   127  4910    44
2     0   310  3543
Best Val:
  Metrics:
  Accuracy:       0.8893
  Precision (w):  0.8905
  Recall (w):     0.8893
  Precision (per class):  ['0.8301', '0.8806', '0.9313']
  Recall (per class):  ['0.8663', '0.8989', '0.8859']
  F1 (macro):     0.8818
  F1 (weighted):  0.8896

Confusion Matrix:
     0    1    2
0  298   46    0
1   61  996   51
2    0   89  691
Best Test:
  Metrics:
  Accuracy:       0.8915
  Precision (w):  0.8919
  Recall (w):     0.8915
  Precision (per class):  ['0.8514', '0.8802', '0.9243']
  Recall (per class):  ['0.8384', '0.9013', '0.9000']
  F1 (macro):     0.8825
  F1 (weighted):  0.8916

Confusion Matrix:
     0    1    2
0  275   53    0

In [55]:
for k, v in train_dataset.x_cpu['other_semesters'].items():
    print(f'{k} - {v.shape}')

educational_program - torch.Size([10373, 4, 10])
course - torch.Size([10373, 4, 10])
instructor_name - torch.Size([10373, 4, 10])
instructor_department - torch.Size([10373, 4, 10])
tensor_features - torch.Size([10373, 4, 10, 41])
mask - torch.Size([10373, 4, 10])
