In [1]:
import pandas as pd
from ContentBasedLearning import ContentBasedLearning
from GraphBasedLearning import GraphBasedLearning
from sklearn.metrics import classification_report

In [2]:
import os
PATH = os.path.dirname(os.getcwd())

In [3]:
import torch
torch.manual_seed(0)

<torch._C.Generator at 0x25022595af0>

# AKD


In [4]:
def unite_pseudo_data(path, pseudo_data, i, n):
    train = pd.read_csv(path)

    new_train = pd.concat([train, pseudo_data], axis=0).sort_values(by=['conceptA'])
    
    new_train[['conceptA', 'conceptB']] = new_train[['conceptA', 'conceptB']].astype(int)
    
    path_to_write = os.path.join(PATH, f'data/pseudo/akd_united_pseudo_data_fixed_{i}_{n}.csv')
    
    new_train.to_csv(path_to_write, index=False)
    
    return path_to_write, new_train['isPrerequisite'].mean()

In [5]:
train_path = os.path.join(PATH, r'data/train_test/train.csv')
test_file = os.path.join(PATH, r'data/train_test/test.csv')

for i in range(1, 5):
    print()
    print("#"*50)
    print(f"Iteration {i}")
    print("#"*50)
    print()
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cbl = ContentBasedLearning(input_size=2048, device=device)
    
    x_train, y_train, x_test, y_test = cbl.create_train_test_dataset(train_file=train_path, test_file=test_file)
    
    cbl.train_model(x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, num_epochs=500)
    
    pseudo_nn = cbl.generate_pseudo_data(train_file=train_path)
    
    train_path, threshold = unite_pseudo_data(train_path, pseudo_nn, i, 0)
    
    print(train_path)
    
    gbl = GraphBasedLearning('conceptA', 'conceptB')
    
    gbl.train(train_path)
    
    pred = gbl.predict(threshold)       
    df_test = pred[pred['_split_set'] == 'test']
    print(classification_report(df_test['isPrerequisite'], df_test['pred']))
    
    pseudo_ncf = gbl.generate_pseudo_data(train_path)
    
    train_path, _ = unite_pseudo_data(train_path, pseudo_nn, i, 1)
    
    print(train_path)


##################################################
Iteration 1
##################################################

x_train shape: torch.Size([14846, 2048])
y_train shape: torch.Size([14846])
x_test shape: torch.Size([3577, 2048])
y_test shape: torch.Size([3577])
Epoch [10/500], Loss: 0.5987, Train Accuracy: 0.7142, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [20/500], Loss: 0.5903, Train Accuracy: 0.7142, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [30/500], Loss: 0.5728, Train Accuracy: 0.7142, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [40/500], Loss: 0.5390, Train Accuracy: 0.7142, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [50/500], Loss: 0.4999, Train Accuracy: 0.7203, Train F1: 0.0490, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [60/500], Loss: 0.4632, Train Accuracy: 0.7432, Train F1: 0.2430, Test Accuracy: 0.7621, Test F1: 0.3519
Epoch [70/500], Loss: 0.4315, Train Accuracy: 0.7911, Train F1: 0.5109

100%|██████████| 1384/1384 [08:02<00:00,  2.87it/s]


Original positives: 4243, Pseudo positives: 1049


INFO:ncf.dataset:Indexing C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_1_0.csv ...


C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_1_0.csv


INFO:ncf.ncf:Epoch 20 [1.76s]: train_loss = 0.039472 


              precision    recall  f1-score   support

           0       0.89      0.97      0.93      2652
           1       0.90      0.69      0.78      1064

    accuracy                           0.89      3716
   macro avg       0.89      0.83      0.85      3716
weighted avg       0.89      0.89      0.88      3716

C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_1_1.csv

##################################################
Iteration 2
##################################################

x_train shape: torch.Size([16944, 2048])
y_train shape: torch.Size([16944])
x_test shape: torch.Size([3577, 2048])
y_test shape: torch.Size([3577])
Epoch [10/500], Loss: 0.6153, Train Accuracy: 0.6877, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [20/500], Loss: 0.5878, Train Accuracy: 0.6877, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [30/500], Loss: 0.5506, Train Accuracy: 0.7041, Train F1: 0.09

100%|██████████| 1384/1384 [08:09<00:00,  2.83it/s]


Original positives: 5292, Pseudo positives: 1307


INFO:ncf.dataset:Indexing C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_2_0.csv ...


C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_2_0.csv


INFO:ncf.ncf:Epoch 20 [1.72s]: train_loss = 0.042443 


              precision    recall  f1-score   support

           0       0.87      0.97      0.92      2652
           1       0.91      0.62      0.74      1064

    accuracy                           0.87      3716
   macro avg       0.89      0.80      0.83      3716
weighted avg       0.88      0.87      0.87      3716

C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_2_1.csv

##################################################
Iteration 3
##################################################

x_train shape: torch.Size([19558, 2048])
y_train shape: torch.Size([19558])
x_test shape: torch.Size([3577, 2048])
y_test shape: torch.Size([3577])
Epoch [10/500], Loss: 0.6106, Train Accuracy: 0.6626, Train F1: 0.0000, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [20/500], Loss: 0.5426, Train Accuracy: 0.7766, Train F1: 0.5259, Test Accuracy: 0.7442, Test F1: 0.2407
Epoch [30/500], Loss: 0.4724, Train Accuracy: 0.7968, Train F1: 0.60

100%|██████████| 1384/1384 [08:34<00:00,  2.69it/s]


Original positives: 6599, Pseudo positives: 1634


INFO:ncf.dataset:Indexing C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_3_0.csv ...


C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_3_0.csv


INFO:ncf.ncf:Epoch 20 [1.75s]: train_loss = 0.039793 


              precision    recall  f1-score   support

           0       0.85      0.98      0.91      2652
           1       0.92      0.56      0.69      1064

    accuracy                           0.86      3716
   macro avg       0.88      0.77      0.80      3716
weighted avg       0.87      0.86      0.85      3716

C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_3_1.csv

##################################################
Iteration 4
##################################################

x_train shape: torch.Size([22826, 2048])
y_train shape: torch.Size([22826])
x_test shape: torch.Size([3577, 2048])
y_test shape: torch.Size([3577])
Epoch [10/500], Loss: 0.6169, Train Accuracy: 0.6394, Train F1: 0.0007, Test Accuracy: 0.7157, Test F1: 0.0000
Epoch [20/500], Loss: 0.5200, Train Accuracy: 0.8055, Train F1: 0.6612, Test Accuracy: 0.7422, Test F1: 0.3516
Epoch [30/500], Loss: 0.4406, Train Accuracy: 0.8179, Train F1: 0.70

100%|██████████| 1384/1384 [09:09<00:00,  2.52it/s]


Original positives: 8233, Pseudo positives: 2008


INFO:ncf.dataset:Indexing C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_4_0.csv ...


C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_4_0.csv


INFO:ncf.ncf:Epoch 20 [2.10s]: train_loss = 0.036068 


              precision    recall  f1-score   support

           0       0.83      0.98      0.90      2652
           1       0.93      0.50      0.65      1064

    accuracy                           0.85      3716
   macro avg       0.88      0.74      0.78      3716
weighted avg       0.86      0.85      0.83      3716

C:\Users\achiq\PycharmProjects\bachelor-project-prerequisite-learning\data/pseudo/akd_united_pseudo_data_fixed_4_1.csv


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cbl = ContentBasedLearning(input_size=2048, device=device)

x_train, y_train, x_test, y_test = cbl.create_train_test_dataset(train_file=train_path, test_file=test_file)

cbl.train_model(x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test, num_epochs=500)

x_train shape: torch.Size([26842, 2048])
y_train shape: torch.Size([26842])
x_test shape: torch.Size([3577, 2048])
y_test shape: torch.Size([3577])
Epoch [10/500], Loss: 0.5851, Train Accuracy: 0.6746, Train F1: 0.2737, Test Accuracy: 0.7417, Test F1: 0.2364
Epoch [20/500], Loss: 0.4653, Train Accuracy: 0.8322, Train F1: 0.7623, Test Accuracy: 0.7358, Test F1: 0.4090
Epoch [30/500], Loss: 0.3791, Train Accuracy: 0.8377, Train F1: 0.7691, Test Accuracy: 0.7571, Test F1: 0.3859
Epoch [40/500], Loss: 0.3201, Train Accuracy: 0.8730, Train F1: 0.8149, Test Accuracy: 0.7755, Test F1: 0.4902
Epoch [50/500], Loss: 0.2901, Train Accuracy: 0.8797, Train F1: 0.8295, Test Accuracy: 0.7881, Test F1: 0.5298
Epoch [60/500], Loss: 0.2667, Train Accuracy: 0.8907, Train F1: 0.8448, Test Accuracy: 0.8010, Test F1: 0.5767
Epoch [70/500], Loss: 0.2469, Train Accuracy: 0.8983, Train F1: 0.8569, Test Accuracy: 0.8099, Test F1: 0.6083
Epoch [80/500], Loss: 0.2320, Train Accuracy: 0.9041, Train F1: 0.8663, Tes

In [8]:
torch.save(cbl.model.state_dict(), os.path.join(PATH, r'models/model_final.pt'))

In [9]:
torch.save(cbl.model.state_dict(), os.path.join(PATH, r'server/model/model_final.pt'))