In [1]:
import sys

import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

import pandas as pd
from ncf import NCF
from dataset import Dataset as NCFDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.11.2 (tags/v3.11.2:878ead1, Feb  7 2023, 16:38:35) [MSC v.1934 64 bit (AMD64)]
Pandas version: 2.2.1
Tensorflow version: 2.16.1


In [2]:
# top k items to recommend
TOP_K = 10
THRESHOLD = 0.29

# Model parameters
EPOCHS = 50
BATCH_SIZE = 256

SEED = 42
PATH = 'C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data'

# Train/Test Split

In [3]:
df = pd.read_csv(PATH + '/united_data_encoded.csv')

df.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite,dataset
0,1076,1562,1,moocML
1,1347,855,1,moocML
2,1347,516,0,moocML
3,1347,1274,0,moocML
4,1347,443,0,moocML


In [4]:
df['dataset'].value_counts()

dataset
moocML     6712
al_cpl     6529
drive      2797
moocDSA    2540
Name: count, dtype: int64

In [5]:
concepts = list(set(list(df['conceptA']) + list(df['conceptB'])))

In [6]:
train, test = train_test_split(df, test_size=0.1, random_state=SEED, stratify=df['dataset'])

In [7]:
df.loc[train.index, '_split_set'] = 'train'
df.loc[test.index, '_split_set'] = 'test'

In [8]:
train.shape, test.shape

((16720, 4), (1858, 4))

In [9]:
train_conceptA = list(set(list(train['conceptA'])))
train_conceptB = list(set(list(train['conceptB'])))
test_conceptA = list(set(list(test['conceptA'])))
test_conceptB = list(set(list(test['conceptB'])))

In [10]:
train['dataset'].value_counts() / test['dataset'].value_counts()

dataset
moocML     9.002981
al_cpl     8.998469
drive      8.989286
moocDSA    9.000000
Name: count, dtype: float64

In [11]:
a = [x for x in test_conceptA if x not in train_conceptA]
b = [x for x in test_conceptB if x not in train_conceptB]

In [12]:
maskA = test['conceptA'].isin(a)
maskB = test['conceptB'].isin(b)

In [13]:
train = pd.concat([train, test[maskA | maskB]], axis=0)
test.drop(index=test[maskA | maskB].index, inplace=True)

In [14]:
train.shape, test.shape

((16766, 4), (1812, 4))

Write datasets to csv files.

In [15]:
train_file = PATH + f"/united_train_{SEED}_encoded_stratify.csv"
test_file = PATH + f"/united_test_{SEED}_encoded_stratify.csv"
train.sort_values(by=['conceptA']).to_csv(train_file, index=False)
test.sort_values(by=['conceptA']).to_csv(test_file, index=False)

In [16]:
data = NCFDataset(train_file=train_file, seed=SEED, col_user='conceptA', col_item='conceptB')

INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/united_train_42_encoded_stratify.csv ...


In [17]:
train['conceptA'].nunique(), train['conceptB'].nunique()

(1384, 1600)

In [18]:
data.n_users, data.n_items

(1384, 1600)

In [19]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=16,
    # previously 32
    layer_sizes=[8,4],
    # previously 16, 8, 4
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)



In [20]:
%%time

model.fit(data)

INFO:ncf:Epoch 10 [3.85s]: train_loss = 0.084780 
INFO:ncf:Epoch 20 [4.14s]: train_loss = 0.031712 
INFO:ncf:Epoch 30 [4.18s]: train_loss = 0.013488 
INFO:ncf:Epoch 40 [3.79s]: train_loss = 0.007126 
INFO:ncf:Epoch 50 [3.73s]: train_loss = 0.004529 


CPU times: total: 3min 56s
Wall time: 3min 39s


## Cross-Validation

In [40]:
from sklearn.model_selection import StratifiedKFold

n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, random_state=SEED, shuffle=True)

X = train[['conceptA', 'conceptB']]
y = train['isPrerequisite']

i = 0
for train_index, test_index in skf.split(X, y):
    train_cv = train.iloc[train_index]
    test_cv = train.iloc[test_index] 
    train_file = PATH + f"/cross_validation_train_{i}_split_{n_splits}.csv"
    test_file = PATH + f"/cross_validation_test_{i}_split_{n_splits}.csv"
    i += 1
    
    train_conceptA = list(set(list(train_cv['conceptA'])))
    train_conceptB = list(set(list(train_cv['conceptB'])))
    test_conceptA = list(set(list(test_cv['conceptA'])))
    test_conceptB = list(set(list(test_cv['conceptB'])))
    
    a = [x for x in test_conceptA if x not in train_conceptA]
    b = [x for x in test_conceptB if x not in train_conceptB]
    
    maskA = test_cv['conceptA'].isin(a)
    maskB = test_cv['conceptB'].isin(b)
    
    train_cv = pd.concat([train_cv, test_cv[maskA | maskB]], axis=0)
    test_cv.drop(index=test_cv[maskA | maskB].index, inplace=True)
    
    train_cv.sort_values(by=['conceptA']).to_csv(train_file, index=False)
    test_cv.sort_values(by=['conceptA']).to_csv(test_file, index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_cv.drop(index=test_cv[maskA | maskB].index, inplace=True)


In [48]:
for i in range(n_splits):
    model_cv = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type="NeuMF",
        n_factors=16,
        # previously 32
        layer_sizes=[8, 4],
        # previously 16, 8, 4
        n_epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        learning_rate=1e-3,
        verbose=10,
        seed=SEED
    )
    
    train_file = PATH + f"/cross_validation_train_{i}_split_{n_splits}.csv"
    test_file = PATH + f"/cross_validation_test_{i}_split_{n_splits}.csv"
    train_cv = NCFDataset(train_file=train_file, seed=SEED, col_user='conceptA', col_item='conceptB')
    model_cv.fit(train_cv)
    
    test_cv = pd.read_csv(test_file)
    predictions = [[row.conceptA, row.conceptB, model.predict(row.conceptA, row.conceptB)]
               for (_, row) in test_cv.iterrows()]

    predictions = pd.DataFrame(predictions, columns=['conceptA', 'conceptB', 'isPrerequisite_pred'])
    predictions['pred'] = (predictions['isPrerequisite_pred'] >= THRESHOLD).astype(int)
    print(f'f1 - split {i}', f1_score(test_cv['isPrerequisite'], predictions['pred']))

INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_0_split_5.csv ...
INFO:ncf:Epoch 10 [1.92s]: train_loss = 0.094765 
INFO:ncf:Epoch 20 [2.71s]: train_loss = 0.040012 
INFO:ncf:Epoch 30 [2.42s]: train_loss = 0.016239 
INFO:ncf:Epoch 40 [2.42s]: train_loss = 0.009295 
INFO:ncf:Epoch 50 [1.87s]: train_loss = 0.005789 


f1 - split 0 0.4672131147540984


INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_1_split_5.csv ...
INFO:ncf:Epoch 10 [2.38s]: train_loss = 0.099104 
INFO:ncf:Epoch 20 [2.65s]: train_loss = 0.041657 
INFO:ncf:Epoch 30 [2.32s]: train_loss = 0.017535 
INFO:ncf:Epoch 40 [2.11s]: train_loss = 0.009298 
INFO:ncf:Epoch 50 [2.03s]: train_loss = 0.005311 


f1 - split 1 0.9983757444504602


INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_2_split_5.csv ...
INFO:ncf:Epoch 10 [2.10s]: train_loss = 0.096921 
INFO:ncf:Epoch 20 [2.13s]: train_loss = 0.039401 
INFO:ncf:Epoch 30 [2.06s]: train_loss = 0.016796 
INFO:ncf:Epoch 40 [1.86s]: train_loss = 0.009378 
INFO:ncf:Epoch 50 [1.85s]: train_loss = 0.006788 


f1 - split 2 0.9989059080962801


INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_3_split_5.csv ...
INFO:ncf:Epoch 10 [1.85s]: train_loss = 0.091965 
INFO:ncf:Epoch 20 [2.22s]: train_loss = 0.036873 
INFO:ncf:Epoch 30 [1.85s]: train_loss = 0.015553 
INFO:ncf:Epoch 40 [1.84s]: train_loss = 0.008538 
INFO:ncf:Epoch 50 [1.85s]: train_loss = 0.005165 


f1 - split 3 0.998914223669924


INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_4_split_5.csv ...
INFO:ncf:Epoch 10 [1.83s]: train_loss = 0.101497 
INFO:ncf:Epoch 20 [1.86s]: train_loss = 0.040502 
INFO:ncf:Epoch 30 [1.86s]: train_loss = 0.017031 
INFO:ncf:Epoch 40 [1.89s]: train_loss = 0.009564 
INFO:ncf:Epoch 50 [1.84s]: train_loss = 0.006121 


f1 - split 4 0.9994556341861731


## 3.4 Prediction and Evaluation

### 3.4.1 Prediction

Now that our model is fitted, we can call `predict` to get some `predictions`. `predict` returns an internal object Prediction which can be easily converted back to a dataframe:

In [21]:
predictions = [[row.conceptA, row.conceptB, model.predict(row.conceptA, row.conceptB)]
               for (_, row) in df.iterrows()]

predictions = pd.DataFrame(predictions, columns=['conceptA', 'conceptB', 'isPrerequisite_pred'])

predictions['isPrerequisite'] = df['isPrerequisite']
predictions['dataset'] = df['dataset']
predictions['_split_set'] = df['_split_set']
sorted_predictions = predictions.sort_values(by='isPrerequisite_pred', ascending=False)
sorted_predictions['pred'] = (sorted_predictions['isPrerequisite_pred'] >= THRESHOLD).astype(int)

In [22]:
sorted_predictions['isPrerequisite'].mean(), df['isPrerequisite'].mean()

(0.2868984820755733, 0.2868984820755733)

In [23]:
sorted_predictions['dataset'].value_counts()

dataset
moocML     6712
al_cpl     6529
drive      2797
moocDSA    2540
Name: count, dtype: int64

In [24]:
df_moocML = sorted_predictions[sorted_predictions['dataset'] == 'moocML']
df_moocDSA = sorted_predictions[sorted_predictions['dataset'] == 'moocDSA']
df_drive = sorted_predictions[sorted_predictions['dataset'] == 'drive']
df_alcpl = sorted_predictions[sorted_predictions['dataset'] == 'al_cpl']

In [25]:
df_test = sorted_predictions[sorted_predictions['_split_set'] == 'test']

# Check Test Set

In [26]:
print(classification_report(df_test['isPrerequisite'], df_test['pred']))

              precision    recall  f1-score   support

           0       0.83      0.99      0.90      1309
           1       0.94      0.53      0.68       549

    accuracy                           0.85      1858
   macro avg       0.89      0.76      0.79      1858
weighted avg       0.86      0.85      0.84      1858



In [125]:
print(classification_report(df_test['isPrerequisite'], df_test['pred']))

              precision    recall  f1-score   support

           0       0.81      0.99      0.89      1309
           1       0.94      0.46      0.62       549

    accuracy                           0.83      1858
   macro avg       0.88      0.73      0.76      1858
weighted avg       0.85      0.83      0.81      1858



In [27]:
print(classification_report(df_test['isPrerequisite'], df_test['pred']))

              precision    recall  f1-score   support

           0       0.75      1.00      0.86      1309
           1       0.98      0.20      0.34       549

    accuracy                           0.76      1858
   macro avg       0.87      0.60      0.60      1858
weighted avg       0.82      0.76      0.70      1858



# Check moocML

In [28]:
print(classification_report(df_moocML['isPrerequisite'], df_moocML['pred']))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98      4977
           1       1.00      0.91      0.95      1735

    accuracy                           0.98      6712
   macro avg       0.98      0.95      0.97      6712
weighted avg       0.98      0.98      0.98      6712



In [29]:
moocML_test = df_moocML[df_moocML['_split_set'] == 'test']

print(classification_report(moocML_test['isPrerequisite'], moocML_test['pred']))
print('precision - ', precision_score(moocML_test['isPrerequisite'], moocML_test['pred']))
print('recall - ', recall_score(moocML_test['isPrerequisite'], moocML_test['pred']))
print('f1 - ', f1_score(moocML_test['isPrerequisite'], moocML_test['pred']))

              precision    recall  f1-score   support

           0       0.76      1.00      0.86       508
           1       1.00      0.02      0.04       163

    accuracy                           0.76       671
   macro avg       0.88      0.51      0.45       671
weighted avg       0.82      0.76      0.66       671

precision -  1.0
recall -  0.018404907975460124
f1 -  0.03614457831325301


In [30]:
df.loc[(df['dataset'] == 'moocML') & (df['_split_set'] == 'test'), 'isPrerequisite'].shape

(671,)

In [31]:
train['isPrerequisite'].sum()

4801

# Check moocDSA

In [32]:
print(classification_report(df_moocDSA['isPrerequisite'], df_moocDSA['pred']))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2061
           1       1.00      0.91      0.95       479

    accuracy                           0.98      2540
   macro avg       0.99      0.96      0.97      2540
weighted avg       0.98      0.98      0.98      2540



In [33]:
moocDSA_test = df_moocDSA[df_moocDSA['_split_set'] == 'test']

print(classification_report(moocDSA_test['isPrerequisite'], moocDSA_test['pred']))
print('precision - ', precision_score(moocDSA_test['isPrerequisite'], moocDSA_test['pred']))
print('recall - ', recall_score(moocDSA_test['isPrerequisite'], moocDSA_test['pred']))
print('f1 - ', f1_score(moocDSA_test['isPrerequisite'], moocDSA_test['pred']))

              precision    recall  f1-score   support

           0       0.83      1.00      0.91       209
           1       1.00      0.04      0.09        45

    accuracy                           0.83       254
   macro avg       0.91      0.52      0.50       254
weighted avg       0.86      0.83      0.76       254

precision -  1.0
recall -  0.044444444444444446
f1 -  0.0851063829787234


# Check drive

In [34]:
print(classification_report(df_drive['isPrerequisite'], df_drive['pred']))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1684
           1       1.00      0.92      0.96      1113

    accuracy                           0.97      2797
   macro avg       0.97      0.96      0.97      2797
weighted avg       0.97      0.97      0.97      2797



In [35]:
drive_test = df_drive[df_drive['_split_set'] == 'test']

print(classification_report(drive_test['isPrerequisite'], drive_test['pred']))
print('precision - ', precision_score(drive_test['isPrerequisite'], drive_test['pred']))
print('recall - ', recall_score(drive_test['isPrerequisite'], drive_test['pred']))
print('f1 - ', f1_score(drive_test['isPrerequisite'], drive_test['pred']))

              precision    recall  f1-score   support

           0       0.65      0.99      0.79       165
           1       0.93      0.24      0.39       115

    accuracy                           0.68       280
   macro avg       0.79      0.62      0.59       280
weighted avg       0.77      0.68      0.62       280

precision -  0.9333333333333333
recall -  0.24347826086956523
f1 -  0.38620689655172413


# Check AL_CPL

In [36]:
print(classification_report(df_alcpl['isPrerequisite'], df_alcpl['pred']))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98      4526
           1       1.00      0.93      0.96      2003

    accuracy                           0.98      6529
   macro avg       0.98      0.96      0.97      6529
weighted avg       0.98      0.98      0.98      6529



In [37]:
alcpl_test = df_alcpl[df_alcpl['_split_set'] == 'test']

print(classification_report(alcpl_test['isPrerequisite'], alcpl_test['pred']))
print('precision - ', precision_score(alcpl_test['isPrerequisite'], alcpl_test['pred']))
print('recall - ', recall_score(alcpl_test['isPrerequisite'], alcpl_test['pred']))
print('f1 - ', f1_score(alcpl_test['isPrerequisite'], alcpl_test['pred']))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85       427
           1       1.00      0.35      0.52       226

    accuracy                           0.77       653
   macro avg       0.87      0.67      0.69       653
weighted avg       0.83      0.77      0.74       653

precision -  1.0
recall -  0.3495575221238938
f1 -  0.5180327868852459


# Generate Pseudo Data

In [39]:
train.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite,dataset
2321,375,1086,0,moocML
5480,87,277,1,moocML
14445,476,392,1,al_cpl
7352,595,214,0,moocDSA
14136,1073,913,0,al_cpl


In [89]:
users, items, preds = [], [], []
item = list(train["conceptB"].unique())
for user in train["conceptA"].unique():
    user = [user] * len(item) 
    users.extend(user)
    items.extend(item)
    preds.extend(list(model.predict(user, item, is_list=True)))

pseudo_predictions = pd.DataFrame(data={"conceptA": users, "conceptB":items, "isPrerequisite_pred":preds})

In [91]:
pseudo_predictions.sort_values(by=['conceptA', 'isPrerequisite_pred'], ascending=[True, False], inplace=True)

In [92]:
vs = train.groupby('conceptA').agg('sum')['isPrerequisite'].reset_index()

In [93]:
vs['k'] = (vs['isPrerequisite'] / 10).apply(np.ceil) 
# aq rorame max (1, np.ceil)

In [95]:
vs.head()

Unnamed: 0,conceptA,isPrerequisite,k
0,0,4,1.0
1,1,6,1.0
2,2,2,1.0
3,3,2,1.0
4,4,6,1.0


In [126]:
vs['k'].value_counts()

k
1.0    909
0.0    389
2.0     65
3.0     10
4.0      4
6.0      3
7.0      2
5.0      1
9.0      1
Name: count, dtype: int64

In [96]:
pseudo_predictions.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred
1572958,0,386,0.999976
1573900,0,656,0.999872
1573881,0,247,0.999083
1573150,0,1574,0.997331
1573791,0,253,0.996238


In [97]:
pseudo_predictions = pseudo_predictions.merge(vs[['conceptA', 'k']], on='conceptA', how='left')

In [98]:
pseudo_predictions

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred,k
0,0,386,9.999757e-01,1.0
1,0,656,9.998721e-01,1.0
2,0,247,9.990830e-01,1.0
3,0,1574,9.973308e-01,1.0
4,0,253,9.962379e-01,1.0
...,...,...,...,...
2214395,1383,1332,4.489960e-20,1.0
2214396,1383,1496,2.018741e-20,1.0
2214397,1383,1338,1.537805e-20,1.0
2214398,1383,167,1.142168e-20,1.0


In [117]:
pseudo_pos = pseudo_predictions.groupby('conceptA').apply(lambda x : x.head(int(x['k'].values[0]))).reset_index(drop=True)
pseudo_neg = pseudo_predictions.groupby('conceptA').apply(lambda x : x.tail(int(x['k'].values[0]))).reset_index(drop=True)

  pseudo_pos = pseudo_predictions.groupby('conceptA').apply(lambda x : x.head(int(x['k'].values[0]))).reset_index(drop=True)
  pseudo_neg = pseudo_predictions.groupby('conceptA').apply(lambda x : x.tail(int(x['k'].values[0]))).reset_index(drop=True)


# Check Top K Bottom K

In [37]:
K = 67

top_k = sorted_predictions.head(K)

bottom_k = sorted_predictions.tail(K)

In [38]:
print(classification_report(top_k['isPrerequisite'], top_k['pred']))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        67

    accuracy                           1.00        67
   macro avg       1.00      1.00      1.00        67
weighted avg       1.00      1.00      1.00        67



In [39]:
print(classification_report(bottom_k['isPrerequisite'], bottom_k['pred']))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        67

    accuracy                           1.00        67
   macro avg       1.00      1.00      1.00        67
weighted avg       1.00      1.00      1.00        67



## 3.5 Pre-training

To get better performance of NeuMF, we can adopt pre-training strategy. We first train GMF and MLP with random initializations until convergence. Then use their model parameters as the initialization for the corresponding parts of NeuMF’s parameters.  Please pay attention to the output layer, where we concatenate weights of the two models with

$$h ^ { N C F } \leftarrow \left[ \begin{array} { c } { \alpha h ^ { G M F } } \\ { ( 1 - \alpha ) h ^ { M L P } } \end{array} \right]$$

where $h^{GMF}$ and $h^{MLP}$ denote the $h$ vector of the pretrained GMF and MLP model, respectively; and $\alpha$ is a
hyper-parameter determining the trade-off between the two pre-trained models. We set $\alpha$ = 0.5.

### 3.5.1 Training GMF and MLP model
`model.save`, we can set the `dir_name` to store the parameters of GMF and MLP

In [40]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="GMF",
    n_factors=32,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)



In [41]:
model.fit(data)

model.save(dir_name=".pretrain/GMF")

INFO:ncf:Epoch 10 [4.53s]: train_loss = 0.127921 
INFO:ncf:Epoch 20 [2.92s]: train_loss = 0.065289 
INFO:ncf:Epoch 30 [3.52s]: train_loss = 0.022822 
INFO:ncf:Epoch 40 [2.79s]: train_loss = 0.007226 
INFO:ncf:Epoch 50 [2.94s]: train_loss = 0.003469 


In [42]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="MLP",
    n_factors=32,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

In [43]:
model.fit(data)

model.save(dir_name=".pretrain/MLP")

INFO:ncf:Epoch 10 [2.86s]: train_loss = 0.123431 
INFO:ncf:Epoch 20 [2.76s]: train_loss = 0.109892 
INFO:ncf:Epoch 30 [4.46s]: train_loss = 0.085276 
INFO:ncf:Epoch 40 [2.68s]: train_loss = 0.065812 
INFO:ncf:Epoch 50 [4.19s]: train_loss = 0.048528 


### 3.5.2 Load pre-trained GMF and MLP model for NeuMF
`model.load`, we can set the `gmf_dir` and `mlp_dir` to store the parameters for NeuMF.

In [44]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=32,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

model.load(gmf_dir=".pretrain/GMF", mlp_dir=".pretrain/MLP", alpha=0.5)

In [45]:
model.fit(data)

INFO:ncf:Epoch 10 [4.18s]: train_loss = 0.002550 
INFO:ncf:Epoch 20 [5.03s]: train_loss = 0.001300 
INFO:ncf:Epoch 30 [3.64s]: train_loss = 0.000683 
INFO:ncf:Epoch 40 [3.90s]: train_loss = 0.000822 
INFO:ncf:Epoch 50 [3.74s]: train_loss = 0.000433 


### 3.5.3 Compare with not pre-trained NeuMF

You can use beforementioned evaluation methods to evaluate the pre-trained `NCF` Model. Usually, we will find the performance of pre-trained NCF is better than the not pre-trained.

In [46]:
predictions = [[row.conceptA, row.conceptB, model.predict(row.conceptA, row.conceptB)]
               for (_, row) in test.iterrows()]

predictions = pd.DataFrame(predictions, columns=['conceptA', 'conceptB', 'isPrerequisite_pred'])

test.index = predictions.index
predictions['isPrerequisite'] = test['isPrerequisite']
sorted_predictions = predictions.sort_values(by='isPrerequisite_pred', ascending=False)
sorted_predictions['pred'] = (sorted_predictions['isPrerequisite_pred'] >= THRESHOLD).astype(int)

In [47]:
print(classification_report(sorted_predictions['isPrerequisite'], sorted_predictions['pred']))

              precision    recall  f1-score   support

           0       0.76      1.00      0.86      1283
           1       0.97      0.24      0.38       529

    accuracy                           0.78      1812
   macro avg       0.86      0.62      0.62      1812
weighted avg       0.82      0.78      0.72      1812



In [48]:
K = 67

top_k = sorted_predictions.head(K)

bottom_k = sorted_predictions.tail(K)

In [49]:
top_k

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred,isPrerequisite,pred
111,1232,1331,1.000000,1,1
36,75,1502,1.000000,1,1
1734,1344,1093,0.999999,1,1
157,1021,1289,0.999999,1,1
1030,1273,1376,0.999994,1,1
...,...,...,...,...,...
264,307,5,0.991381,1,1
1427,662,510,0.990228,1,1
955,1007,879,0.989968,1,1
425,508,1502,0.989865,1,1


In [50]:
print(classification_report(top_k['isPrerequisite'], top_k['pred']))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.97      1.00      0.98        65

    accuracy                           0.97        67
   macro avg       0.49      0.50      0.49        67
weighted avg       0.94      0.97      0.96        67



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [51]:
print(classification_report(bottom_k['isPrerequisite'], bottom_k['pred']))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        67

    accuracy                           1.00        67
   macro avg       1.00      1.00      1.00        67
weighted avg       1.00      1.00      1.00        67



### Reference: 
1. Xiangnan He, Lizi Liao, Hanwang Zhang, Liqiang Nie, Xia Hu & Tat-Seng Chua, Neural Collaborative Filtering, 2017, https://arxiv.org/abs/1708.05031

2. Official NCF implementation [Keras with Theano]: https://github.com/hexiangnan/neural_collaborative_filtering

3. Other nice NCF implementation [Pytorch]: https://github.com/LaceyChen17/neural-collaborative-filtering