In [2]:
import sys

import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

import itertools

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

from ncf import NCF
from dataset import Dataset as NCFDataset

import warnings
warnings.filterwarnings('ignore')

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.11.2 (tags/v3.11.2:878ead1, Feb  7 2023, 16:38:35) [MSC v.1934 64 bit (AMD64)]
Pandas version: 2.2.1
Tensorflow version: 2.16.1


In [3]:
# top k items to recommend
TOP_K = 10
THRESHOLD = 0.29

# Model parameters
EPOCHS = 50
BATCH_SIZE = 256

n_splits = 5

SEED = 42
PATH = 'C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data'
train_file = PATH + f"/united_train_{SEED}_encoded_stratify.csv"
test_file = PATH + f"/united_test_{SEED}_encoded_stratify.csv"

# Train/Test Split

In [4]:
df = pd.read_csv(PATH + '/united_data_encoded.csv')

df.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite,dataset
0,1076,1562,1,moocML
1,1347,855,1,moocML
2,1347,516,0,moocML
3,1347,1274,0,moocML
4,1347,443,0,moocML


In [5]:
df['dataset'].value_counts()

dataset
moocML     6712
al_cpl     6529
drive      2797
moocDSA    2540
Name: count, dtype: int64

In [6]:
concepts = list(set(list(df['conceptA']) + list(df['conceptB'])))

In [7]:
train, test = train_test_split(df, test_size=0.1, random_state=SEED, stratify=df['dataset'])

In [8]:
df.loc[train.index, '_split_set'] = 'train'
df.loc[test.index, '_split_set'] = 'test'

In [9]:
train.shape, test.shape

((16720, 4), (1858, 4))

In [10]:
train_conceptA = list(set(list(train['conceptA'])))
train_conceptB = list(set(list(train['conceptB'])))
test_conceptA = list(set(list(test['conceptA'])))
test_conceptB = list(set(list(test['conceptB'])))

In [11]:
train['dataset'].value_counts() / test['dataset'].value_counts()

dataset
moocML     9.002981
al_cpl     8.998469
drive      8.989286
moocDSA    9.000000
Name: count, dtype: float64

In [12]:
a = [x for x in test_conceptA if x not in train_conceptA]
b = [x for x in test_conceptB if x not in train_conceptB]

In [13]:
maskA = test['conceptA'].isin(a)
maskB = test['conceptB'].isin(b)

In [14]:
train = pd.concat([train, test[maskA | maskB]], axis=0)
test.drop(index=test[maskA | maskB].index, inplace=True)

In [15]:
train.shape, test.shape

((16766, 4), (1812, 4))

Write datasets to csv files.

In [15]:
train.sort_values(by=['conceptA']).to_csv(train_file, index=False)
test.sort_values(by=['conceptA']).to_csv(test_file, index=False)

# Cross Validation

## Write Splits In Files

In [15]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=n_splits, random_state=SEED, shuffle=True)

X = train[['conceptA', 'conceptB']]
y = train['isPrerequisite']

i = 0
for train_index, test_index in skf.split(X, y):
    train_cv = train.iloc[train_index]
    test_cv = train.iloc[test_index] 
    train_file = PATH + f"/cross_validation_train_{i}_split_{n_splits}.csv"
    test_file = PATH + f"/cross_validation_test_{i}_split_{n_splits}.csv"
    i += 1
    
    train_conceptA = list(set(list(train_cv['conceptA'])))
    train_conceptB = list(set(list(train_cv['conceptB'])))
    test_conceptA = list(set(list(test_cv['conceptA'])))
    test_conceptB = list(set(list(test_cv['conceptB'])))
    
    a = [x for x in test_conceptA if x not in train_conceptA]
    b = [x for x in test_conceptB if x not in train_conceptB]
    
    maskA = test_cv['conceptA'].isin(a)
    maskB = test_cv['conceptB'].isin(b)
    
    train_cv = pd.concat([train_cv, test_cv[maskA | maskB]], axis=0)
    test_cv.drop(index=test_cv[maskA | maskB].index, inplace=True)
    
    train_cv.sort_values(by=['conceptA']).to_csv(train_file, index=False)
    test_cv.sort_values(by=['conceptA']).to_csv(test_file, index=False)

## Cross Validate

In [16]:
hyper_params = {
    # 'n_factors' : [12, 16, 20],
    'n_factors' : [16],
    # 'layer_sizes' : [[4], [8, 4], [16, 8, 4]],
    'layer_sizes' : [[8, 4]],
    # 'n_epochs' : [10, 20, 30],
    'n_epochs' : [20],
    # 'batch_size' : [128, 256, 512],
    'batch_size' : [256],
    # 'learning_rate' : [0.001, 0.01, 0.1]
    'learning_rate' : [0.001]
}


In [17]:
# Generate all combinations
combinations = list(itertools.product(
    hyper_params['n_factors'], 
    hyper_params['layer_sizes'], 
    hyper_params['n_epochs'], hyper_params['batch_size'], 
    hyper_params['learning_rate'] 
))

# Format the combinations into dictionaries
formatted_combinations = [
    {
        'n_factors': combo[0], 
        'layer_sizes': combo[1],
        'n_epochs': combo[2],
        'batch_size': combo[3],
        'learning_rate': combo[4]
    } for combo in combinations
]

In [18]:
%%time

best_combo = {}
best_score = 0
for combo in formatted_combinations:
    score = 0
    print(combo)
    for i in range(n_splits):
        train_file = PATH + f"/cross_validation_train_{i}_split_{n_splits}.csv"
        test_file = PATH + f"/cross_validation_test_{i}_split_{n_splits}.csv"
        train_cv = NCFDataset(train_file=train_file, seed=SEED, col_user='conceptA', col_item='conceptB')
        
        model_cv = NCF(
            n_users=train_cv.n_users,
            n_items=train_cv.n_items,
            model_type="NeuMF",
            n_factors=combo['n_factors'],
            layer_sizes=combo['layer_sizes'],
            n_epochs=combo['n_epochs'],
            batch_size=combo['batch_size'],
            learning_rate=combo['learning_rate'],
            verbose=combo['n_epochs'],
            seed=SEED
        )
        
        model_cv.fit(train_cv)
        
        test_cv = pd.read_csv(test_file)
        predictions = [[row.conceptA, row.conceptB, model_cv.predict(row.conceptA, row.conceptB)]
                   for (_, row) in test_cv.iterrows()]
    
        predictions = pd.DataFrame(predictions, columns=['conceptA', 'conceptB', 'isPrerequisite_pred'])
        predictions['pred'] = (predictions['isPrerequisite_pred'] >= THRESHOLD).astype(int)
        score_split = f1_score(test_cv['isPrerequisite'], predictions['pred'])
        print(f'f1 - split {i}', score_split)
        score += score_split
    score /= n_splits
    print('split score', score)
    if score > best_score:
        best_score = score
        best_combo = combo

INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_0_split_5.csv ...


{'n_factors': 12, 'layer_sizes': [8, 4], 'n_epochs': 20, 'batch_size': 256, 'learning_rate': 0.001}


INFO:ncf:Epoch 20 [1.97s]: train_loss = 0.055609 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_1_split_5.csv ...


f1 - split 0 0.7221510883482715


INFO:ncf:Epoch 20 [1.98s]: train_loss = 0.061843 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_2_split_5.csv ...


f1 - split 1 0.7245283018867924


INFO:ncf:Epoch 20 [2.06s]: train_loss = 0.056123 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_3_split_5.csv ...


f1 - split 2 0.7326984126984127


INFO:ncf:Epoch 20 [2.05s]: train_loss = 0.057781 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_4_split_5.csv ...


f1 - split 3 0.7196439923712651


INFO:ncf:Epoch 20 [1.96s]: train_loss = 0.059197 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_0_split_5.csv ...


f1 - split 4 0.7248576850094877
split score 0.724775896062846
{'n_factors': 16, 'layer_sizes': [8, 4], 'n_epochs': 20, 'batch_size': 256, 'learning_rate': 0.001}


INFO:ncf:Epoch 20 [2.56s]: train_loss = 0.040012 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_1_split_5.csv ...


f1 - split 0 0.7315823190262652


INFO:ncf:Epoch 20 [3.02s]: train_loss = 0.041657 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_2_split_5.csv ...


f1 - split 1 0.7306967984934086


INFO:ncf:Epoch 20 [2.97s]: train_loss = 0.039401 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_3_split_5.csv ...


f1 - split 2 0.752851711026616


INFO:ncf:Epoch 20 [3.07s]: train_loss = 0.036873 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_4_split_5.csv ...


f1 - split 3 0.7206068268015171


INFO:ncf:Epoch 20 [2.86s]: train_loss = 0.040502 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_0_split_5.csv ...


f1 - split 4 0.729456384323641
split score 0.7330388079342895
{'n_factors': 20, 'layer_sizes': [8, 4], 'n_epochs': 20, 'batch_size': 256, 'learning_rate': 0.001}


INFO:ncf:Epoch 20 [1.93s]: train_loss = 0.031326 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_1_split_5.csv ...


f1 - split 0 0.7278603749191984


INFO:ncf:Epoch 20 [1.92s]: train_loss = 0.030972 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_2_split_5.csv ...


f1 - split 1 0.7270408163265306


INFO:ncf:Epoch 20 [1.94s]: train_loss = 0.031031 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_3_split_5.csv ...


f1 - split 2 0.7411689145793192


INFO:ncf:Epoch 20 [2.55s]: train_loss = 0.031698 
INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/cross_validation_train_4_split_5.csv ...


f1 - split 3 0.7218710493046776


INFO:ncf:Epoch 20 [2.16s]: train_loss = 0.029308 


f1 - split 4 0.7256410256410256
split score 0.7287164361541503
CPU times: total: 13min 29s
Wall time: 12min 4s


In [19]:
best_combo, best_score

({'n_factors': 16,
  'layer_sizes': [8, 4],
  'n_epochs': 20,
  'batch_size': 256,
  'learning_rate': 0.001},
 0.7330388079342895)

# Final Model Train

In [16]:
data = NCFDataset(train_file=train_file, seed=SEED, col_user='conceptA', col_item='conceptB')

INFO:dataset:Indexing C:/Users/Luka/Documents/University/bachelor-project-prerequisite-learning/generated_data/united_train_42_encoded_stratify.csv ...


In [17]:
data.n_users, data.n_items

(1384, 1600)

In [18]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=16,
    layer_sizes=[8,4],
    # previously 16, 8, 4
    n_epochs=20,
    batch_size=256,
    learning_rate=0.001,
    verbose=20, 
    seed=SEED
)

In [19]:
%%time

model.fit(data)

INFO:ncf:Epoch 20 [2.37s]: train_loss = 0.031712 


CPU times: total: 1min 12s
Wall time: 1min 27s


## 3.4 Prediction and Evaluation

### 3.4.1 Prediction

Now that our model is fitted, we can call `predict` to get some `predictions`. `predict` returns an internal object Prediction which can be easily converted back to a dataframe:

In [20]:
THRESHOLD

0.29

In [21]:
predictions = [[row.conceptA, row.conceptB, model.predict(row.conceptA, row.conceptB)]
               for (_, row) in df.iterrows()]

predictions = pd.DataFrame(predictions, columns=['conceptA', 'conceptB', 'isPrerequisite_pred'])

predictions['isPrerequisite'] = df['isPrerequisite']
predictions['dataset'] = df['dataset']
predictions['_split_set'] = df['_split_set']
sorted_predictions = predictions.sort_values(by='isPrerequisite_pred', ascending=False)
sorted_predictions['pred'] = (sorted_predictions['isPrerequisite_pred'] >= THRESHOLD).astype(int)

In [22]:
sorted_predictions['isPrerequisite'].mean(), df['isPrerequisite'].mean(), sorted_predictions['isPrerequisite_pred'].mean()

(0.2868984820755733, 0.2868984820755733, 0.24457146153110715)

In [23]:
sorted_predictions['dataset'].value_counts()

dataset
moocML     6712
al_cpl     6529
drive      2797
moocDSA    2540
Name: count, dtype: int64

In [24]:
df_moocML = sorted_predictions[sorted_predictions['dataset'] == 'moocML']
df_moocDSA = sorted_predictions[sorted_predictions['dataset'] == 'moocDSA']
df_drive = sorted_predictions[sorted_predictions['dataset'] == 'drive']
df_alcpl = sorted_predictions[sorted_predictions['dataset'] == 'al_cpl']

In [25]:
df_test = sorted_predictions[sorted_predictions['_split_set'] == 'test']

# Check Test Set

In [26]:
print(classification_report(df_test['isPrerequisite'], df_test['pred']))

              precision    recall  f1-score   support

           0       0.89      0.96      0.92      1309
           1       0.88      0.72      0.79       549

    accuracy                           0.89      1858
   macro avg       0.89      0.84      0.86      1858
weighted avg       0.89      0.89      0.88      1858



In [27]:
print(precision_score(df_test['isPrerequisite'], df_test['pred']), recall_score(df_test['isPrerequisite'], df_test['pred']), f1_score(df_test['isPrerequisite'], df_test['pred']))

0.8834080717488789 0.7176684881602914 0.7919597989949749


# Generate Pseudo Data

In [98]:
train.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite,dataset
2321,375,1086,0,moocML
5480,87,277,1,moocML
14445,476,392,1,al_cpl
7352,595,214,0,moocDSA
14136,1073,913,0,al_cpl


In [29]:
users, items, preds = [], [], []
item = list(train["conceptB"].unique())
for user in train["conceptA"].unique():
    user = [user] * len(item) 
    users.extend(user)
    items.extend(item)
    preds.extend(list(model.predict(user, item, is_list=True)))

pseudo_predictions = pd.DataFrame(data={"conceptA": users, "conceptB":items, "isPrerequisite_pred":preds})

In [30]:
pseudo_predictions.shape, train.shape

((2214400, 3), (16766, 4))

In [31]:
train.nunique()

conceptA          1384
conceptB          1600
isPrerequisite       2
dataset              4
dtype: int64

In [32]:
pseudo_predictions.sort_values(by=['conceptA', 'isPrerequisite_pred'], ascending=[True, False], inplace=True)

In [33]:
pseudo_predictions.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred
1573150,0,1574,0.987543
1573900,0,656,0.984856
1572958,0,386,0.977696
1572957,0,1512,0.977597
1573791,0,253,0.971211


In [55]:
vs = train.groupby('conceptA').agg('sum')['isPrerequisite'].reset_index()

In [56]:
vs

Unnamed: 0,conceptA,isPrerequisite
0,0,4
1,1,6
2,2,2
3,3,2
4,4,6
...,...,...
1379,1379,0
1380,1380,0
1381,1381,2
1382,1382,0


In [121]:
def biased_coin_toss(n, p=0.25):
    # Simulate n coin tosses with probability p of getting 1
    results = np.random.binomial(1, p, n)
    # Count the number of 1s
    count_of_ones = np.sum(results)
    return count_of_ones

In [124]:
np.random.seed(SEED)

vs['k'] = (vs['isPrerequisite']).apply(biased_coin_toss)

vs['isPrerequisite'].sum(), vs['k'].sum()

In [126]:
pseudo_predictions.head()

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred
1573150,0,1574,0.987543
1573900,0,656,0.984856
1572958,0,386,0.977696
1572957,0,1512,0.977597
1573791,0,253,0.971211


In [127]:
pseudo_predictions = pseudo_predictions.merge(vs[['conceptA', 'k']], on='conceptA', how='left')

In [128]:
pseudo_predictions

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred,k
0,0,1574,0.987543,1
1,0,656,0.984856,1
2,0,386,0.977696,1
3,0,1512,0.977597,1
4,0,253,0.971211,1
...,...,...,...,...
2214395,1383,1086,0.000888,0
2214396,1383,1305,0.000880,0
2214397,1383,923,0.000772,0
2214398,1383,1266,0.000769,0


In [129]:
pseudo_pos = pseudo_predictions.groupby('conceptA').apply(lambda x : x.head(int(x['k'].values[0]))).reset_index(drop=True)
pseudo_neg = pseudo_predictions.groupby('conceptA').apply(lambda x : x.tail(int(x['k'].values[0]))).reset_index(drop=True)

## 3.5 Pre-training

To get better performance of NeuMF, we can adopt pre-training strategy. We first train GMF and MLP with random initializations until convergence. Then use their model parameters as the initialization for the corresponding parts of NeuMF’s parameters.  Please pay attention to the output layer, where we concatenate weights of the two models with

$$h ^ { N C F } \leftarrow \left[ \begin{array} { c } { \alpha h ^ { G M F } } \\ { ( 1 - \alpha ) h ^ { M L P } } \end{array} \right]$$

where $h^{GMF}$ and $h^{MLP}$ denote the $h$ vector of the pretrained GMF and MLP model, respectively; and $\alpha$ is a
hyper-parameter determining the trade-off between the two pre-trained models. We set $\alpha$ = 0.5.

### 3.5.1 Training GMF and MLP model
`model.save`, we can set the `dir_name` to store the parameters of GMF and MLP

In [40]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="GMF",
    n_factors=32,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)



In [41]:
model.fit(data)

model.save(dir_name=".pretrain/GMF")

INFO:ncf:Epoch 10 [4.53s]: train_loss = 0.127921 
INFO:ncf:Epoch 20 [2.92s]: train_loss = 0.065289 
INFO:ncf:Epoch 30 [3.52s]: train_loss = 0.022822 
INFO:ncf:Epoch 40 [2.79s]: train_loss = 0.007226 
INFO:ncf:Epoch 50 [2.94s]: train_loss = 0.003469 


In [42]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="MLP",
    n_factors=32,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

In [43]:
model.fit(data)

model.save(dir_name=".pretrain/MLP")

INFO:ncf:Epoch 10 [2.86s]: train_loss = 0.123431 
INFO:ncf:Epoch 20 [2.76s]: train_loss = 0.109892 
INFO:ncf:Epoch 30 [4.46s]: train_loss = 0.085276 
INFO:ncf:Epoch 40 [2.68s]: train_loss = 0.065812 
INFO:ncf:Epoch 50 [4.19s]: train_loss = 0.048528 


### 3.5.2 Load pre-trained GMF and MLP model for NeuMF
`model.load`, we can set the `gmf_dir` and `mlp_dir` to store the parameters for NeuMF.

In [44]:
model = NCF(
    n_users=data.n_users, 
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=32,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)

model.load(gmf_dir=".pretrain/GMF", mlp_dir=".pretrain/MLP", alpha=0.5)

In [45]:
model.fit(data)

INFO:ncf:Epoch 10 [4.18s]: train_loss = 0.002550 
INFO:ncf:Epoch 20 [5.03s]: train_loss = 0.001300 
INFO:ncf:Epoch 30 [3.64s]: train_loss = 0.000683 
INFO:ncf:Epoch 40 [3.90s]: train_loss = 0.000822 
INFO:ncf:Epoch 50 [3.74s]: train_loss = 0.000433 


### 3.5.3 Compare with not pre-trained NeuMF

You can use beforementioned evaluation methods to evaluate the pre-trained `NCF` Model. Usually, we will find the performance of pre-trained NCF is better than the not pre-trained.

In [46]:
predictions = [[row.conceptA, row.conceptB, model.predict(row.conceptA, row.conceptB)]
               for (_, row) in test.iterrows()]

predictions = pd.DataFrame(predictions, columns=['conceptA', 'conceptB', 'isPrerequisite_pred'])

test.index = predictions.index
predictions['isPrerequisite'] = test['isPrerequisite']
sorted_predictions = predictions.sort_values(by='isPrerequisite_pred', ascending=False)
sorted_predictions['pred'] = (sorted_predictions['isPrerequisite_pred'] >= THRESHOLD).astype(int)

In [47]:
print(classification_report(sorted_predictions['isPrerequisite'], sorted_predictions['pred']))

              precision    recall  f1-score   support

           0       0.76      1.00      0.86      1283
           1       0.97      0.24      0.38       529

    accuracy                           0.78      1812
   macro avg       0.86      0.62      0.62      1812
weighted avg       0.82      0.78      0.72      1812



In [48]:
K = 67

top_k = sorted_predictions.head(K)

bottom_k = sorted_predictions.tail(K)

In [49]:
top_k

Unnamed: 0,conceptA,conceptB,isPrerequisite_pred,isPrerequisite,pred
111,1232,1331,1.000000,1,1
36,75,1502,1.000000,1,1
1734,1344,1093,0.999999,1,1
157,1021,1289,0.999999,1,1
1030,1273,1376,0.999994,1,1
...,...,...,...,...,...
264,307,5,0.991381,1,1
1427,662,510,0.990228,1,1
955,1007,879,0.989968,1,1
425,508,1502,0.989865,1,1


In [50]:
print(classification_report(top_k['isPrerequisite'], top_k['pred']))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.97      1.00      0.98        65

    accuracy                           0.97        67
   macro avg       0.49      0.50      0.49        67
weighted avg       0.94      0.97      0.96        67



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [51]:
print(classification_report(bottom_k['isPrerequisite'], bottom_k['pred']))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        67

    accuracy                           1.00        67
   macro avg       1.00      1.00      1.00        67
weighted avg       1.00      1.00      1.00        67



### Reference: 
1. Xiangnan He, Lizi Liao, Hanwang Zhang, Liqiang Nie, Xia Hu & Tat-Seng Chua, Neural Collaborative Filtering, 2017, https://arxiv.org/abs/1708.05031

2. Official NCF implementation [Keras with Theano]: https://github.com/hexiangnan/neural_collaborative_filtering

3. Other nice NCF implementation [Pytorch]: https://github.com/LaceyChen17/neural-collaborative-filtering