This notebook shows how to use a `GraphCreator` instance in a recommendation pipeline to produce the top recommendations for an entry article and display each recommendation's predicted position (`before`/`after`).

In [1]:
# Enable IPython's autoreload extension. Mode 1 reloads only the modules
# registered with %aimport (see the end of this cell) before running code.
%load_ext autoreload
%autoreload 1

# Extend the import search path with ../utils/ — presumably where the local
# GraphAPI and RecommenderPipeline modules live; this must run before they
# are imported below.
import sys
sys.path.append('../utils/')

import pickle
import numpy as np
import pandas as pd

# Project-local classes used throughout the notebook.
from GraphAPI import GraphCreator
from RecommenderPipeline import Recommender

# Register the two local modules with autoreload so edits to them are picked
# up without restarting the kernel.
%aimport GraphAPI
%aimport RecommenderPipeline

# Load in Models

In [2]:
# Load the two pre-trained classifiers from disk.
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing.
# Only load model files that come from a trusted source.
# The handle is named `model_file` (not `model`) so that no closed file object
# is left in the kernel namespace under a name that suggests a classifier.
with open("../models/rf_classifier_v2.pkl", "rb") as model_file:
    rf_classifier = pickle.load(model_file)

with open("../models/xg_model_semisupervised_v2.pkl", "rb") as model_file:
    xg_classifier = pickle.load(model_file)

# Initialize `GraphCreator` Instance

After initialization, pass the `GraphCreator` instance as an argument to a new `Recommender` instance.

In [6]:
# Build the graph for the entry article "Random forest"; this string is what
# format_results() later reports under the 'entry' key.
# Named `graph_creator` rather than `gc` to avoid shadowing the name of the
# standard-library `gc` module in the kernel namespace.
graph_creator = GraphCreator("Random forest")

# The recommender wraps the graph creator; it is fitted and queried in the
# following cells via `rec`.
rec = Recommender(graph_creator)

# Fit the Recommender 

In [7]:
# fit() takes no arguments here.
# NOTE(review): presumably it prepares the data that predict() later scores —
# confirm in RecommenderPipeline.
rec.fit()

# Make Predictions
Pass in one of the models loaded above to make predictions on the data.

In [8]:
# Make predictions using `rf_classifier`, the first model loaded earlier.
rec.predict(rf_classifier)
# To use the other loaded model instead, replace the call above with:
# rec.predict(xg_classifier)

# Format the Results
`format_results()` returns a dictionary containing the entry node and the predictions for the top articles.

In [9]:
# Left as the bare last expression so the notebook displays the returned
# dictionary. Called with no arguments here; the final cell of the notebook
# passes `decision_threshold` explicitly.
rec.format_results()

{'entry': 'Random forest',
 'predictions': [{'node': 'Gradient boosting',
   'similarity_rank': 0.4629312240830749,
   'label_proba': [0.6152191681287082, 0.3847808318712916],
   'position': 'after'},
  {'node': 'Bootstrap aggregating',
   'similarity_rank': 0.36334365644162603,
   'label_proba': [0.6556097534621095, 0.34439024653789024],
   'position': 'after'},
  {'node': 'Random subspace method',
   'similarity_rank': 0.35118901548815384,
   'label_proba': [0.546979258272133, 0.4530207417278669],
   'position': 'after'},
  {'node': 'Boosting (machine learning)',
   'similarity_rank': 0.3421308783981287,
   'label_proba': [0.6518065652357944, 0.34819343476420533],
   'position': 'after'},
  {'node': 'Decision tree learning',
   'similarity_rank': 0.3314717857532199,
   'label_proba': [0.6713333426792982, 0.32866665732070155],
   'position': 'after'},
  {'node': 'Out-of-bag error',
   'similarity_rank': 0.2600665165267665,
   'label_proba': [0.606178854079004, 0.39382114592099593],
  

# Optional: Format as DataFrame for Easy Viewing

In [21]:
# Re-format the results with an explicit decision threshold of 0.64.
# NOTE(review): presumably the threshold controls which label probabilities
# map to 'before' vs 'after' — confirm in RecommenderPipeline.
formatted = rec.format_results(decision_threshold=0.64)

# One row per recommended article, one column per key of each prediction dict.
predictions_df = pd.DataFrame(formatted['predictions'])

# Count how many recommendations fall into each predicted position.
predictions_df['position'].value_counts()

before    61
after     38
Name: position, dtype: int64