This notebook shows how to use a `GraphCreator` instance in a recommendation pipeline to easily produce the top recommendations and display their predicted order (before/after).

In [24]:
%load_ext autoreload
%autoreload 1

import sys
sys.path.append('../utils/')

import pickle
import numpy as np
import pandas as pd

from GraphAPI import GraphCreator
from RecommenderPipeline import Recommender

from sklearn.preprocessing import normalize, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler


%aimport GraphAPI
%aimport RecommenderPipeline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load in Models

In [25]:
# SECURITY: pickle.load can execute arbitrary code while deserializing, so
# only load model files that come from a source you trust.
#
# The handles are named as files (not `model`) so the notebook namespace is
# not left with a closed file object bound to a name that reads like a model.
with open("../models/rf_classifier_v2_normalized.pkl", "rb") as rf_model_file:
    rf_classifier = pickle.load(rf_model_file)

with open("../models/xg_model_semisupervised_v2.pkl", "rb") as xg_model_file:
    xg_classifier = pickle.load(xg_model_file)

# Initialize `GraphCreator` Instance

After initialization, pass the `GraphCreator` instance as an argument to a new `Recommender` instance.

In [26]:
# Create the GraphCreator for the entry article and report the size of its graph.
gc = GraphCreator("Prevention science")
layer_one_node_count = len(gc.graph.nodes)
print("Layer 1 nodes:", layer_one_node_count)

# The recommender is constructed around the GraphCreator instance.
rec = Recommender(gc)

Layer 1 nodes: 35


# Fit the Recommender 

In [27]:
# `Normalizer` (imported from sklearn.preprocessing) is passed as the class
# itself, not as an instance.
# NOTE(review): presumably `fit` instantiates the scaler internally — confirm
# in RecommenderPipeline.
rec.fit(scaler=Normalizer)

# Make Predictions
Pass a trained model to `predict` to make predictions on the data.

In [28]:
# Run predictions with the `rf_classifier` model unpickled earlier in the notebook.
rec.predict(rf_classifier)
# Alternative: predict with the other unpickled model, `xg_classifier`, instead.
# rec.predict(xg_classifier)

# Format the Results
`format_results` returns a dictionary containing the entry node, the decision threshold, and the predictions for the top articles.

In [35]:
# Left as the last expression so the returned value is shown as the cell's
# output: a dict with the keys 'entry', 'decision_threshold' and 'predictions'.
rec.format_results()

{'entry': 'Prevention science',
 'decision_threshold': 0.5500000000000003,
 'predictions': [{'node': 'Protective factor',
   'similarity_rank': 0.01737971568739465,
   'label_proba': [0.40178310037624226, 0.5982168996237576],
   'position': 'before'},
  {'node': 'Medical sociology',
   'similarity_rank': 0.012566392612513588,
   'label_proba': [0.5191529984154043, 0.48084700158459553],
   'position': 'before'},
  {'node': 'Stroke',
   'similarity_rank': 0.010844177475349555,
   'label_proba': [0.5473228960438313, 0.4526771039561686],
   'position': 'before'},
  {'node': 'Emergency psychiatry',
   'similarity_rank': 0.010580470435365714,
   'label_proba': [0.5731768299574662, 0.426823170042534],
   'position': 'after'},
  {'node': 'Psychology',
   'similarity_rank': 0.010254068319517255,
   'label_proba': [0.4424448296578143, 0.5575551703421858],
   'position': 'before'},
  {'node': 'Social support',
   'similarity_rank': 0.010193784411041597,
   'label_proba': [0.5699847246943083, 0.43

# Optional: Format as DataFrame for Easy Viewing

In [36]:
# One row per prediction entry returned by the recommender.
formatted_results = rec.format_results()
recommendations = pd.DataFrame(formatted_results['predictions'])

# Tally how many recommendations fall under each `position` value.
position_counts = recommendations['position'].value_counts()
print(position_counts)

# Last expression: render the table as the cell's output.
recommendations

before    50
after     49
Name: position, dtype: int64


Unnamed: 0,node,similarity_rank,label_proba,position
0,Protective factor,0.017380,"[0.40178310037624226, 0.5982168996237576]",before
1,Medical sociology,0.012566,"[0.5191529984154043, 0.48084700158459553]",before
2,Stroke,0.010844,"[0.5473228960438313, 0.4526771039561686]",before
3,Emergency psychiatry,0.010580,"[0.5731768299574662, 0.426823170042534]",after
4,Psychology,0.010254,"[0.4424448296578143, 0.5575551703421858]",before
5,Social support,0.010194,"[0.5699847246943083, 0.43001527530569184]",after
6,Community psychology,0.010031,"[0.555052178793688, 0.44494782120631216]",after
7,Psychologist,0.010031,"[0.555052178793688, 0.44494782120631216]",after
8,London,0.009811,"[0.5560080555822623, 0.44399194441773765]",after
9,Drug court,0.009779,"[0.555052178793688, 0.44494782120631216]",after
