This notebook shows how to use a `GraphCreator` instance in a recommendation pipeline to produce the top recommendations for an entry article and display each recommendation's predicted position (before/after).

In [1]:
%load_ext autoreload
%autoreload 1

import sys
sys.path.append('../utils/')

import pickle
import numpy as np
import pandas as pd

from GraphAPI import GraphCreator
from RecommenderPipeline import Recommender

from sklearn.preprocessing import normalize, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler


%aimport GraphAPI
%aimport RecommenderPipeline

# Load in Models

In [2]:
# Load the two pre-trained classifiers from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load model files that come from a trusted source.
with open("../models/rf_classifier_v2.pkl", "rb") as rf_file:
    rf_classifier = pickle.load(rf_file)

with open("../models/xg_model_semisupervised_v2.pkl", "rb") as xg_file:
    xg_classifier = pickle.load(xg_file)

# Initialize `GraphCreator` Instance

After initialization, pass it as an argument to a new `Recommender` instance.

In [3]:
# Wikipedia article that serves as the entry point for the recommendations.
entry_url = "https://en.wikipedia.org/wiki/Receiver_operating_characteristic"
gc = GraphCreator(entry_url)

# The recommender is constructed from the GraphCreator instance.
rec = Recommender(gc)

# Fit the Recommender 

In [4]:
# The scaler is passed as the `Normalizer` class itself, not as an instance.
# NOTE(review): presumably Recommender.fit instantiates it internally — confirm
# in RecommenderPipeline.
rec.fit(scaler=Normalizer)

# Make Predictions
Pass in your model to make predictions on the data.

In [5]:
# Predict with the `rf_classifier` model loaded earlier in the notebook.
rec.predict(rf_classifier)
# Alternative: swap in the other loaded model by uncommenting the line below.
# rec.predict(xg_classifier)

# Format the Results
Returns a dictionary containing the entry node and the predictions for the top articles.

In [6]:
# No `decision_threshold` is passed here, so `format_results` uses its default.
rec.format_results()

{'entry': 'Receiver operating characteristic',
 'predictions': [{'node': 'Detection error tradeoff',
   'similarity_rank': 0.06624103913601846,
   'label_proba': [0.48295221130911925, 0.5170477886908806],
   'position': 'before'},
  {'node': 'Diagnostic odds ratio',
   'similarity_rank': 0.04565181757444286,
   'label_proba': [0.5357898564711393, 0.4642101435288606],
   'position': 'after'},
  {'node': 'Net reclassification improvement',
   'similarity_rank': 0.0413328706690984,
   'label_proba': [0.6657316763483345, 0.3342683236516654],
   'position': 'after'},
  {'node': 'Constant false alarm rate',
   'similarity_rank': 0.040766376010195414,
   'label_proba': [0.5435104298877957, 0.4564895701122042],
   'position': 'after'},
  {'node': 'Sensitivity index',
   'similarity_rank': 0.04071341295719475,
   'label_proba': [0.5361368750439643, 0.4638631249560356],
   'position': 'after'},
  {'node': 'Brier score',
   'similarity_rank': 0.040391635888027994,
   'label_proba': [0.40035259162

# Optional: Format as DataFrame for Easy Viewing

In [12]:
# Re-format the results with an explicit decision threshold, then tabulate
# the per-article predictions.
formatted = rec.format_results(decision_threshold=0.47)
recommendations = pd.DataFrame(formatted['predictions'])

# Show how many articles fall into each predicted position.
position_counts = recommendations["position"].value_counts()
print(position_counts)

# Leave the frame as the last expression so the notebook renders it.
recommendations

before    52
after     47
Name: position, dtype: int64


Unnamed: 0,node,similarity_rank,label_proba,position
0,Detection error tradeoff,0.066241,"[0.48295221130911925, 0.5170477886908806]",after
1,Diagnostic odds ratio,0.045652,"[0.5357898564711393, 0.4642101435288606]",after
2,Net reclassification improvement,0.041333,"[0.6657316763483345, 0.3342683236516654]",after
3,Constant false alarm rate,0.040766,"[0.5435104298877957, 0.4564895701122042]",after
4,Sensitivity index,0.040713,"[0.5361368750439643, 0.4638631249560356]",after
5,Brier score,0.040392,"[0.4003525916218635, 0.5996474083781366]",before
6,Lift (data mining),0.037997,"[0.5392185207901286, 0.4607814792098715]",after
7,Youden's J statistic,0.037162,"[0.5323907790859558, 0.4676092209140441]",after
8,Positive and negative predictive values,0.024235,"[0.5715732993875478, 0.42842670061245214]",after
9,F1 score,0.022330,"[0.5248004778131767, 0.47519952218682326]",after
