This notebook shows how to use a `GraphCreator` instance in a recommendation pipeline to produce the top recommendations for an entry article and display each recommendation's predicted position (before/after) relative to that entry.

In [1]:
%load_ext autoreload
%autoreload 1

import sys
# Extend the module search path so the project-local imports below can resolve
# (presumably GraphAPI and RecommenderPipeline live in ../utils/ — confirm).
# The path is relative, so it depends on the kernel's working directory.
sys.path.append('../utils/')

import pickle
import numpy as np
import pandas as pd

from GraphAPI import GraphCreator
from RecommenderPipeline import Recommender

from sklearn.preprocessing import normalize, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler


%aimport GraphAPI
%aimport RecommenderPipeline

# Load in Models

In [2]:
def load_pickled_model(path):
    """Load and return the object pickled at `path`.

    Parameters
    ----------
    path : str
        Location of the pickle file, relative to the kernel's working directory.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Notes
    -----
    `pickle.load` can execute arbitrary code while deserializing, so only
    point this at model files you created yourself or otherwise trust.
    """
    # The context manager closes the file handle even if unpickling fails.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# One classifier per pickle file; the variable names are what later cells use.
rf_v2_classifier = load_pickled_model("../models/rf_classifier_v2_normalized.pkl")
rf_v3_classifier = load_pickled_model("../models/rf_classifier_v3_normalized_714.pkl")
rf_v4_classifier = load_pickled_model("../models/rf_classifier_v4_732.pkl")
xg_classifier = load_pickled_model("../models/xg_model_semisupervised_v2.pkl")

# Initialize `GraphCreator` Instance

After initialization, pass the `GraphCreator` instance as an argument to a new `Recommender` instance.

In [3]:
# Create the GraphCreator for the "Random forest" entry.
# NOTE(review): the name `gc` would shadow the stdlib `gc` module if that were
# ever imported in this notebook; it is kept because other cells may use it.
gc = GraphCreator(
    "Random forest",
    include_see_also=False,
    max_recursive_requests=50,
)

# Report how many entries `next_links` holds (labelled "Layer 1 nodes").
first_layer_links = gc.next_links
print("Layer 1 nodes:", len(first_layer_links))

# Hand the GraphCreator instance to a new recommender.
rec = Recommender(gc)

Layer 1 nodes: 296


# Fit the Recommender 

In [4]:
# NOTE(review): the Normalizer class itself (not an instance) is passed as the
# scaler — presumably Recommender.fit instantiates it; confirm against
# RecommenderPipeline.
rec.fit(scaler=Normalizer)

# Make Predictions
Pass in your trained model to make predictions on the data.

In [16]:
# Run predictions using the `rf_v2_classifier` model loaded earlier.
rec.predict(rf_v2_classifier)
# Alternative: pass a different loaded model instead, e.g. rec.predict(xg_classifier)

# Format the Results
`format_results` returns a dictionary containing the entry node, the decision threshold, and the predictions for the top articles.

In [17]:
# Called without arguments; the displayed result is a dict with the keys
# 'entry', 'decision_threshold' and 'predictions' (a list of per-node dicts).
rec.format_results()

{'entry': 'Random forest',
 'decision_threshold': 0.5200000000000002,
 'predictions': [{'node': 'Bootstrap aggregating',
   'similarity_rank': 3.102432975680058,
   'label_proba': [0.46528778264212683, 0.5347122173578732],
   'position': 'before'},
  {'node': 'Gradient boosting',
   'similarity_rank': 3.0612131136851835,
   'label_proba': [0.6075777649748073, 0.3924222350251926],
   'position': 'after'},
  {'node': 'Random subspace method',
   'similarity_rank': 2.9983039978947565,
   'label_proba': [0.5819371343136285, 0.41806286568637124],
   'position': 'after'},
  {'node': 'Decision tree learning',
   'similarity_rank': 2.831733317059,
   'label_proba': [0.5224169784528564, 0.47758302154714344],
   'position': 'after'},
  {'node': 'Out-of-bag error',
   'similarity_rank': 2.21944864829507,
   'label_proba': [0.6269776572084439, 0.373022342791556],
   'position': 'after'},
  {'node': 'Boosting (machine learning)',
   'similarity_rank': 2.120161319311074,
   'label_proba': [0.5859499

# Optional: Format as DataFrame for Easy Viewing

In [21]:
# Value handed to format_results; presumably the decision threshold, since the
# 'decision_threshold' printed below rounds to the same number — confirm
# against Recommender.format_results.
threshold_arg = 0.582
formatted_results = rec.format_results(threshold_arg)

# One row per entry in the 'predictions' list.
recommendations = pd.DataFrame(formatted_results['predictions'])

# Count how many predictions carry each 'position' label.
position_counts = recommendations["position"].value_counts()
print(position_counts)

# Show the threshold reported back by the recommender, rounded for readability.
reported_threshold = formatted_results['decision_threshold']
print("Decision Threshold:", round(reported_threshold, 2))

# Display only the columns of interest as the cell's rich output.
columns_to_show = ['node', 'position', "label_proba"]
recommendations[columns_to_show]

before    72
after     27
Name: position, dtype: int64
Decision Threshold: 0.58


Unnamed: 0,node,position,label_proba
0,Bootstrap aggregating,before,"[0.46528778264212683, 0.5347122173578732]"
1,Gradient boosting,after,"[0.6075777649748073, 0.3924222350251926]"
2,Random subspace method,before,"[0.5819371343136285, 0.41806286568637124]"
3,Decision tree learning,before,"[0.5224169784528564, 0.47758302154714344]"
4,Out-of-bag error,after,"[0.6269776572084439, 0.373022342791556]"
5,Boosting (machine learning),after,"[0.585949970626697, 0.41405002937330293]"
6,Ensemble learning,before,"[0.5510609451063047, 0.44893905489369523]"
7,Statistical classification,before,"[0.4601552843359293, 0.5398447156640707]"
8,Logic learning machine,after,"[0.6564970505878072, 0.34350294941219284]"
9,Occam learning,before,"[0.471734064973921, 0.528265935026079]"
