This notebook shows how to use a `GraphCreator` instance in a recommendation pipeline to produce the top recommendations and display each one's predicted position (before/after).

In [24]:
# Enable autoreload in mode 1: only modules registered with %aimport (see the
# end of this cell) are reloaded before each cell execution.
%load_ext autoreload
%autoreload 1

# Add ../utils/ to the import path (GraphAPI and RecommenderPipeline presumably
# live there — confirm against the repository layout).
import sys
sys.path.append('../utils/')

import pickle
import numpy as np
import pandas as pd

from GraphAPI import GraphCreator
from RecommenderPipeline import Recommender

# Of these, only Normalizer is used in the cells shown (passed to rec.fit).
from sklearn.preprocessing import normalize, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler


# Register the local modules so autoreload picks up edits to them.
%aimport GraphAPI
%aimport RecommenderPipeline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load in Models

In [25]:
def load_model(path):
    """Deserialize a pickled model from ``path`` and return it.

    Parameters
    ----------
    path : str
        Location of the pickle file to read.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code, so only load model
    # files that come from a trusted source.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# Both classifiers are loaded up front so either one can be passed to
# `rec.predict` further down.
rf_classifier = load_model("../models/rf_classifier_v2_normalized.pkl")
xg_classifier = load_model("../models/xg_model_semisupervised_v2.pkl")

# Initialize `GraphCreator` Instance

After initialization, pass the `GraphCreator` instance as an argument to a new `Recommender` instance.

In [58]:
# Build the graph for the entry article.
entry_article = "Music theory"
gc = GraphCreator(entry_article)

# Report how many nodes the graph holds as a quick sanity check.
node_count = len(gc.graph.nodes)
print("Layer 1 nodes:", node_count)

# Wrap the graph in a recommender for the fit / predict steps below.
rec = Recommender(gc)

Layer 1 nodes: 2185


# Fit the Recommender 

In [59]:
# The Normalizer class itself (not an instance) is passed as the scaler;
# presumably Recommender instantiates it internally — confirm in RecommenderPipeline.
rec.fit(scaler=Normalizer)

# Make Predictions
Pass your trained model to `predict` to make predictions on the data.

In [60]:
# Generate predictions with the `rf_classifier` model loaded earlier.
rec.predict(rf_classifier)
# Alternative: use the other loaded model (`xg_classifier`) instead.
# rec.predict(xg_classifier)

# Format the Results
`format_results` returns a dictionary containing the entry node, the decision threshold, and the predictions for the top articles.

In [61]:
# Display the formatted results: a dict with 'entry', 'decision_threshold' and
# a 'predictions' list holding one dict per recommended node.
rec.format_results()

{'entry': 'Music theory',
 'decision_threshold': 0.5600000000000003,
 'predictions': [{'node': 'Dickinson classification',
   'similarity_rank': 0.037197024018502826,
   'label_proba': [0.5561503452415255, 0.44384965475847443],
   'position': 'before'},
  {'node': 'Ian Bent',
   'similarity_rank': 0.027839411620091757,
   'label_proba': [0.6268176412977112, 0.3731823587022889],
   'position': 'after'},
  {'node': 'Transformational theory',
   'similarity_rank': 0.022601519434496706,
   'label_proba': [0.5508630971012216, 0.4491369028987782],
   'position': 'before'},
  {'node': 'Tune-family',
   'similarity_rank': 0.02226056248263427,
   'label_proba': [0.6146794670171444, 0.38532053298285573],
   'position': 'after'},
  {'node': 'Society for Music Theory',
   'similarity_rank': 0.018983058671615444,
   'label_proba': [0.5240626858470828, 0.475937314152917],
   'position': 'before'},
  {'node': 'Musicology in Cuba',
   'similarity_rank': 0.018579619750610826,
   'label_proba': [0.70405

# Optional: Format as DataFrame for Easy Viewing

In [63]:
# Turn the list of per-node prediction dicts into a table.
formatted = rec.format_results()
recommendations = pd.DataFrame(formatted['predictions'])

# Show how many recommendations fall into each position class.
position_counts = recommendations['position'].value_counts()
print(position_counts)

# Display only the columns needed to read the recommendations.
recommendations.loc[:, ['node', 'position']]

before    51
after     48
Name: position, dtype: int64


Unnamed: 0,node,position
0,Dickinson classification,before
1,Ian Bent,after
2,Transformational theory,before
3,Tune-family,after
4,Society for Music Theory,before
5,Musicology in Cuba,after
6,Scolica enchiriadis,after
7,Spiral array model,after
8,Cenobio Paniagua,after
9,Giovanni Paolo Foscarini,after
