In [95]:
import torch
import pandas as pd
import numpy as np
from torch.nn import Linear
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv, to_hetero

from torch_geometric.data import HeteroData
from torch_geometric.transforms import ToUndirected, RandomLinkSplit

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# From here on, floating-point tensors created without an explicit dtype are float64.
torch.set_default_dtype(torch.float64)

cpu


In [96]:
import os
from getpass import getpass

from py2neo import Graph

# Connection settings are read from the environment so that no credential is
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the local
# defaults; the password is prompted for when NEO4J_PASSWORD is not set.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:11003"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "),
    ),
)

In [97]:
# Sanity check: fetch five Product nodes and print their descriptions.
d = graph.run('MATCH (p:Product) RETURN p.description LIMIT 5').data()
for p in d:
    print(p['p.description'])

WHITE MOROCCAN METAL LANTERN
CREAM CUPID HEARTS COAT HANGER
KNITTED UNION FLAG HOT WATER BOTTLE
RED WOOLLY HOTTIE WHITE HEART.
SET 7 BABUSHKA NESTING BOXES


In [143]:
# Look up the description of one product by its code; the query aliases the
# description as `name`, and only the first returned row is printed.
d = graph.run('MATCH (p:Product) WHERE p.code = "71053" RETURN p.description as name').data()
print(d[0]['name'])

WHITE MOROCCAN METAL LANTERN


In [99]:
# drop graph
# graph.run("""
# CALL gds.graph.drop('retail01') YIELD graphName;
# """)
# # graph.run("""
# # CALL gds.graph.drop('retail02') YIELD graphName;
# # """)


In [100]:
# Project the Client and Product nodes, together with their PURCHASE
# relationships treated as undirected, into a GDS graph named 'retail01'.
# NOTE(review): the commented-out drop cell above suggests this call is not
# re-runnable while 'retail01' already exists -- confirm before Run All.
graph.run("""
CALL gds.graph.project('retail01', ['Client', 'Product'], 
  {PURCHASE: {orientation:'UNDIRECTED'}})
""")

nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
"{Product: {label: 'Product', properties: {}}, Client: {label: 'Client', properties: {}}}","{PURCHASE: {orientation: 'UNDIRECTED', aggregation: 'DEFAULT', type: 'PURCHASE', properties: {}}}",retail01,8056,811144,548


In [101]:
# Run FastRP on the 'retail01' projection and write a 56-dimensional embedding
# to the 'fastrp' property. The recorded output reports a property written for
# every node in the projection (nodeLabels ['*']), i.e. products as well as
# clients; the product query later reads p.fastrp as a feature.

graph.run("""
CALL gds.fastRP.write('retail01', {writeProperty:'fastrp', embeddingDimension:56})
""")

nodeCount,nodePropertiesWritten,preProcessingMillis,computeMillis,writeMillis,configuration
8056,8056,0,359,927,"{writeConcurrency: 4, nodeSelfInfluence: 0, relationshipWeightProperty: null, propertyRatio: 0.0, concurrency: 4, normalizationStrength: 0.0, writeProperty: 'fastrp', iterationWeights: [0.0, 1.0, 1.0], embeddingDimension: 56, nodeLabels: ['*'], sudo: false, relationshipTypes: ['*'], featureProperties: [], username: null}"


In [102]:
import os
from getpass import getpass

from neo4j import GraphDatabase

# Connection settings are read from the environment so that no credential is
# stored in the notebook. The URI and user fall back to the local defaults;
# the password is prompted for when NEO4J_PASSWORD is not set.
url = os.environ.get('NEO4J_URI', 'bolt://localhost:11003')
user = os.environ.get('NEO4J_USER', 'neo4j')
password = os.environ.get('NEO4J_PASSWORD') or getpass('Neo4j password: ')

driver = GraphDatabase.driver(url, auth=(user, password))

def fetch_data(query, params=None):
    """Run a Cypher query and return its result as a pandas DataFrame.

    Args:
        query: Cypher statement passed to ``session.run``.
        params: Optional mapping of query parameters. ``None`` (the default)
            means "no parameters"; a fresh dict is used for each call instead
            of a shared mutable default.

    Returns:
        A DataFrame with one row per returned record and one column per key
        reported by the result.
    """
    with driver.session() as session:
        result = session.run(query, params or {})
        # Materialise the rows while the session is still open.
        rows = [record.values() for record in result]
        return pd.DataFrame(rows, columns=result.keys())

In [103]:
from IPython.display import display

def load_node(cypher, index_col, encoders=None, **kwargs):
    """Load one node type from Neo4j.

    Runs ``cypher`` through ``fetch_data``, indexes the frame by ``index_col``,
    displays its first five rows, and returns ``(x, mapping)``:

    * ``mapping`` maps each unique index value to a consecutive integer, in
      order of first appearance.
    * ``x`` is ``None`` when no ``encoders`` are given; otherwise it is the
      concatenation (along the last dimension) of each encoder applied to the
      column it is keyed by.

    Extra keyword arguments are accepted and ignored.
    """
    df = fetch_data(cypher).set_index(index_col)
    display(df.head(5))

    # Node id -> row position used by the graph tensors.
    unique_ids = df.index.unique()
    mapping = {node_id: position for position, node_id in enumerate(unique_ids)}

    if encoders is None:
        return None, mapping

    # One feature block per encoded column, joined side by side.
    feature_blocks = [encode(df[column]) for column, encode in encoders.items()]
    return torch.cat(feature_blocks, dim=-1), mapping

In [104]:
def load_edge(cypher, src_index_col, src_mapping, dst_index_col, dst_mapping,
                  encoders=None, **kwargs):
    """Load one edge type from Neo4j.

    Runs ``cypher`` through ``fetch_data``, displays the first five rows, and
    returns ``(edge_index, edge_attr)``:

    * ``edge_index`` is a tensor built from two rows: the source column
      translated through ``src_mapping`` and the destination column translated
      through ``dst_mapping`` (a missing id raises ``KeyError``).
    * ``edge_attr`` is ``None`` when no ``encoders`` are given; otherwise it is
      the concatenation (along the last dimension) of each encoder applied to
      the column it is keyed by.

    Extra keyword arguments are accepted and ignored.
    """
    df = fetch_data(cypher)
    display(df.head(5))

    # Translate database ids into the integer positions from the node mappings.
    sources = [src_mapping[raw_id] for raw_id in df[src_index_col]]
    targets = [dst_mapping[raw_id] for raw_id in df[dst_index_col]]
    edge_index = torch.tensor([sources, targets])

    if encoders is None:
        return edge_index, None

    attr_blocks = [encode(df[column]) for column, encode in encoders.items()]
    return edge_index, torch.cat(attr_blocks, dim=-1)

In [105]:
class SequenceEncoder(object):
    """Encode a column of raw strings into sentence embeddings.

    Wraps a ``SentenceTransformer`` model; calling the encoder on a column
    returns the embeddings as a CPU tensor.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2', device=None):
        self.device = device
        self.model = SentenceTransformer(model_name, device=device)

    def __call__(self, df):
        # Gradients are never needed for feature extraction.
        with torch.no_grad():
            embeddings = self.model.encode(
                df.values,
                show_progress_bar=True,
                convert_to_tensor=True,
                device=self.device,
            )
        return embeddings.cpu()

In [106]:
class PriceEncoder(object):
    """Encode a numeric price column as a single-feature column vector.

    Args:
        dtype: Target tensor dtype. ``None`` (the default) keeps the previous
            behaviour of converting to ``torch.double``.

    Calling the encoder on a column of ``n`` values returns a tensor of shape
    ``(n, 1)`` so it can be concatenated with other per-node feature blocks
    along the last dimension.
    """

    def __init__(self, dtype=None):
        self.dtype = dtype

    def __call__(self, df):
        # Honour the requested dtype; it used to be stored but never applied.
        dtype = torch.double if self.dtype is None else self.dtype
        return torch.tensor(df.values).to(dtype).unsqueeze(1)

In [107]:
class IdentityEncoder(object):
    """Convert raw column values to a PyTorch tensor without transforming them.

    Args:
        dtype: Optional dtype the result is converted to. ``None`` leaves the
            dtype produced by the conversion untouched.
        is_list: Set to ``True`` when every cell of the column holds a
            sequence of numbers (e.g. an embedding); the rows are then stacked
            into one tensor with a row per cell.
    """

    def __init__(self, dtype=None, is_list=False):
        self.dtype = dtype
        self.is_list = is_list

    def __call__(self, df):
        if self.is_list:
            x = torch.stack([torch.tensor(el) for el in df.values])
        else:
            x = torch.from_numpy(df.values)
        # Apply the requested dtype on both paths; the list path used to
        # ignore it.
        return x if self.dtype is None else x.to(self.dtype)

In [108]:
# Fetch every client id. No encoders are passed, so load_node returns None for
# the features (client_x) and only the id -> position mapping carries data.
client_query = """
MATCH (c:Client) RETURN c.id AS clientId
"""

client_x, client_mapping = load_node(client_query, index_col='clientId')

15101
17033
13221
14201
17692


In [109]:
# Fetch each product's code, unit price, description and FastRP embedding.
product_query = """
MATCH (p:Product)
WITH p
RETURN p.code AS productCode, p.unit_price AS price, p.description AS info, p.fastrp AS fastrp
"""

# Product features are the concatenation, in this order, of the price column,
# the sentence embedding of the description, and the stored FastRP vector.
product_x, product_mapping = load_node(
    product_query, 
    index_col='productCode', encoders={
        'price': PriceEncoder(),
        'info': SequenceEncoder(),
        'fastrp': IdentityEncoder(is_list=True)
    })

Unnamed: 0_level_0,price,info,fastrp
productCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
71053,3.75,WHITE MOROCCAN METAL LANTERN,"[0.42139118909835815, 0.30570438504219055, 0.5..."
84406B,4.15,CREAM CUPID HEARTS COAT HANGER,"[0.2723379135131836, 0.20069877803325653, 0.44..."
84029G,4.25,KNITTED UNION FLAG HOT WATER BOTTLE,"[0.42001596093177795, 0.2053660899400711, 0.46..."
84029E,4.25,RED WOOLLY HOTTIE WHITE HEART.,"[0.31302839517593384, 0.20005005598068237, 0.4..."
22752,3.95,SET 7 BABUSHKA NESTING BOXES,"[0.3180159330368042, 0.23087292909622192, 0.60..."


Batches:   0%|          | 0/116 [00:00<?, ?it/s]

In [110]:
# Fetch every PURCHASE relationship with its rate; the rate becomes the edge
# label that the model is trained to predict.
buy_query = """
MATCH (c:Client)-[r:PURCHASE]->(p:Product) 
RETURN c.id AS clientId, p.code AS productCode, r.rate AS rate
"""

# Client ids and product codes are translated to node positions through the
# mappings built by the two load_node calls.
edge_index, edge_label = load_edge(
    buy_query,
    src_index_col='clientId',
    src_mapping=client_mapping,
    dst_index_col='productCode',
    dst_mapping=product_mapping,
    encoders={'rate': IdentityEncoder(dtype=torch.int32)},
)

Unnamed: 0,clientId,productCode,rate
0,16713,71053,5
1,15471,71053,4
2,15471,71053,0
3,18219,71053,3
4,14096,71053,5


In [111]:
data = HeteroData()
# Clients have no loaded attributes, so each one gets a one-hot row
# (identity matrix) as its feature vector for message passing:
data['client'].x = torch.eye(len(client_mapping), device=device)
# Add product node features
data['product'].x = product_x
# Add purchases between clients and products, with the rate as edge label
data['client', 'purchase', 'product'].edge_index = edge_index
data['client', 'purchase', 'product'].edge_label = edge_label
data.to(device, non_blocking=True)

HeteroData(
  [1mclient[0m={ x=[4372, 4372] },
  [1mproduct[0m={ x=[3684, 441] },
  [1m(client, purchase, product)[0m={
    edge_index=[2, 405572],
    edge_label=[405572]
  }
)

In [112]:
# 1. Add reverse ('product', 'rev_purchase', 'client') edges so that messages
#    can flow in both directions.
data = ToUndirected()(data)
del data['product', 'rev_purchase', 'client'].edge_label  # Remove "reverse" label.

# 2. Perform a link-level split into training, validation, and test edges.
#    10% of the purchase edges go to validation and 10% to test; no negative
#    edges are sampled (neg_sampling_ratio=0.0).
transform = RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('client', 'purchase', 'product')],
    rev_edge_types=[('product', 'rev_purchase', 'client')],
)
train_data, val_data, test_data = transform(data)

In [113]:
class GNNEncoder(torch.nn.Module):
    """Two-layer GraphSAGE encoder.

    Both layers are created with lazily inferred input sizes ``(-1, -1)``; the
    first maps to ``hidden_channels`` and is followed by a ReLU, the second
    maps to ``out_channels``.
    """

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        hidden = self.conv1(x, edge_index)
        hidden = hidden.relu()
        return self.conv2(hidden, edge_index)


class EdgeDecoder(torch.nn.Module):
    """Predict one scalar per (client, product) pair from node embeddings.

    The client and product embeddings of each pair are concatenated and passed
    through a two-layer MLP (ReLU in between); the output is flattened to one
    value per pair.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        # First row indexes clients, second row indexes products.
        client_idx, product_idx = edge_label_index
        pair_features = torch.cat(
            [z_dict['client'][client_idx], z_dict['product'][product_idx]],
            dim=-1,
        )
        hidden = self.lin1(pair_features).relu()
        return self.lin2(hidden).view(-1)

class Model(torch.nn.Module):
    """Heterogeneous GraphSAGE encoder followed by an edge-level decoder.

    Args:
        hidden_channels: Width of the encoder layers and of the decoder's
            hidden layer.
        metadata: Graph metadata passed to ``to_hetero``. When omitted, the
            metadata of the notebook-level ``data`` object is used, which is
            the previous behaviour; passing it explicitly removes the
            dependency on that global.
    """

    def __init__(self, hidden_channels, metadata=None):
        super().__init__()
        if metadata is None:
            metadata = data.metadata()
        encoder = GNNEncoder(hidden_channels, hidden_channels)
        # Duplicate the homogeneous encoder per edge type; messages arriving
        # at a node type from different edge types are summed.
        self.encoder = to_hetero(encoder, metadata, aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

In [114]:
# Per-rate weights for the loss: count how often each integer rate occurs
# among the training edge labels, then scale so that the most frequent rate
# gets weight 1 and rarer rates get proportionally larger weights.
# NOTE(review): a rate value below the maximum that never occurs has count 0
# and therefore an infinite weight; it is only harmful if that rate is later
# used as a target -- confirm.
weight = torch.bincount(train_data['client', 'product'].edge_label)
weight = weight.max() / weight

def weighted_mse_loss(pred, target, weight=None):
    """Mean squared error with an optional per-target-value weight.

    ``weight`` is indexed by ``target``, so ``target`` must hold integer class
    values when a weight tensor is given. Without a weight every sample counts
    equally. Both the target and the looked-up weights are converted to the
    dtype of ``pred``.
    """
    squared_error = (pred - target.to(pred.dtype)).pow(2)
    if weight is None:
        scale = 1.
    else:
        scale = weight[target].to(pred.dtype)
    return (scale * squared_error).mean()

In [115]:
# Build the model with 18 hidden channels and move it to the selected device.
model = Model(hidden_channels=18).to(device)
# Convert parameters to float64; as the last expression of the cell this also
# displays the module structure.
model.double()

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (client__purchase__product): SAGEConv((-1, -1), 18, aggr=mean)
      (product__rev_purchase__client): SAGEConv((-1, -1), 18, aggr=mean)
    )
    (conv2): ModuleDict(
      (client__purchase__product): SAGEConv((-1, -1), 18, aggr=mean)
      (product__rev_purchase__client): SAGEConv((-1, -1), 18, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(in_features=36, out_features=18, bias=True)
    (lin2): Linear(in_features=18, out_features=1, bias=True)
  )
)

In [116]:
# Due to lazy initialization, we need to run one model step so the number
# of parameters can be inferred:
with torch.no_grad():
    model.encoder(train_data.x_dict, train_data.edge_index_dict)

# The optimizer is created only after the dry run above, once the lazily
# sized parameters have been materialised.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [117]:
def train():
    """Run one full-batch optimisation step on ``train_data``.

    Predicts a value for every training purchase edge, scores it with
    ``weighted_mse_loss`` using the notebook-level ``weight`` tensor, updates
    the model through ``optimizer`` and returns the loss as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    purchases = train_data['client', 'purchase', 'product']
    pred = model(
        train_data.x_dict,
        train_data.edge_index_dict,
        purchases.edge_label_index,
    )
    loss = weighted_mse_loss(pred, purchases.edge_label, weight)

    loss.backward()
    optimizer.step()
    return float(loss)

In [118]:
@torch.no_grad()
def test(data):
    """Return the RMSE of the model on the purchase edges of ``data``.

    Args:
        data: A split produced by the link split (train, validation or test).

    Returns:
        The root mean squared error, as a Python float, between the
        predictions clamped to the range [0, 5] and the edge labels.
    """
    model.eval()
    purchases = data['client', 'purchase', 'product']
    pred = model(data.x_dict, data.edge_index_dict,
                 purchases.edge_label_index)
    pred = pred.clamp(min=0, max=5)
    # Compare in the model's own precision (as weighted_mse_loss does) rather
    # than mixing a float32 target with float64 predictions.
    target = purchases.edge_label.to(pred.dtype)
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)

In [123]:
# Train for epochs 1..49 (the upper bound of range is exclusive). After each
# optimisation step the RMSE is measured on all three splits and logged next
# to the weighted training loss.
for epoch in range(1, 50):
    loss = train()
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    test_rmse = test(test_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}, Test: {test_rmse:.4f}')

Epoch: 001, Loss: 4.3265, Train: 1.7162, Val: 1.7286, Test: 1.7313
Epoch: 002, Loss: 4.3257, Train: 1.7179, Val: 1.7304, Test: 1.7332
Epoch: 003, Loss: 4.3251, Train: 1.7196, Val: 1.7323, Test: 1.7351
Epoch: 004, Loss: 4.3245, Train: 1.7206, Val: 1.7335, Test: 1.7364
Epoch: 005, Loss: 4.3239, Train: 1.7206, Val: 1.7337, Test: 1.7367
Epoch: 006, Loss: 4.3233, Train: 1.7196, Val: 1.7330, Test: 1.7359
Epoch: 007, Loss: 4.3227, Train: 1.7180, Val: 1.7317, Test: 1.7347
Epoch: 008, Loss: 4.3222, Train: 1.7166, Val: 1.7306, Test: 1.7335
Epoch: 009, Loss: 4.3217, Train: 1.7158, Val: 1.7301, Test: 1.7330
Epoch: 010, Loss: 4.3212, Train: 1.7159, Val: 1.7304, Test: 1.7334
Epoch: 011, Loss: 4.3207, Train: 1.7168, Val: 1.7315, Test: 1.7345
Epoch: 012, Loss: 4.3202, Train: 1.7179, Val: 1.7328, Test: 1.7359
Epoch: 013, Loss: 4.3198, Train: 1.7188, Val: 1.7339, Test: 1.7370
Epoch: 014, Loss: 4.3194, Train: 1.7191, Val: 1.7345, Test: 1.7376
Epoch: 015, Loss: 4.3190, Train: 1.7188, Val: 1.7343, Test: 1.

In [120]:
num_client = len(client_mapping)
num_product = len(product_mapping)

# Invert the id -> position mappings so predictions can be reported by id.
reverse_client_mapping = {position: key for key, position in client_mapping.items()}
reverse_product_mapping = {position: key for key, position in product_mapping.items()}

results = []

model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    # The node embeddings do not depend on the client being scored, so the
    # encoder runs once here instead of once per client.
    z_dict = model.encoder(data.x_dict, data.edge_index_dict)
    col = torch.arange(num_product, device=z_dict['product'].device)

    for client_id in range(num_client):
        # Pair this client with every product.
        row = torch.full_like(col, client_id)
        edge_label_index = torch.stack([row, col], dim=0)

        pred = model.decoder(z_dict, edge_label_index).clamp(min=0, max=5)
        user_neo4j_id = reverse_client_mapping[client_id]

        # Keep products whose clamped prediction equals the maximum rate of 5;
        # the first ten are taken in product-position order, not ranked.
        top_rated = (pred == 5).nonzero(as_tuple=True)[0]

        ten_predictions = [reverse_product_mapping[el] for el in top_rated[:10].tolist()]
        results.append({'client': user_neo4j_id, 'product': ten_predictions})
    

KeyboardInterrupt: 

In [89]:
results

[{'client': 17850,
  'product': ['85123A',
   '71053',
   '84406B',
   '84029G',
   '84029E',
   '22752',
   '21730',
   '22633',
   '22632',
   '84879']},
 {'client': 13047,
  'product': ['85123A',
   '71053',
   '84406B',
   '84029G',
   '84029E',
   '22752',
   '21730',
   '22633',
   '22632',
   '84879']},
 {'client': 12583,
  'product': ['85123A',
   '71053',
   '84406B',
   '84029G',
   '84029E',
   '22752',
   '21730',
   '22633',
   '22632',
   '84879']},
 {'client': 13748,
  'product': ['85123A',
   '71053',
   '84406B',
   '84029G',
   '84029E',
   '22752',
   '21730',
   '22633',
   '22632',
   '84879']},
 {'client': 15100,
  'product': ['85123A',
   '71053',
   '84406B',
   '84029G',
   '84029E',
   '22752',
   '21730',
   '22633',
   '22632',
   '84879']},
 {'client': 15291,
  'product': ['85123A',
   '71053',
   '84406B',
   '84029G',
   '84029E',
   '22752',
   '21730',
   '22633',
   '22632',
   '84879']},
 {'client': 14688,
  'product': ['85123A',
   '71053',
   '84406

In [149]:
num_client = len(client_mapping)
num_product = len(product_mapping)

# Invert the id -> position mappings so predictions can be reported by id.
reverse_client_mapping = {position: key for key, position in client_mapping.items()}
reverse_product_mapping = {position: key for key, position in product_mapping.items()}

results = []

model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    # The node embeddings do not depend on the client being scored, so the
    # encoder runs once here instead of once per client.
    z_dict = model.encoder(data.x_dict, data.edge_index_dict)
    col = torch.arange(num_product, device=z_dict['product'].device)

    # Score only the first 300 clients (or all of them if there are fewer).
    for client_id in range(min(300, num_client)):
        # Pair this client with every product.
        row = torch.full_like(col, client_id)
        edge_label_index = torch.stack([row, col], dim=0)
        # make prediction
        pred = model.decoder(z_dict, edge_label_index).clamp(min=0, max=5)
        user_neo4j_id = reverse_client_mapping[client_id]
        # only collect the rate >= 4.0
        candidates = (pred >= 4.0).nonzero(as_tuple=True)[0]
        # sort the candidates in descending order of predicted rate
        ranked = candidates[torch.argsort(pred[candidates], descending=True)]
        # collect the first ten products
        ten_predictions = [reverse_product_mapping[el] for el in ranked[:10].tolist()]
        results.append({'client': user_neo4j_id, 'product': ten_predictions})
    

In [150]:
results

[{'client': 15101, 'product': []},
 {'client': 17033, 'product': []},
 {'client': 13221, 'product': []},
 {'client': 14201, 'product': []},
 {'client': 17692, 'product': []},
 {'client': 15822, 'product': []},
 {'client': 16134, 'product': []},
 {'client': 17551, 'product': []},
 {'client': 18050, 'product': []},
 {'client': 14913, 'product': []},
 {'client': 12666, 'product': []},
 {'client': 15641, 'product': []},
 {'client': 13124, 'product': []},
 {'client': 17214, 'product': []},
 {'client': 15456, 'product': []},
 {'client': 15062, 'product': []},
 {'client': 12928, 'product': []},
 {'client': 15727, 'product': []},
 {'client': 17980, 'product': []},
 {'client': 15290, 'product': []},
 {'client': 17752,
  'product': ['21645', '35610C', '35610B', '90012A', '23664']},
 {'client': 15615, 'product': []},
 {'client': 12577, 'product': []},
 {'client': 17044, 'product': []},
 {'client': 13829, 'product': []},
 {'client': 17633, 'product': []},
 {'client': 13266, 'product': []},
 {'clie

In [125]:
results1 

[{'client': 15101, 'product': []},
 {'client': 17033, 'product': []},
 {'client': 13221, 'product': []},
 {'client': 14201, 'product': []},
 {'client': 17692, 'product': []},
 {'client': 15822, 'product': []},
 {'client': 16134, 'product': []},
 {'client': 17551, 'product': []},
 {'client': 18050, 'product': []},
 {'client': 14913, 'product': []},
 {'client': 12666, 'product': []},
 {'client': 15641, 'product': []},
 {'client': 13124, 'product': []},
 {'client': 17214, 'product': []},
 {'client': 15456, 'product': []},
 {'client': 15062, 'product': []},
 {'client': 12928, 'product': []},
 {'client': 15727, 'product': []},
 {'client': 17980, 'product': []},
 {'client': 15290, 'product': []},
 {'client': 17752,
  'product': ['35610B', '35610C', '21645', '90012A', '23664']},
 {'client': 15615, 'product': []},
 {'client': 12577, 'product': []},
 {'client': 17044, 'product': []},
 {'client': 13829, 'product': []},
 {'client': 17633, 'product': []},
 {'client': 13266, 'product': []},
 {'clie

In [145]:
def run_query(query, **parameters):
    """Run a Cypher query and return the ``name`` field of its first row.

    Args:
        query: Cypher statement that returns a column aliased as ``name``.
        **parameters: Optional Cypher parameters, forwarded to ``graph.run``.

    Raises:
        IndexError: If the query returns no rows.
    """
    d = graph.run(query, **parameters).data()
    return d[0]['name']

# The product code is passed as a Cypher parameter instead of being spliced
# into the query text, so codes containing quotes cannot break the statement.
product_name_query = "MATCH (p:Product) WHERE p.code = $code RETURN p.description as name"

# `results1` is assigned in a later cell of this notebook; that cell has to be
# run before this one.
for res in results1:
    if res['product']:
        products = [run_query(product_name_query, code=code) for code in res['product']]

        print('client:', res['client'], 'product:', products)
        print()
        print()

client: 17752 product: ['BLACK CHRISTMAS FLOCK DROPLET ', 'WHITE CHRISTMAS FLOCK DROPLET ', 'ASSORTED TUTTI FRUTTI ROUND BOX', 'MIDNIGHT BLUE DROP CRYSTAL NECKLACE', 'FLOWER SHOP DESIGN MUG']

client: 14777 product: ['WEEKEND BAG VINTAGE ROSE PAISLEY', 'PINK BAROQUE FLOCK CANDLE HOLDER', 'BLACK CHRISTMAS FLOCK DROPLET ', 'WHITE CHRISTMAS FLOCK DROPLET ', 'ASSORTED TUTTI FRUTTI ROUND BOX', 'TEA TIME TEA TOWELS ', 'M/COLOUR POM-POM CURTAIN', 'GLASS BEAD HOOP NECKLACE GREEN', 'SMALL TAHITI BEACH BAG', 'LILAC VOTIVE CANDLE']

client: 14373 product: ['BLACK CHRISTMAS FLOCK DROPLET ', 'WHITE CHRISTMAS FLOCK DROPLET ', 'ASSORTED TUTTI FRUTTI ROUND BOX', 'TEA TIME TEA TOWELS ', 'LILAC VOTIVE CANDLE', 'MIDNIGHT BLUE DROP CRYSTAL NECKLACE', 'FLOWER SHOP DESIGN MUG']

client: 14220 product: ['ASSORTED TUTTI FRUTTI ROUND BOX']



In [151]:
def run_query(query, **parameters):
    """Run a Cypher query and return the ``name`` field of its first row.

    Args:
        query: Cypher statement that returns a column aliased as ``name``.
        **parameters: Optional Cypher parameters, forwarded to ``graph.run``.

    Raises:
        IndexError: If the query returns no rows.
    """
    d = graph.run(query, **parameters).data()
    return d[0]['name']

# The product code is passed as a Cypher parameter instead of being spliced
# into the query text, so codes containing quotes cannot break the statement.
product_name_query = "MATCH (p:Product) WHERE p.code = $code RETURN p.description as name"

# Translate the recommended product codes of every client that received at
# least one recommendation into product descriptions.
for res in results:
    if res['product']:
        products = [run_query(product_name_query, code=code) for code in res['product']]

        print('client:', res['client'], 'product:', products)
        print()

client: 17752 product: ['ASSORTED TUTTI FRUTTI ROUND BOX', 'WHITE CHRISTMAS FLOCK DROPLET ', 'BLACK CHRISTMAS FLOCK DROPLET ', 'MIDNIGHT BLUE DROP CRYSTAL NECKLACE', 'FLOWER SHOP DESIGN MUG']

client: 14777 product: ['ASSORTED TUTTI FRUTTI ROUND BOX', 'WHITE CHRISTMAS FLOCK DROPLET ', 'BLACK CHRISTMAS FLOCK DROPLET ', 'MIDNIGHT BLUE DROP CRYSTAL NECKLACE', 'FLOWER SHOP DESIGN MUG', 'LILAC VOTIVE CANDLE', 'TEA TIME TEA TOWELS ', 'SMALL TAHITI BEACH BAG', 'WEEKEND BAG VINTAGE ROSE PAISLEY', 'PINK BAROQUE FLOCK CANDLE HOLDER']

client: 14373 product: ['ASSORTED TUTTI FRUTTI ROUND BOX', 'WHITE CHRISTMAS FLOCK DROPLET ', 'BLACK CHRISTMAS FLOCK DROPLET ', 'MIDNIGHT BLUE DROP CRYSTAL NECKLACE', 'FLOWER SHOP DESIGN MUG', 'LILAC VOTIVE CANDLE', 'TEA TIME TEA TOWELS ']

client: 14220 product: ['ASSORTED TUTTI FRUTTI ROUND BOX']

client: 13922 product: ['ASSORTED TUTTI FRUTTI ROUND BOX', 'WHITE CHRISTMAS FLOCK DROPLET ', 'BLACK CHRISTMAS FLOCK DROPLET ', 'MIDNIGHT BLUE DROP CRYSTAL NECKLACE', 'FL

In [66]:
num_client = len(client_mapping)
num_product = len(product_mapping)

# Invert the id -> position mappings so predictions can be reported by id.
reverse_client_mapping = {position: key for key, position in client_mapping.items()}
reverse_product_mapping = {position: key for key, position in product_mapping.items()}

results_dict = []

model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    # The node embeddings do not depend on the client being scored, so the
    # encoder runs once here instead of once per client.
    z_dict = model.encoder(data.x_dict, data.edge_index_dict)
    col = torch.arange(num_product, device=z_dict['product'].device)

    # Score only the first 300 clients (or all of them if there are fewer).
    for client_id in range(min(300, num_client)):
        # Pair this client with every product.
        row = torch.full_like(col, client_id)
        edge_label_index = torch.stack([row, col], dim=0)

        pred = model.decoder(z_dict, edge_label_index).clamp(min=0, max=5)

        user_neo4j_id = reverse_client_mapping[client_id]

        # Keep only products with a predicted rate of at least 4 ...
        candidates = (pred >= 4).nonzero(as_tuple=True)[0]
        # ... ordered from the highest predicted rate to the lowest.
        ranked = candidates[torch.argsort(pred[candidates], descending=True)]

        ten_predictions = [reverse_product_mapping[el] for el in ranked[:10].tolist()]
        results_dict.append({'client': user_neo4j_id, 'product': ten_predictions})
    

In [67]:
results_dict

[{'client': 14673, 'product': []},
 {'client': 13488, 'product': []},
 {'client': 16221, 'product': []},
 {'client': 15002, 'product': []},
 {'client': 15965, 'product': []},
 {'client': 15024, 'product': []},
 {'client': 17675, 'product': []},
 {'client': 13769, 'product': []},
 {'client': 16904, 'product': []},
 {'client': 16086, 'product': []},
 {'client': 13786, 'product': ['90012A']},
 {'client': 13880, 'product': []},
 {'client': 13173, 'product': []},
 {'client': 15858, 'product': []},
 {'client': 15180, 'product': []},
 {'client': 12347, 'product': []},
 {'client': 14739, 'product': []},
 {'client': 16293, 'product': []},
 {'client': 17419, 'product': []},
 {'client': 16775, 'product': []},
 {'client': 16306, 'product': []},
 {'client': 16950, 'product': []},
 {'client': 17591, 'product': []},
 {'client': 12839, 'product': []},
 {'client': 13807, 'product': ['90012A']},
 {'client': 16125, 'product': []},
 {'client': 15081, 'product': []},
 {'client': 18102, 'product': []},
 {'c

In [104]:
# Descriptions of every product that client 17850 has a relationship to.
query = '''
MATCH (c:Client)-[r]->(p:Product)
WHERE c.id = 17850
RETURN p.description;
'''

import os
from getpass import getpass

from py2neo import Graph

# Connection settings are read from the environment so that no credential is
# stored in the notebook; the password is prompted for when it is not set.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:11003"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "),
    ),
)
d = graph.run(query).data()
print(d)

[{'p.description': 'IVORY EMBROIDERED QUILT '}, {'p.description': 'WOODEN FRAME ANTIQUE WHITE '}, {'p.description': 'SAVE THE PLANET MUG'}, {'p.description': "YOU'RE CONFUSING ME METAL SIGN "}, {'p.description': 'HAND WARMER RED RETROSPOT'}, {'p.description': 'WOOD 2 DRAWER CABINET WHITE FINISH'}, {'p.description': 'COLOURED GLASS STAR T-LIGHT HOLDER'}, {'p.description': 'WOODEN FRAME ANTIQUE WHITE '}, {'p.description': 'WOOD 2 DRAWER CABINET WHITE FINISH'}, {'p.description': 'GLASS STAR FROSTED T-LIGHT HOLDER'}, {'p.description': 'GIN AND TONIC MUG'}, {'p.description': 'VINTAGE BILLBOARD DRINK ME MUG'}, {'p.description': 'RETRO COFFEE MUGS ASSORTED'}, {'p.description': 'EDWARDIAN PARASOL BLACK'}, {'p.description': 'CREAM HANGING HEART T-LIGHT HOLDER'}, {'p.description': 'IVORY EMBROIDERED QUILT '}, {'p.description': 'WOODEN PICTURE FRAME WHITE FINISH'}, {'p.description': 'HAND WARMER RED RETROSPOT'}, {'p.description': 'HAND WARMER RED RETROSPOT'}, {'p.description': 'RETRO COFFEE MUGS 

In [116]:
# Codes of every product that client 17850 has a relationship to, loaded into
# a DataFrame with a single 'p.code' column. The client id is written into
# the query text as an integer literal; no query parameters are used.
cypher = '''
MATCH (c:Client)-[r]->(p:Product)
WHERE c.id = 17850
RETURN p.code;
'''

df = fetch_data(cypher)

In [117]:
df

Unnamed: 0,p.code
0,22803
1,82494L
2,21871
3,21169
4,22632
...,...
303,37370
304,82483
305,20679
306,84029G


In [128]:
# Count how many of the products recommended to the first client in
# `results1` appear among the product codes loaded into `df`.
codes = df['p.code'].values
res = results1[0]['product']

count = sum(1 for r in res if r in codes)
print(count)

9


In [124]:
res = results1[0]['product']

['85123A',
 '71053',
 '84406B',
 '84029G',
 '84029E',
 '22752',
 '21730',
 '22633',
 '22632',
 '84879']

In [None]:
num_client = len(client_mapping)
num_product = len(product_mapping)

# Invert the id -> position mappings so predictions can be reported by id.
reverse_client_mapping = {position: key for key, position in client_mapping.items()}
reverse_product_mapping = {position: key for key, position in product_mapping.items()}

results1 = []

model.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    # The node embeddings do not depend on the client being scored, so the
    # encoder runs once here instead of once per client.
    z_dict = model.encoder(data.x_dict, data.edge_index_dict)
    col = torch.arange(num_product, device=z_dict['product'].device)

    # Score only the first 200 clients (or all of them if there are fewer).
    for client_id in range(min(200, num_client)):
        # Pair this client with every product.
        row = torch.full_like(col, client_id)
        edge_label_index = torch.stack([row, col], dim=0)

        pred = model.decoder(z_dict, edge_label_index).clamp(min=0, max=5)

        user_neo4j_id = reverse_client_mapping[client_id]

        # Products with a predicted rate of at least 4.0; the first ten are
        # taken in product-position order (this variant does not rank them).
        candidates = (pred >= 4.0).nonzero(as_tuple=True)[0]

        ten_predictions = [reverse_product_mapping[el] for el in candidates[:10].tolist()]
        results1.append({'client': user_neo4j_id, 'product': ten_predictions})
    