In [58]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from db.graph import Neo4JConnector
from pykeen.triples import TriplesFactory
from pykeen.predict import predict_all
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
neo4j = Neo4JConnector.create_from_config("../config.ini")

In [91]:
result = neo4j.exec("""MATCH 
        (p:Player)-[pc:PLAYS_CHAMP]->(c:Champion), 
        (p)-[hp:HAS_POSITION]->(pos:Position),
        (p)-[pi:PLAYS_IN]->(pim:PlayerInMatch)
    WITH p, c, pos, COUNT(pim.matchID) AS matchesPlayed
    WHERE matchesPlayed > 10
    RETURN p, c, pos""")

In [93]:
df = pd.DataFrame(
    [(r["p"]["accountID"], r["p"]["platformID"], r["pos"]["positionID"], r["c"]["championName"]) for r in  result.records],
    columns=("accountID", "platformID", "positionID", "championName"))
df

Unnamed: 0,accountID,platformID,positionID,championName
0,INBtLo1tFtY4ApN7MKe6-n83CkYccz49oKnRe-PhG031qgA,KR,TOP,Akali
1,INBtLo1tFtY4ApN7MKe6-n83CkYccz49oKnRe-PhG031qgA,KR,TOP,Lucian
2,INBtLo1tFtY4ApN7MKe6-n83CkYccz49oKnRe-PhG031qgA,KR,TOP,Irelia
3,INBtLo1tFtY4ApN7MKe6-n83CkYccz49oKnRe-PhG031qgA,KR,TOP,Aatrox
4,INBtLo1tFtY4ApN7MKe6-n83CkYccz49oKnRe-PhG031qgA,KR,TOP,Sett
...,...,...,...,...
38259,gAvxLk1Y_i5QOhiLfvg8NGk6ULRQ-IkLjzJN7_1-sEhCmI...,KR,JGL,Ekko
38260,gAvxLk1Y_i5QOhiLfvg8NGk6ULRQ-IkLjzJN7_1-sEhCmI...,KR,JGL,Nocturne
38261,gAvxLk1Y_i5QOhiLfvg8NGk6ULRQ-IkLjzJN7_1-sEhCmI...,KR,JGL,Karthus
38262,R_BUPPLlN13kPOeqnQGbfkAWDEJi0mbKTum8ZtE9PsyGlqg,KR,JGL,Olaf


In [127]:
user_subset = pd.Series(df["accountID"].unique()).sample(1000)
df_reduced = df[df.accountID.isin(user_subset)]

In [128]:
df_positions = df_reduced.apply(lambda x: pd.Series([f"{x['accountID']};{x['platformID']}", "PLAYS_POSITION", x['positionID']]), axis=1).drop_duplicates()
df_champs = df_reduced.apply(lambda x: pd.Series([f"{x['accountID']};{x['platformID']}", "PLAYS_CHAMP", x['championName']]), axis=1)

In [129]:
tf_full = TriplesFactory.from_labeled_triples(np.array(pd.concat([df_positions, df_champs])))
training, testing, validation = tf_full.split([.8, .1, .1])

INFO:pykeen.triples.splitting:done splitting triples to groups of sizes [3710, 598, 598]


In [130]:
from pykeen.pipeline import pipeline

result = pipeline(
    training=training,
    testing=testing,
    validation=validation,
    model = 'TransE',
    random_seed=1,
    device='cpu',
)

INFO:pykeen.pipeline.api:Using device: cpu


Training epochs on cpu:   0%|          | 0/5 [00:00<?, ?epoch/s]

Training batches on cpu:   0%|          | 0/19 [00:00<?, ?batch/s]

Training batches on cpu:   0%|          | 0/19 [00:00<?, ?batch/s]

Training batches on cpu:   0%|          | 0/19 [00:00<?, ?batch/s]

Training batches on cpu:   0%|          | 0/19 [00:00<?, ?batch/s]

Training batches on cpu:   0%|          | 0/19 [00:00<?, ?batch/s]

Evaluating on cpu:   0%|          | 0.00/598 [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 0.15s seconds


In [156]:
from pykeen.predict import predict_triples

pack = predict_triples(model=result.model, triples=testing)
predictions = pack.process(factory=result.training).df
predictions



Unnamed: 0,head_id,head_label,relation_id,relation_label,tail_id,tail_label,score
0,237,DFKFAZSYmNxPwZhHjYLhO_vS4RgqC_ZG5k3d1a5jvXxyp1...,1,PLAYS_POSITION,538,SUP,-7.709692
1,641,W4Lr26a5FM4cazmE9_gGALJP1dTaMMAwc86KawUHcDGB-w...,0,PLAYS_CHAMP,458,Nidalee,-11.726681
2,404,LOtCD2lfGrD86ep6J9MWkPEcTTiutgaVlFu1uhXq15CehS...,0,PLAYS_CHAMP,543,Sett,-9.847982
3,638,W074qHHcqi0DzkEMpQFlgUOWaG_8Uy6Lf7mDWIhr5boGyY...,0,PLAYS_CHAMP,632,Vladimir,-7.970482
4,991,pwdrKiPOcCFreVvDrRGM0eocy9_SYxKyRkWo9V_FHTkCFE...,0,PLAYS_CHAMP,529,Rumble,-9.958089
...,...,...,...,...,...,...,...
593,992,pxOqh9J40fZjFSzqkbtuOc1rMuu0VBTqWkP4sg6_CLzPQX...,1,PLAYS_POSITION,425,MID,-7.350600
594,938,mqZrk1NuGSN-26ZKIgTa9eZXxXaY8x3adNnsSiREOVleTO...,0,PLAYS_CHAMP,443,Mordekaiser,-10.082693
595,1122,xyWKyc5SuE4SqOM797elsDo3dhJ9Gqfxu8wZTiQczjIX;KR,0,PLAYS_CHAMP,627,Veigar,-9.181013
596,1040,segBcp2_WFPNXxC5S-aKkYEuZXmpKOfjmLQQg992Nazkgz...,0,PLAYS_CHAMP,709,Zoe,-10.076319


In [171]:
results = pd.DataFrame(testing.triples, columns=("subj", "rel", "obj")).merge(predictions, left_on=["subj", "rel"], right_on=["head_label", "relation_label"])
results = results[["subj", "rel", "obj", "tail_label"]]
results.sort_values(["subj", "rel"])



Unnamed: 0,subj,rel,obj,tail_label
777,-K5Dcqaa_SYxpKQUG1mm9RVO7WlTYDRVCg9bRmDpdQjxEY...,PLAYS_POSITION,SUP,SUP
25,-UxVo_E8wP7ctL4i6u8o6tUhJKQ9YV_XdnORB90EreFN_Y...,PLAYS_POSITION,TOP,TOP
183,-Y0DXJZWoKA3ICLpBIJ_S5b-3kU4JUmiPVdw-xgrWehN;KR,PLAYS_CHAMP,Kai'Sa,Kai'Sa
184,-Y0DXJZWoKA3ICLpBIJ_S5b-3kU4JUmiPVdw-xgrWehN;KR,PLAYS_CHAMP,Kai'Sa,Qiyana
533,-Y0DXJZWoKA3ICLpBIJ_S5b-3kU4JUmiPVdw-xgrWehN;KR,PLAYS_CHAMP,Qiyana,Kai'Sa
...,...,...,...,...
61,zQCIwK89u3pr4xhui-ePig6M-uKpp49jb-LN3vTnVRf_jG...,PLAYS_CHAMP,Rek'Sai,Twisted Fate
365,zQCIwK89u3pr4xhui-ePig6M-uKpp49jb-LN3vTnVRf_jG...,PLAYS_CHAMP,Twisted Fate,Rek'Sai
366,zQCIwK89u3pr4xhui-ePig6M-uKpp49jb-LN3vTnVRf_jG...,PLAYS_CHAMP,Twisted Fate,Twisted Fate
151,zaz8bj3nOMJ6NC30cvdQSnd8E9Lh8VpPSNwa09CBOJHwIb...,PLAYS_CHAMP,Karthus,Karthus
