In [1]:
from data_loader import DataLoader, FriendEdgeEncoding, NodeType, playtime_forever_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, print_game_edges_for_user
from matplotlib import pyplot as plt
import networkx as nx

In [2]:
# Testing the different normalization functions
#
# Builds one network per normalizer configuration and prints the user->game
# edges of a single reference user so the resulting scores can be compared.
user = 76561198878838541


def _playtime_loader(normalizers):
    """Return a DataLoader that scores edges by playtime and applies `normalizers` in order.

    Friendship edges are disabled and 'playtime_forever' is kept as an edge
    attribute so the raw value is printed next to the score.
    """
    return DataLoader(
        friendship_edge_encoding=FriendEdgeEncoding.NONE,
        edge_scoring_function=playtime_forever_edge_scoring_function,
        score_normalizers=normalizers,
        user_game_edge_embeddings=['playtime_forever'],
    )


def _report(label, loader):
    """Build the full network from `loader`, print `label` and the reference user's edges, return the network."""
    graph = loader.get_full_network()
    print(label)
    print_game_edges_for_user(graph, user)
    print()
    return graph


# Default loader settings, with only a linear normalizer on top.
test_data_loader = DataLoader(score_normalizers=[LinearNormalizer(0.0, 1.0)])
network = _report('Constant with linear norm applied', test_data_loader)

# Raw playtime, no normalization.
test_data_loader = _playtime_loader([])
network = _report('No norm', test_data_loader)

# Single normalizers.
test_data_loader = _playtime_loader([LinearNormalizer(0.0, 1.0)])
network = _report('Linear norm', test_data_loader)

test_data_loader = _playtime_loader([GaussianNormalizer(1.0, 1.0)])
network = _report('Gaussian norm', test_data_loader)

test_data_loader = _playtime_loader([PercentileNormalizer()])
network = _report('Percentile norm', test_data_loader)

# Chained normalizers (passed in list order).
test_data_loader = _playtime_loader([LinearNormalizer(0.0, 1.0), GaussianNormalizer(1.0, 1.0)])
network = _report('Linear then gaussian', test_data_loader)

test_data_loader = _playtime_loader([PercentileNormalizer(), GaussianNormalizer(1.0, 1.0)])
network = _report('percentile then gaussian', test_data_loader)

Constant with linear norm applied
Edges for user 76561198878838541:
[(76561198878838541, 227300, {'score': 1.0}),
 (76561198878838541, 236390, {'score': 1.0}),
 (76561198878838541, 244210, {'score': 1.0}),
 (76561198878838541, 301520, {'score': 1.0}),
 (76561198878838541, 380600, {'score': 1.0}),
 (76561198878838541, 730, {'score': 1.0}),
 (76561198878838541, 1172470, {'score': 1.0}),
 (76561198878838541, 1238860, {'score': 1.0}),
 (76561198878838541, 1248130, {'score': 1.0}),
 (76561198878838541, 1938090, {'score': 1.0}),
 (76561198878838541, 47890, {'score': 1.0}),
 (76561198878838541, 90200, {'score': 1.0}),
 (76561198878838541, 1106140, {'score': 1.0}),
 (76561198878838541, 1803190, {'score': 1.0}),
 (76561198878838541, 397900, {'score': 1.0}),
 (76561198878838541, 40990, {'score': 1.0})]

No norm
Edges for user 76561198878838541:
[(76561198878838541, 227300, {'playtime_forever': 12220, 'score': 12220}),
 (76561198878838541, 236390, {'playtime_forever': 57, 'score': 57}),
 (7656119

In [3]:
# Testing splits
#
# Smoke test: run each train/test split strategy once with its default
# arguments on a default-configured loader. Nothing is printed; the cell
# passes if no call raises.
test_data_loader = DataLoader()
for run_split in (
    test_data_loader.load_random_train_test_network,
    test_data_loader.load_stratified_user_degree_train_test_network,
    test_data_loader.load_stratified_user_train_test_network,
):
    run_split()

In [4]:
# Testing friendship encoding
#
# Builds the full network under each FriendEdgeEncoding variant and prints
# its size plus a sample of edges and nodes for eyeballing.


def _print_network_summary(graph):
    """Print edge/node counts and the first and last 10 edges and nodes of `graph`.

    The edge and node lists are materialized once each and then sliced, so a
    large graph is not converted to a list separately for every print.
    """
    edge_items = list(graph.edges(data=True))
    node_items = list(graph.nodes(data=True))
    print('Num edges: ', len(graph.edges))
    print('Num nodes: ', len(graph.nodes))
    print(edge_items[:10])
    print(edge_items[-10:])
    print(node_items[:10])
    print(node_items[-10:])


# FriendEdgeEncoding.NONE: score taken directly from the edge's playtime_forever,
# game nodes carry their 'name'.
test_data_loader = DataLoader(friendship_edge_encoding=FriendEdgeEncoding.NONE, edge_scoring_function = (lambda edge_data: edge_data['playtime_forever']), score_normalizers = [], user_embeddings = [], game_embeddings = ['name'], user_game_edge_embeddings = ['playtime_forever'], friend_friend_edge_embeddings = [])
network = test_data_loader.get_full_network()
_print_network_summary(network)

# FriendEdgeEncoding.ALL_FRIENDSHIPS with the example user-game edge embedding.
test_data_loader = DataLoader(friendship_edge_encoding=FriendEdgeEncoding.ALL_FRIENDSHIPS, score_normalizers = [], user_embeddings = [], game_embeddings = [], user_game_edge_embeddings = ['example_sum_user_id_game_id_playtime_forever'], friend_friend_edge_embeddings = [])
network = test_data_loader.get_full_network()
_print_network_summary(network)

# FriendEdgeEncoding.BETWEEN_USERS with Gaussian-normalized playtime scores;
# the stratified split is run on the network before it is summarized.
test_data_loader = DataLoader(friendship_edge_encoding = FriendEdgeEncoding.BETWEEN_USERS, edge_scoring_function = playtime_forever_edge_scoring_function, user_game_edge_embeddings = ['playtime_forever'], score_normalizers = [GaussianNormalizer(1.0, 1.0)])
network = test_data_loader.get_full_network()
test_data_loader.load_stratified_user_train_test_network(network=network, train_percentage=0.8, test_percentage=0.2, seed=0)
_print_network_summary(network)

# Optional visual check (disabled): draw only the nodes with degree > 1.
# selected_nodes = [node for node in network.nodes() if network.degree(node) > 1]
# subgraph = network.subgraph(selected_nodes)
# nx.draw(subgraph, node_size=5)
# plt.show()

Num edges:  129038
Num nodes:  19413
[(76561198166465514, 620, {'playtime_forever': 5308, 'score': 5308}), (76561198166465514, 70110, {'playtime_forever': 0, 'score': 0}), (76561198166465514, 99900, {'playtime_forever': 45, 'score': 45}), (76561198166465514, 205100, {'playtime_forever': 1305, 'score': 1305}), (76561198166465514, 8930, {'playtime_forever': 2646, 'score': 2646}), (76561198166465514, 70100, {'playtime_forever': 161, 'score': 161}), (76561198166465514, 400, {'playtime_forever': 2028, 'score': 2028}), (76561198166465514, 105600, {'playtime_forever': 406, 'score': 406}), (76561198166465514, 17410, {'playtime_forever': 774, 'score': 774}), (76561198166465514, 204360, {'playtime_forever': 5267, 'score': 5267})]
[(76561199512675465, 730, {'playtime_forever': 12, 'score': 12}), (76561199512675465, 766570, {'playtime_forever': 2, 'score': 2}), (76561199512675465, 714010, {'playtime_forever': 5, 'score': 5}), (76561199512675465, 1172470, {'playtime_forever': 1, 'score': 1}), (7656

In [5]:
# Printing out game node degrees
#
# Lists every game node with its degree, highest degree first.
test_data_loader = DataLoader(game_embeddings = ['name'])
network = test_data_loader.get_full_network()

# Node IDs below this bound are treated as games; everything else is skipped.
# NOTE(review): presumably this separates small game IDs from the much larger
# user IDs. A game whose ID is >= this bound would be dropped silently;
# filtering on the nodes' 'node_type' attribute may be more robust. Confirm.
MAX_GAME_ID = 1999220

# Keep (node, degree) pairs for game nodes only, sorted by degree descending.
# sorted() is stable, so equal degrees keep the graph's iteration order.
degrees = sorted(
    (pair for pair in network.degree() if pair[0] < MAX_GAME_ID),
    key=lambda pair: pair[1],
    reverse=True,
)
out = [(network.nodes[game]['name'], degree) for game, degree in degrees]
for x in out:
    print(x)

('Counter-Strike 2', 4648)
('PUBG: BATTLEGROUNDS', 2895)
('Apex Legends', 2248)
('Left 4 Dead 2', 1548)
('Goose Goose Duck', 1316)
('NARAKA: BLADEPOINT', 1279)
('Grand Theft Auto V', 1088)
('The Forest', 972)
('Aimlabs', 927)
('Call of Duty®', 917)
('Battlefield™ V', 899)
('Destiny 2', 825)
("Don't Starve Together", 796)
('War Thunder', 750)
('Unturned', 707)
('PAYDAY 2', 697)
('Titanfall® 2', 605)
('Warframe', 594)
('Terraria', 588)
('ARK: Survival Of The Fittest', 577)
('ARK: Survival Evolved', 576)
('Portal 2', 571)
('The Sims™ 4', 553)
('Half-Life', 520)
('Human Fall Flat', 515)
('Battlefield 1 ™', 489)
('Stumble Guys', 470)
('Red Dead Redemption 2', 469)
('SCP: Secret Laboratory', 458)
('Dying Light', 438)
('Left 4 Dead', 425)
('Brawlhalla', 423)
('Euro Truck Simulator 2', 412)
('Palworld', 398)
('Crab Game', 389)
('Phasmophobia', 382)
('Lethal Company', 371)
('The Witcher 3: Wild Hunt', 369)
('Portal', 367)
('Dead Island Riptide Definitive Edition', 358)
("Sid Meier's Civilizatio

In [6]:
# Testing for the game embeddings
#
# Loads the network with the listed game attributes requested, then prints
# the first and last 10 edges followed by the first and last 10 nodes.
game_fields = ['name', 'numReviews', 'avgReviewScore', 'price', 'genres', 'tags', 'numFollowers']
test_data_loader = DataLoader(game_embeddings=game_fields)
network = test_data_loader.get_full_network()

for view in (network.edges(data=True), network.nodes(data=True)):
    items = list(view)
    print(items[:10])
    print(items[-10:])

[(76561198166465514, 620, {'score': 1}), (76561198166465514, 70110, {'score': 1}), (76561198166465514, 99900, {'score': 1}), (76561198166465514, 205100, {'score': 1}), (76561198166465514, 8930, {'score': 1}), (76561198166465514, 70100, {'score': 1}), (76561198166465514, 400, {'score': 1}), (76561198166465514, 105600, {'score': 1}), (76561198166465514, 17410, {'score': 1}), (76561198166465514, 204360, {'score': 1})]
[(76561199512675465, 730, {'score': 1}), (76561199512675465, 766570, {'score': 1}), (76561199512675465, 714010, {'score': 1}), (76561199512675465, 1172470, {'score': 1}), (76561199512675465, 976800, {'score': 1}), (76561199512675465, 1520470, {'score': 1}), (76561199512675465, 1568590, {'score': 1}), (76561199512675465, 1623660, {'score': 1}), (76561199512675465, 1240440, {'score': 1}), (76561199512675465, 578080, {'score': 1})]
[(76561198166465514, {'node_type': <NodeType.USER: 1>}), (76561199498351045, {'node_type': <NodeType.USER: 1>}), (76561199500648896, {'node_type': <

In [7]:
# Print games df sorted by a field column
#
# Shows the 20 games with the most reviews (name and review count only).
# Uses the loader left in `test_data_loader` by the previous cell.
games_by_reviews = test_data_loader.games_df.sort_values(by='numReviews', ascending=False)
top_reviewed = games_by_reviews.head(20)
print(top_reviewed[['name', 'numReviews']])

                                name  numReviews
27                  Counter-Strike 2     7891051
132              PUBG: BATTLEGROUNDS     2303801
111               Grand Theft Auto V     1594484
7                           Terraria     1154313
144   Tom Clancy's Rainbow Six Siege     1148791
140                      Garry's Mod      972307
153                             Rust      939957
100                     Apex Legends      793016
134                    Left 4 Dead 2      756091
234         The Witcher 3: Wild Hunt      712821
71                        ELDEN RING      688194
636                 Dead by Daylight      669961
119                         Among Us      657055
93             ARK: Survival Evolved      642765
1070                  Cyberpunk 2077      630584
52                          PAYDAY 2      624578
157           Euro Truck Simulator 2      624208
66                         Destiny 2      583107
407                   Stardew Valley      582247
136                 