In [1]:
from data_loader import DataLoader, FriendEdgeEncoding, NodeType, playtime_forever_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, print_game_edges_for_user, never_remove_edge, remove_zero_playtime_edge
from matplotlib import pyplot as plt
import networkx as nx

In [2]:
# Testing the different normalization functions
# Build the full network once per normalizer configuration and print the
# scored game edges of a single user, so the effect of each normalizer (or
# chain of normalizers) can be compared side by side.
user = 76561198166465514

# (label, score normalizers, score by playtime?)
# The first case keeps the DataLoader defaults (only the normalizer is set);
# every other case scores edges by 'playtime_forever' and disables friendship
# edges.
normalizer_cases = [
    ('Constant with linear norm applied', [LinearNormalizer(0.0, 1.0)], False),
    ('No norm', [], True),
    ('Linear norm', [LinearNormalizer(0.0, 1.0)], True),
    ('Gaussian norm', [GaussianNormalizer(1.0, 1.0)], True),
    ('Percentile norm', [PercentileNormalizer()], True),
    ('Linear then gaussian', [LinearNormalizer(0.0, 1.0), GaussianNormalizer(1.0, 1.0)], True),
    ('percentile then gaussian', [PercentileNormalizer(), GaussianNormalizer(1.0, 1.0)], True),
]

for case_label, case_normalizers, score_by_playtime in normalizer_cases:
    if score_by_playtime:
        test_data_loader = DataLoader(
            friendship_edge_encoding=FriendEdgeEncoding.NONE,
            edge_scoring_function=playtime_forever_edge_scoring_function,
            score_normalizers=case_normalizers,
            user_game_edge_embeddings=['playtime_forever'],
        )
    else:
        test_data_loader = DataLoader(score_normalizers=case_normalizers)
    # The network is built before the label is printed, as in the original cell.
    network = test_data_loader.get_full_network()
    print(case_label)
    print_game_edges_for_user(network, user)
    print()

Constant with linear norm applied
Edges for user 76561198166465514:
[(76561198166465514, 17410, {'score': 1.0}),
 (76561198166465514, 620, {'score': 1.0}),
 (76561198166465514, 204360, {'score': 1.0}),
 (76561198166465514, 99900, {'score': 1.0}),
 (76561198166465514, 105600, {'score': 1.0}),
 (76561198166465514, 205100, {'score': 1.0}),
 (76561198166465514, 70110, {'score': 1.0}),
 (76561198166465514, 8930, {'score': 1.0}),
 (76561198166465514, 70100, {'score': 1.0}),
 (76561198166465514, 400, {'score': 1.0}),
 (76561198166465514, 214420, {'score': 1.0}),
 (76561198166465514, 236390, {'score': 1.0}),
 (76561198166465514, 230410, {'score': 1.0}),
 (76561198166465514, 239030, {'score': 1.0}),
 (76561198166465514, 203160, {'score': 1.0}),
 (76561198166465514, 248530, {'score': 1.0}),
 (76561198166465514, 238460, {'score': 1.0}),
 (76561198166465514, 283080, {'score': 1.0}),
 (76561198166465514, 268910, {'score': 1.0}),
 (76561198166465514, 246620, {'score': 1.0}),
 (76561198166465514, 301

In [3]:
# Testing splits
# Smoke test: build a DataLoader with default settings and invoke each of the
# three train/test split loaders once, with default arguments. The return
# values are not captured or checked here; the cell only verifies that the
# calls complete without raising.
# NOTE(review): no seed is passed, so if these loaders default to an unseeded
# RNG the resulting splits are not reproducible across runs — confirm.
test_data_loader = DataLoader()
test_data_loader.load_random_train_test_network()
test_data_loader.load_stratified_user_degree_train_test_network()
test_data_loader.load_stratified_user_train_test_network()

In [4]:
# Testing friendship encoding
# Build the network under three friendship-edge encodings and print a short
# preview (counts plus the first/last 10 edges and nodes) for each.

def _print_network_preview(network, preview_size=10):
    """Print edge/node counts and the first and last `preview_size` edges and nodes.

    The edge and node lists (with their data dicts) are materialised once each
    and sliced, instead of being rebuilt for every slice.
    """
    print('Num edges: ', len(network.edges))
    print('Num nodes: ', len(network.nodes))
    edges_with_data = list(network.edges(data=True))
    print(edges_with_data[:preview_size])
    print(edges_with_data[-preview_size:])
    nodes_with_data = list(network.nodes(data=True))
    print(nodes_with_data[:preview_size])
    print(nodes_with_data[-preview_size:])


# Case 1: no friendship edges; raw playtime used directly as the edge score.
test_data_loader = DataLoader(
    friendship_edge_encoding=FriendEdgeEncoding.NONE,
    edge_scoring_function=(lambda edge_data: edge_data['playtime_forever']),
    score_normalizers=[],
    user_embeddings=[],
    game_embeddings=['name'],
    user_game_edge_embeddings=['playtime_forever'],
    friend_friend_edge_embeddings=[],
)
network = test_data_loader.get_full_network()
_print_network_preview(network)

# Case 2: all friendships encoded; default edge scoring function.
test_data_loader = DataLoader(
    friendship_edge_encoding=FriendEdgeEncoding.ALL_FRIENDSHIPS,
    score_normalizers=[],
    user_embeddings=[],
    game_embeddings=[],
    user_game_edge_embeddings=['example_sum_user_id_game_id_playtime_forever'],
    friend_friend_edge_embeddings=[],
)
network = test_data_loader.get_full_network()
_print_network_preview(network)

# Case 3: BETWEEN_USERS encoding with Gaussian-normalised playtime scores.
test_data_loader = DataLoader(
    friendship_edge_encoding=FriendEdgeEncoding.BETWEEN_USERS,
    edge_scoring_function=playtime_forever_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
)
network = test_data_loader.get_full_network()
# The stratified split is run on this network before the preview is printed.
test_data_loader.load_stratified_user_train_test_network(network=network, train_percentage=0.8, test_percentage=0.2, seed=0)
_print_network_preview(network)

# Optional visualisation, left disabled:
# selected_nodes = [node for node in network.nodes() if network.degree(node) > 1]
# subgraph = network.subgraph(selected_nodes)
# nx.draw(subgraph, node_size=5)
# plt.show()

Num edges:  1061684
Num nodes:  32836
[(76561198166465514, 17410, {'playtime_forever': 774, 'score': 774}), (76561198166465514, 620, {'playtime_forever': 5308, 'score': 5308}), (76561198166465514, 204360, {'playtime_forever': 5267, 'score': 5267}), (76561198166465514, 99900, {'playtime_forever': 45, 'score': 45}), (76561198166465514, 105600, {'playtime_forever': 406, 'score': 406}), (76561198166465514, 205100, {'playtime_forever': 1305, 'score': 1305}), (76561198166465514, 70110, {'playtime_forever': 0, 'score': 0}), (76561198166465514, 8930, {'playtime_forever': 2646, 'score': 2646}), (76561198166465514, 70100, {'playtime_forever': 161, 'score': 161}), (76561198166465514, 400, {'playtime_forever': 2028, 'score': 2028})]
[(76561198164946427, 823130, {'playtime_forever': 0, 'score': 0}), (76561198164946427, 231430, {'playtime_forever': 0, 'score': 0}), (76561198164946427, 424840, {'playtime_forever': 296, 'score': 296}), (76561198164946427, 1384160, {'playtime_forever': 3021, 'score': 3

In [5]:
# Printing out game node degrees
# Lists (game name, degree) for game nodes, highest degree first.

# Nodes whose id is strictly below this bound are treated as games.
# NOTE(review): edges to game ids above this value (e.g. 2357570) appear in
# other outputs of this notebook, so this cutoff may drop some games — confirm
# it is intentional.
GAME_ID_UPPER_BOUND = 1999220

test_data_loader = DataLoader(game_embeddings = ['name'])
network = test_data_loader.get_full_network()

# Single pass over the degree view: filter to game nodes, then sort by degree
# descending (sorted() is stable, so ties keep the network's node order).
degrees = sorted(
    (entry for entry in network.degree() if entry[0] < GAME_ID_UPPER_BOUND),
    key=lambda entry: entry[1],
    reverse=True,
)

# Build the node-data view once instead of once per game.
node_data = network.nodes(data=True)
out = [(node_data[game]['name'], degree) for game, degree in degrees]
for x in out:
    print(x)

('Counter-Strike 2', 4756)
('PUBG: BATTLEGROUNDS', 3038)
('Left 4 Dead 2', 2862)
("Garry's Mod", 2851)
('Unturned', 2830)
('PAYDAY 2', 2735)
('Among Us', 2702)
('Apex Legends™', 2646)
("Tom Clancy's Rainbow Six® Siege", 2564)
('Warframe', 2395)
('Destiny 2', 2385)
('Grand Theft Auto V', 2349)
('Terraria', 2347)
('Rust', 2316)
('Brawlhalla', 2258)
('Portal 2', 2195)
('ARK: The Survival Of The Fittest', 2020)
('Aimlabs', 1938)
('ARK: Survival Evolved', 1932)
('Paladins®', 1925)
('Borderlands 2', 1924)
('VRChat', 1864)
('Z1 Battle Royale', 1827)
('War Thunder', 1826)
('Z1 Battle Royale: Test Server', 1826)
('Halo Infinite', 1770)
('Counter-Strike: Source', 1768)
('The Forest', 1683)
('Lethal Company', 1671)
('Phasmophobia', 1625)
("Don't Starve Together", 1576)
('Portal', 1564)
('Half-Life 2: Lost Coast', 1553)
('Splitgate', 1552)
('Insurgency', 1501)
('Half-Life 2', 1492)
('Call of Duty®', 1471)
('Path of Exile', 1468)
('SMITE®', 1468)
('Totally Accurate Battlegrounds', 1438)
('PlanetSid

In [4]:
# Printing out user node degrees
# Lists (user id, degree) for user nodes, highest degree first.

# Nodes whose id is strictly above this bound are treated as users.
USER_ID_LOWER_BOUND = 10 ** 16

test_data_loader = DataLoader()
network = test_data_loader.get_full_network()

# Single pass over the degree view: filter to user nodes, then sort by degree
# descending (sorted() is stable, so ties keep the network's node order).
degrees = sorted(
    (entry for entry in network.degree() if entry[0] > USER_ID_LOWER_BOUND),
    key=lambda entry: entry[1],
    reverse=True,
)

# The degree view already yields (node, degree) tuples, so no re-packing is needed.
out = list(degrees)
for x in out:
    print(x)

(76561197988445370, 14110)
(76561198072833066, 13358)
(76561198057329243, 13273)
(76561197968357064, 11835)
(76561198045463085, 10190)
(76561197970359081, 9573)
(76561197961553883, 9533)
(76561198016075171, 9140)
(76561197979043237, 9136)
(76561198106393787, 8825)
(76561198014304296, 8237)
(76561197978225364, 7662)
(76561198075462979, 7150)
(76561198157198557, 7096)
(76561198010296742, 6944)
(76561198015615884, 6673)
(76561197994308495, 6537)
(76561197960315617, 6440)
(76561198069439929, 6180)
(76561198061742477, 5981)
(76561197977289691, 5790)
(76561197997240220, 5730)
(76561198002608541, 5576)
(76561198020204564, 5479)
(76561197996285059, 5306)
(76561198039886147, 4632)
(76561197968490503, 4456)
(76561198060841177, 4387)
(76561198216615571, 4276)
(76561198007226581, 4239)
(76561197970655441, 4179)
(76561198031178404, 4142)
(76561197995754619, 4064)
(76561198064299104, 3995)
(76561198254632411, 3837)
(76561198044477411, 3831)
(76561197970791901, 3808)
(76561198052440359, 3677)
(765611

In [6]:
# Testing for the game embeddings
# Load the network with every game embedding field enabled and preview the
# first and last 10 edges and nodes, including their attribute dicts.
test_data_loader = DataLoader(
    game_embeddings = ['name', 'numReviews', 'avgReviewScore', 'price', 'genres', 'tags', 'numFollowers']
)
network = test_data_loader.get_full_network()

# Materialise each view once and slice it, rather than rebuilding the full
# list for every print.
edges_with_data = list(network.edges(data=True))
nodes_with_data = list(network.nodes(data=True))

print(edges_with_data[:10])
print(edges_with_data[-10:])
print(nodes_with_data[:10])
print(nodes_with_data[-10:])

[(76561198166465514, 17410, {'score': 1}), (76561198166465514, 620, {'score': 1}), (76561198166465514, 204360, {'score': 1}), (76561198166465514, 99900, {'score': 1}), (76561198166465514, 105600, {'score': 1}), (76561198166465514, 205100, {'score': 1}), (76561198166465514, 70110, {'score': 1}), (76561198166465514, 8930, {'score': 1}), (76561198166465514, 70100, {'score': 1}), (76561198166465514, 400, {'score': 1})]
[(76561198164946427, 823130, {'score': 1}), (76561198164946427, 231430, {'score': 1}), (76561198164946427, 424840, {'score': 1}), (76561198164946427, 1384160, {'score': 1}), (76561198164946427, 461040, {'score': 1}), (76561198164946427, 1730590, {'score': 1}), (76561198164946427, 1599340, {'score': 1}), (76561198164946427, 520720, {'score': 1}), (76561198164946427, 1966720, {'score': 1}), (76561198164946427, 2357570, {'score': 1})]
[(76561198166465514, {'node_type': <NodeType.USER: 1>}), (76561198001537869, {'node_type': <NodeType.USER: 1>}), (76561198027311040, {'node_type'

In [7]:
# Print games df sorted by a field column
# Shows the 20 rows of games_df with the highest 'numReviews', restricted to
# the name and review-count columns. Relies on `test_data_loader` from the
# preceding cell.
games_by_review_count = test_data_loader.games_df.sort_values(by='numReviews', ascending=False)
top_reviewed_games = games_by_review_count.head(20)
print(top_reviewed_games[['name', 'numReviews']])

                                name  numReviews
28                  Counter-Strike 2     7907477
421              PUBG: BATTLEGROUNDS     2309505
500               Grand Theft Auto V     1600887
4                           Terraria     1158939
499  Tom Clancy's Rainbow Six® Siege     1152469
97                       Garry's Mod      975278
304                             Rust      943317
504                    Apex Legends™      796468
128                    Left 4 Dead 2      759039
366        The Witcher® 3: Wild Hunt      714506
69                        ELDEN RING      690860
605                 Dead by Daylight      671445
462                         Among Us      657602
91             ARK: Survival Evolved      644366
535                   Cyberpunk 2077      633120
306           Euro Truck Simulator 2      627043
53                          PAYDAY 2      625355
385                   Stardew Valley      585100
66                         Destiny 2      583692
784                 

In [2]:
# Testing loading one snowball
# Compare network size for different snowball selections and per-snowball
# user limits.
# NOTE(review): in the recorded output the single-snowball case reports the
# same counts as the all-snowballs case — confirm the snowball id filter
# (passed here as a string) is actually applied.
snowball_cases = [
    # (label, snowballs_ids, num_users_to_load_per_snowball)
    ('All snowballs all users.', [], None),
    ('1 snowball all users.', ['76561198166465514'], None),
    ('all snowballs 100 users per snowball.', [], 100),
]

for case_label, case_snowball_ids, users_per_snowball in snowball_cases:
    print(case_label)
    test_data_loader = DataLoader(
        snowballs_ids=case_snowball_ids,
        num_users_to_load_per_snowball=users_per_snowball,
    )
    network = test_data_loader.get_full_network()
    print('Num edges: ', len(network.edges))
    print('Num nodes: ', len(network.nodes))

All snowballs all users.
Num edges:  1061684
Num nodes:  32836
1 snowball all users.
Num edges:  1061684
Num nodes:  32836
all snowballs 100 users per snowball.
Num edges:  14445
Num nodes:  4360


In [2]:
# Testing removing 0 playtime edges
# Compare edge/node counts with edge removal disabled versus with the
# zero-playtime removal function enabled.
edge_removal_cases = [
    # (label, DataLoader keyword arguments)
    ('No removing edges.', {'remove_edge_function': never_remove_edge}),
    (
        'Removing 0 playtime edges.',
        {
            'remove_edge_function': remove_zero_playtime_edge,
            'user_game_edge_embeddings': ['playtime_forever'],
        },
    ),
]

for case_label, loader_kwargs in edge_removal_cases:
    print(case_label)
    test_data_loader = DataLoader(**loader_kwargs)
    network = test_data_loader.get_full_network()
    print('Num edges: ', len(network.edges))
    print('Num nodes: ', len(network.nodes))

No removing edges.
Num edges:  1061684
Num nodes:  32836
Removing 0 playtime edges.
Num edges:  572199
Num nodes:  32836


In [2]:
# Scoring a user with no edges.
# Build a Gaussian-normalised playtime network with zero-playtime edges
# removed, then print the game edges of one specific user id.
user_without_edges = 76561199369782592

data_loader = DataLoader(
    edge_scoring_function=playtime_forever_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
    remove_edge_function=remove_zero_playtime_edge,
    num_users_to_load_per_snowball=None,
)
network = data_loader.get_full_network()
print_game_edges_for_user(network, user_without_edges)

Edges for user 76561199369782592:
[]
