In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(''), '../'))

from dataset.data_loader import DataLoader, FriendEdgeEncoding, NodeType, playtime_forever_edge_scoring_function, LinearNormalizer, GaussianNormalizer, PercentileNormalizer, print_game_edges_for_user, never_remove_edge, remove_zero_playtime_edge
from matplotlib import pyplot as plt
import networkx as nx

In [2]:
# Smoke test: build a loader with default settings and display whatever
# get_data_for_user returns for one example user id.
sample_user_id = 76561198090678973
test_data_loader = DataLoader()
# Left as the cell's final bare expression so the notebook renders the result.
test_data_loader.get_data_for_user(sample_user_id)

                user_id  game_id  playtime_2weeks  playtime_forever
0     76561198090678973    29800                0                65
1     76561198090678973     4000                0               178
2     76561198090678973    17470                0               243
3     76561198090678973    17390                0               246
4     76561198090678973     2590                0                 0
...                 ...      ...              ...               ...
1399  76561198090678973  1712840                0                 0
1400  76561198090678973      630                0                21
1401  76561198090678973   489630                0                 0
1402  76561198090678973   249050                0                 0
1403  76561198090678973      570                0                60

[1404 rows x 4 columns]


In [2]:
# Testing the different normalization functions
# For each configuration: build a loader, fetch the full network, print a label,
# then print this user's game edges followed by a blank line.
user = 76561198166465514

# First run: only the normalizer list is overridden; every other DataLoader
# argument keeps its default value.
test_data_loader = DataLoader(score_normalizers=[LinearNormalizer(0.0, 1.0)])
network = test_data_loader.get_full_network()
print('Constant with linear norm applied')
print_game_edges_for_user(network, user)
print()

# Remaining runs all score edges with playtime_forever and disable friendship
# edges; they differ only in the normalizer chain passed to the loader.
playtime_normalizer_runs = [
    ('No norm', []),
    ('Linear norm', [LinearNormalizer(0.0, 1.0)]),
    ('Gaussian norm', [GaussianNormalizer(1.0, 1.0)]),
    ('Percentile norm', [PercentileNormalizer()]),
    ('Linear then gaussian', [LinearNormalizer(0.0, 1.0), GaussianNormalizer(1.0, 1.0)]),
    ('percentile then gaussian', [PercentileNormalizer(), GaussianNormalizer(1.0, 1.0)]),
]

for run_label, normalizer_chain in playtime_normalizer_runs:
    test_data_loader = DataLoader(
        friendship_edge_encoding=FriendEdgeEncoding.NONE,
        edge_scoring_function=playtime_forever_edge_scoring_function,
        score_normalizers=normalizer_chain,
        # A fresh list per run, so no list object is shared between loaders.
        user_game_edge_embeddings=['playtime_forever'],
    )
    network = test_data_loader.get_full_network()
    print(run_label)
    print_game_edges_for_user(network, user)
    print()

In [None]:
# Testing splits
# Calls each train/test split loader once with its default arguments. Every call
# rebinds train_network/test_network, so only the last split stays in scope.
test_data_loader = DataLoader()
split_loaders = (
    test_data_loader.load_random_train_test_network,
    test_data_loader.load_stratified_user_degree_train_test_network,
    test_data_loader.load_stratified_user_train_test_network,
)
for load_split in split_loaders:
    train_network, test_network = load_split()

In [None]:
# Testing friendship encoding
def print_network_overview(graph, sample_size=10):
    """Print edge/node counts, then the first and last `sample_size` edges and nodes with their data."""
    print('Num edges: ', len(graph.edges))
    print('Num nodes: ', len(graph.nodes))
    for view in (graph.edges, graph.nodes):
        entries = list(view(data=True))
        print(entries[:sample_size])
        print(entries[-sample_size:])


# 1) No friendship edges; edges scored by an inline playtime_forever lookup.
test_data_loader = DataLoader(
    friendship_edge_encoding=FriendEdgeEncoding.NONE,
    edge_scoring_function=(lambda edge_data: edge_data['playtime_forever']),
    score_normalizers=[],
    user_embeddings=[],
    game_embeddings=['name'],
    user_game_edge_embeddings=['playtime_forever'],
    friend_friend_edge_embeddings=[],
)
network = test_data_loader.get_full_network()
print_network_overview(network)

# 2) ALL_FRIENDSHIPS encoding with a single user-game edge embedding.
test_data_loader = DataLoader(
    friendship_edge_encoding=FriendEdgeEncoding.ALL_FRIENDSHIPS,
    score_normalizers=[],
    user_embeddings=[],
    game_embeddings=[],
    user_game_edge_embeddings=['example_sum_user_id_game_id_playtime_forever'],
    friend_friend_edge_embeddings=[],
)
network = test_data_loader.get_full_network()
print_network_overview(network)

# 3) BETWEEN_USERS encoding with Gaussian-normalized playtime scores; a
#    stratified split is also taken from the same network before it is printed.
test_data_loader = DataLoader(
    friendship_edge_encoding=FriendEdgeEncoding.BETWEEN_USERS,
    edge_scoring_function=playtime_forever_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
)
network = test_data_loader.get_full_network()
train_network, test_network = test_data_loader.load_stratified_user_train_test_network(
    network=network,
    train_percentage=0.8,
    test_percentage=0.2,
    seed=0,
)
print_network_overview(network)

# Optional visualization (disabled): draw only the nodes with degree > 1.
# selected_nodes = [node for node in network.nodes() if network.degree(node) > 1]
# subgraph = network.subgraph(selected_nodes)
# nx.draw(subgraph, node_size=5)
# plt.show()

In [None]:
# Printing out game node degrees
# Prints (game name, degree) pairs, highest degree first.
test_data_loader = DataLoader(game_embeddings = ['name'])
network = test_data_loader.get_full_network()

# Nodes are classified by id magnitude only: anything below this bound is
# treated as a game node.
# NOTE(review): assumes every game id is < 1999220 and every user id is larger
# (the user ids used in this notebook are 17-digit numbers) — confirm.
degrees = sorted(
    (entry for entry in network.degree() if entry[0] < 1999220),
    key=lambda entry: entry[1],
    reverse=True,
)

# network.nodes[game] returns the node's attribute dict directly, so no
# data view has to be rebuilt for every lookup.
out = [(network.nodes[game]['name'], degree) for game, degree in degrees]
for x in out:
    print(x)

In [None]:
# Printing out user node degrees
# Prints (user id, degree) pairs, highest degree first.
test_data_loader = DataLoader()
network = test_data_loader.get_full_network()

# Nodes are classified by id magnitude only: anything above this bound is
# treated as a user node.
# NOTE(review): assumes every user id is > 10000000000000000 and every game id
# is smaller — confirm.
degrees = sorted(
    (entry for entry in network.degree() if entry[0] > 10000000000000000),
    key=lambda entry: entry[1],
    reverse=True,
)

# The sorted entries are already (user, degree) tuples, so no copy is needed;
# `out` is kept as an alias so the name stays bound after the cell runs.
out = degrees
for x in out:
    print(x)

In [None]:
# Testing for the game embeddings
# Loads the network with a set of game attributes attached, then prints the
# first and last 10 edges followed by the first and last 10 nodes, with data.
requested_game_embeddings = ['name', 'numReviews', 'avgReviewScore', 'price', 'genres', 'tags', 'numFollowers']
test_data_loader = DataLoader(game_embeddings=requested_game_embeddings)
network = test_data_loader.get_full_network()

for view in (network.edges, network.nodes):
    entries = list(view(data=True))
    print(entries[:10])
    print(entries[-10:])

In [None]:
# Print games df sorted by a field column
# Uses the `test_data_loader` left in scope by an earlier cell.
# NOTE(review): presumably that loader must have been built with 'numReviews'
# among its game embeddings for this column to exist — confirm.
top_reviewed_games = test_data_loader.games_df.sort_values(by='numReviews', ascending=False).head(20)
print(top_reviewed_games[['name', 'numReviews']])

In [None]:
# Testing loading one snowball
# Compares network sizes across three loading configurations. Each row is
# (label printed first, snowballs_ids, num_users_to_load_per_snowball).
# NOTE(review): the single snowball id is passed as a string while user ids
# elsewhere in this notebook are ints — confirm which type DataLoader expects.
snowball_runs = [
    ('All snowballs all users.', [], None),
    ('1 snowball all users.', ['76561198166465514'], None),
    ('all snowballs 100 users per snowball.', [], 100),
]

for run_label, selected_snowballs, users_per_snowball in snowball_runs:
    print(run_label)
    test_data_loader = DataLoader(
        snowballs_ids=selected_snowballs,
        num_users_to_load_per_snowball=users_per_snowball,
    )
    network = test_data_loader.get_full_network()
    print('Num edges: ', len(network.edges))
    print('Num nodes: ', len(network.nodes))

In [None]:
# Testing removing 0 playtime edges
# Builds the network once per edge-removal policy and prints its size, so the
# two edge/node counts can be compared. Each row is (label, DataLoader kwargs).
edge_removal_runs = [
    ('No removing edges.', dict(remove_edge_function=never_remove_edge)),
    (
        'Removing 0 playtime edges.',
        dict(
            remove_edge_function=remove_zero_playtime_edge,
            user_game_edge_embeddings=['playtime_forever'],
        ),
    ),
]

for run_label, loader_kwargs in edge_removal_runs:
    print(run_label)
    test_data_loader = DataLoader(**loader_kwargs)
    network = test_data_loader.get_full_network()
    print('Num edges: ', len(network.edges))
    print('Num nodes: ', len(network.nodes))

In [None]:
# Scoring a user with no edges.
# Loads the network with Gaussian-normalized playtime scores and zero-playtime
# edges removed, then prints the game edges for one user.
# NOTE(review): presumably this user has no edges left after removal — confirm.
user_without_edges = 76561199369782592
data_loader = DataLoader(
    edge_scoring_function=playtime_forever_edge_scoring_function,
    user_game_edge_embeddings=['playtime_forever'],
    score_normalizers=[GaussianNormalizer(1.0, 1.0)],
    remove_edge_function=remove_zero_playtime_edge,
    num_users_to_load_per_snowball=None,
)
network = data_loader.get_full_network()
print_game_edges_for_user(network, user_without_edges)