In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from collections import defaultdict
import pickle
import networkx as nx
%matplotlib inline

In [2]:
# Base directory for every pickle/CSV file this notebook reads or writes.
# NOTE(review): absolute, machine-specific path; change it before running elsewhere.
folder = 'D:/OMSCS/CSE6242/Project/Plot1/data/'

In [81]:
# Load the pickled player network from the data folder.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(folder + 'player_net', 'rb') as handle:
    player_net = pickle.load(handle)

In [82]:
# Load the pickled per-player summary; later cells read its `account_id`
# column and merge it with the connection table (presumably a DataFrame).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(folder+'summary', 'rb') as handle:
    summary = pickle.load(handle)

In [83]:
# Account ids that appear in the summary, held as a set for fast membership tests.
id_set = set(summary.account_id.values)

In [84]:
# "Key" players: present in the summary and connected to more than 1000 others.
# Maps player id -> number of connections.
connections = {
    player_id: len(player_net[player_id])
    for player_id in player_net
    if player_id in id_set and len(player_net[player_id]) > 1000
}
# First shrink: keep only the adjacency data of the key players.
# `player_net` itself stays in memory; it is not replaced or deleted here.
small_player_net = {
    player_id: player_net[player_id]
    for player_id in connections
}

In [85]:
def get_teammate_graph(graph):
    """
    Flatten a nested adjacency mapping into per-node edge lists.
    :graph mapping of the form {node: {adjacent: values}}; only values[0] is read
           (presumably the same-team match count - confirm against the data producer)
    :returns {node: [[adjacent, values[0]], ...]}, following the input's iteration order
    """
    return {
        player: [[teammate, graph[player][teammate][0]] for teammate in graph[player]]
        for player in graph
    }

In [94]:
# Convert the key-player network to edge-list form and report how many players it holds.
teammate_net = get_teammate_graph(small_player_net)
print(len(teammate_net))

489


In [95]:
def keep_teammate_edge(graph):
    """
    Build a reduced copy of the network that keeps only edges which join two
    nodes of the input graph and carry a positive weight.

    Each kept node's edges are ordered by weight, largest first (ties keep their
    input order). Nodes left with no edges are omitted from the result. The input
    graph is not modified; kept edges are copied into new [adjacent, weight] lists.
    Membership is checked against the nodes of the input, so a kept edge may still
    point at a node that this same call leaves out.

    :graph graph representation of player network. {PlayerA: [[PlayerB, cnt_same_team]]}
    :returns graph representation of player network. {PlayerA: [[PlayerB, cnt_same_team]]}
    """
    known_players = set(graph)
    pruned = {}
    for player, edges in graph.items():
        kept = [
            [teammate, count]
            for teammate, count in edges
            if teammate in known_players and count > 0
        ]
        if not kept:
            # a player with no remaining teammates is left out entirely
            continue
        # strongest connections first
        kept.sort(key=lambda edge: edge[1], reverse=True)
        pruned[player] = kept
    return pruned

In [96]:
def remove_those_without_friends(graph, threshold):
    """
    Remove nodes whose strongest connection is weaker than a threshold.

    A node is deleted when the largest match count across its edges is smaller
    than ``threshold``. A node with an empty edge list is treated as having a
    largest count of 0 rather than raising ValueError from ``max``. Edges of
    the remaining nodes that point at a deleted node are NOT removed here.

    :graph - graph representation of player network. {PlayerA: [[PlayerB, cnt_same_team]]}
    :threshold - the number of matches
    :returns None, operation in place
    """
    # snapshot the keys: entries are deleted from `graph` while looping
    for node in list(graph):
        strongest = max((edge[1] for edge in graph[node]), default=0)
        if strongest < threshold:
            del graph[node]

In [97]:
# Keep only positive-weight edges between key players; players left without edges are dropped.
shrinked_player_net = keep_teammate_edge(teammate_net)
print(len(shrinked_player_net))

461


In [98]:
# In place: drop players whose largest edge weight is below 10.
remove_those_without_friends(shrinked_player_net, 10)
print(len(shrinked_player_net))

99


In [99]:
# Re-run the edge filter so edges pointing at the players just removed are dropped as well.
shrinked_player_net = keep_teammate_edge(shrinked_player_net)
print(len(shrinked_player_net))

99


In [100]:
# Keep only edges whose target is still a node of the network, so no edge
# refers to a player that has been filtered out.
for player_id in list(shrinked_player_net):
    shrinked_player_net[player_id] = [
        item for item in shrinked_player_net[player_id]
        if item[0] in shrinked_player_net
    ]

In [101]:
# Node count after pruning edges; the loop above only rewrites edge lists, never removes nodes.
len(shrinked_player_net)

99

In [102]:
# Persist the reduced network to the data folder using the highest pickle protocol available.
with open(folder+'shrinked_player_net_v2', 'wb') as handle:
    pickle.dump(shrinked_player_net, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [103]:
def get_connections(net):
    """
    Tabulate up to three leading connections for every player.

    For each player the first three entries of its edge list are inspected
    (the list is expected to be ordered already, strongest first). An entry is
    recorded only when its count is greater than 1; otherwise the cells stay None.

    :net graph representation of player network. {PlayerA: [[PlayerB, cnt_same_team]]}
    :returns DataFrame with one row per player and columns account_id,
             freq_0..freq_2 (connected player ids) and
             freq_0_count..freq_2_count (the matching counts)
    """
    top_n = 3
    players = list(net.keys())

    res = pd.DataFrame()
    res['account_id'] = players
    # create the columns in a fixed order: the id columns first, then the count columns
    for column in ('freq_0', 'freq_1', 'freq_2',
                   'freq_0_count', 'freq_1_count', 'freq_2_count'):
        res[column] = None

    for row, player in enumerate(players):
        for rank, edge in enumerate(net[player][:top_n]):
            teammate, count = edge[0], edge[1]
            if count > 1:
                id_column = 'freq_' + str(rank)
                res.loc[row, id_column] = teammate
                res.loc[row, id_column + '_count'] = count
    return res

In [104]:
# One row per remaining player with up to three leading connections and their counts.
player_connections = get_connections(shrinked_player_net)

In [105]:
# Display the connection table for inspection.
player_connections

Unnamed: 0,account_id,freq_0,freq_1,freq_2,freq_0_count,freq_1_count,freq_2_count
0,68,17194,70,,194,45,
1,70,68,17194,,45,45,
2,71,26492,18989,114307,108,51,25
3,517,523,12205,,19,3,
4,523,517,,,19,,
...,...,...,...,...,...,...,...
94,117754,117753,,,294,,
95,124495,84885,,,68,,
96,130367,111061,,,192,,
97,136179,136180,5915,26886,272,3,2


In [106]:
# Attach the summary columns to each player's connection row. No `on=` is given,
# so merge joins on the column names the two frames share; how='left' keeps every
# row of player_connections even when summary has no matching entry.
final = player_connections.merge(summary, how='left')
final

Unnamed: 0,account_id,freq_0,freq_1,freq_2,freq_0_count,freq_1_count,freq_2_count,match_count,hero_id,hero_name,total_wins,total_matches,trueskill_mu,trueskill_sigma,win_rate
0,68,17194,70,,194,45,,8,100,Tusk,152,269,26.599750,1.644997,0.565056
1,70,68,17194,,45,45,,27,90,Keeper of the Light,102,212,26.678709,1.708360,0.481132
2,71,26492,18989,114307,108,51,25,3,21,Windranger,129,238,29.337633,1.705819,0.542017
3,517,523,12205,,19,3,,1,106,Ember Spirit,172,341,27.013098,1.505167,0.504399
4,523,517,,,19,,,2,70,Ursa,158,239,32.408251,1.833717,0.661088
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,117754,117753,,,294,,,3,44,Phantom Assassin,147,296,26.212674,1.872034,0.496622
95,124495,84885,,,68,,,1,9,Mirana,95,186,26.916627,2.116692,0.510753
96,130367,111061,,,192,,,1,35,Sniper,104,224,23.008016,1.967649,0.464286
97,136179,136180,5915,26886,272,3,2,2,11,Shadow Fiend,165,325,25.330560,1.645193,0.507692


In [107]:
# Export the merged node table as CSV.
# NOTE(review): with default arguments to_csv also writes the row index as an unnamed first column.
final.to_csv(folder+'node_v2.csv')

In [108]:
def get_edges(net):
    """
    List every edge of the network as one (source, target) row.

    Rows follow the iteration order of ``net`` and, within a player, the order
    of its edge list. Edge weights are not included.

    :net graph representation of player network. {PlayerA: [[PlayerB, cnt_same_team]]}
    :returns DataFrame with columns 'source' and 'target'
    """
    pairs = [(player, edge[0]) for player in net for edge in net[player]]

    res = pd.DataFrame()
    res['source'] = [src for src, _ in pairs]
    res['target'] = [dst for _, dst in pairs]
    return res

In [109]:
# Flatten the reduced network into a source/target edge table.
edges = get_edges(shrinked_player_net)

In [110]:
# Display the edge table for inspection.
edges

Unnamed: 0,source,target
0,68,17194
1,68,70
2,68,57487
3,68,65879
4,70,68
...,...,...
385,136179,4252
386,136179,53797
387,136180,136179
388,136180,6647


In [111]:
# Export the edge table as CSV.
# NOTE(review): with default arguments to_csv also writes the row index as an unnamed first column.
edges.to_csv(folder+'edges_v2.csv')

In [112]:
# Re-display the node count; get_connections and get_edges only read the network.
len(shrinked_player_net)

99