In [17]:
import pandas as pd
import numpy as np
import urllib.request
import os
from progressbar import ProgressBar
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
import PIL.ImageOps    
from PIL import Image
import itertools
import math
import sys
from shutil import copyfile

html_src = './results/simple_AE_2d_11/index.html'
result_path = './results/'

def constract_graph(source, target, weight):
    if not len(source)==len(target)==len(weight):
        raise Exception('Source, target and weight not match.')
    G = nx.Graph()
    for i in range(len(source)):
        G.add_edge(source[i], target[i], weight=weight[i])
    return G

def p2f(x):
    return float(x.strip('%'))/100

In [18]:
price = pd.read_csv('data/price.csv')
price.rename(columns={'AppID':'id'}, inplace=True)
price.set_index('id', inplace=True)

In [19]:
detail = pd.read_csv('data/detail.csv')
detail.rename(columns={'AppID':'id'}, inplace=True)
detail.set_index('id', inplace=True)

In [20]:
more_info = pd.read_csv('data/id.csv')
more_info.set_index('id', inplace=True)
more_info = more_info.fillna(0)
more_info["ser_id"] = pd.to_numeric(more_info["ser_id"], downcast='integer')
on_sale = pd.read_csv('data/discount.csv')
on_sale.rename(columns={'AppID':'id'}, inplace=True)
on_sale.set_index('id', inplace=True)
detail = detail.join(more_info)
detail = detail.join(on_sale)

In [21]:
game_data = pd.read_csv('data/total_data.csv',  converters={'Rating':p2f})
game_data.set_index('AppID', inplace=True)

In [27]:
game_data

Unnamed: 0_level_0,Name,Positive,Negative,Rating,Current,24h Peak,All-Time Peak
AppID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10,Counter-Strike,133134,3541,0.9606,6299,14655,319586
20,Team Fortress Classic,3591,670,0.8151,32,102,1897
30,Day of Defeat,3623,435,0.8606,41,149,7758
40,Deathmatch Classic,1365,290,0.7899,6,20,629
50,Half-Life: Opposing Force,5670,314,0.9149,85,171,254
60,Ricochet,2962,717,0.7793,7,19,95
70,Half-Life,30813,1167,0.9431,461,933,2167
80,Counter-Strike: Condition Zero,13229,1499,0.8761,285,676,32748
130,Half-Life: Blue Shift,4168,461,0.8689,44,113,174
220,Half-Life 2,72913,2488,0.9511,790,1502,12953


In [23]:
files = [f for f in os.listdir(result_path) if not os.path.isfile(f)]

In [24]:
files

['simple_AE_100d_16',
 'simple_AE_80d_10',
 'vgg_50d_11',
 'simple_AE_90d_9_degree',
 'simple_AE_5d_12',
 'simple_AE_40d_9_degree',
 'vgg_30d_8',
 'simple_AE_30d_10_degree',
 'simple_AE_50d_67',
 'vgg_10d_7',
 'vgg_20d_6',
 'simple_AE_2d_11_degree',
 'simple_AE_60d_13',
 'simple_AE_70d_27_degree',
 'simple_AE_70d_13',
 'simple_AE_20d_11_degree',
 'vgg_5d_8',
 'simple_AE_5d_12_degree',
 'simple_AE_100d_13',
 'simple_AE_2d_11',
 'simple_AE_40d_13',
 'simple_AE_80d_26_degree',
 'simple_AE_50d_7_degree',
 'simple_AE_90d_12',
 'simple_AE_100d_5_degree',
 'vgg_100d_5',
 'vgg_3d_6',
 'simple_AE_3d_8_degree',
 'vgg_200d_6',
 'simple_AE_20d_12',
 'simple_AE_60d_6_degree',
 'vgg_2d_10',
 'simple_AE_3d_9',
 'simple_AE_30d_13']

In [26]:
pbar = ProgressBar()
for file in pbar(files):
    target = result_path+file+'/'
    edges = pd.read_csv(target+'edge.csv')
    nodes = pd.read_csv(target+'node.csv')
    edges.rename(columns={'Source':'source',
                          'Target':'target',
                          'Weight':'weight'},
                 inplace=True)
    nodes.rename(columns={'Id':'id', 'modularity_class':'group'},
                 inplace=True)
    nodes.set_index('id',inplace=True)
    edges.drop(columns=['Type', 'Id', 'Label', 'timeset'], inplace=True)
    nodes.drop(columns=['Label', 'timeset'], inplace=True)
    G = constract_graph(list(edges['source']), list(edges['target']), list(edges['weight']))
    constrains = pd.read_csv(target+'parameter.csv')
    w_threhold = 0
    d_threhold = 0
    try:
        pre_edges = nx.number_of_edges(G)
        w_threhold = constrains.loc[0]['weight']
        remove_edges = list()
        G_weights = nx.get_edge_attributes(G, 'weight')
        for edge in nx.edges(G):
            if G_weights[edge] > w_threhold:
                remove_edges.append(edge)
        G.remove_edges_from(remove_edges)
        aft_edges = nx.number_of_edges(G)
#         print(str(pre_edges)+' edges->'+str(aft_edges)+' edges.')
    except:
#         print('no weight constrain')
        pass
    try:
        pre_nodes = nx.number_of_nodes(G)
        d_threhold = constrains.loc[0]['degree']
        remove = [node for node,degree in G.degree() if degree > d_threhold]
        G.remove_nodes_from(remove)
        aft_nodes = nx.number_of_nodes(G)
#         print(str(pre_nodes)+' nodes->'+str(aft_nodes)+' nodes.')
    except:
#         print('no degree constrain')
        pass
    nx.write_weighted_edgelist(G, target+'tempt.csv', delimiter=',')
    new_edges = pd.read_csv(target+'tempt.csv', header=None).rename(columns={0:'source',1:'target',2:'weight'})
    new_nodes = pd.DataFrame(list(nx.nodes(G))).rename(columns={0:'id'}).set_index('id')
    new_nodes = new_nodes.join(nodes)
    new_nodes = new_nodes.join(price)
    new_nodes = new_nodes.join(detail)
    new_nodes = new_nodes.join(game_data)
    new_nodes.index.name = 'id'
    new_edges.to_csv(target+'pro_edges.csv', index=False)
    new_nodes.rename(columns={"24h Peak":"24hPeak","All-Time Peak":"AllTime"})
    new_nodes.to_csv(target+'pro_nodes.csv', index=True)
    try:
        copyfile(html_src, target+'index.html')
    except:
        pass
#         print('same file error')
#     print(target+' completed.')
    

100% |########################################################################|


In [53]:
constrains = pd.read_csv(target+'parameter.csv')

In [28]:
new_nodes

Unnamed: 0_level_0,group,price,company,date,tag,ser_id,discount,Name,Positive,Negative,Rating,Current,24h Peak,All-Time Peak
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10,73,9.99,Valve,2000,1774,0,1.0,Counter-Strike,133134,3541,0.9606,6299,14655,319586
50,48,4.99,Valve,1999,4667,6,1.0,Half-Life: Opposing Force,5670,314,0.9149,85,171,254
70,35,9.99,Valve,1998,4667,6,1.0,Half-Life,30813,1167,0.9431,461,933,2167
130,59,4.99,Valve,2001,4667,6,1.0,Half-Life: Blue Shift,4168,461,0.8689,44,113,174
220,59,9.99,Valve,2004,4667,6,1.0,Half-Life 2,72913,2488,0.9511,790,1502,12953
240,59,9.99,Valve,2004,1774,0,1.0,Counter-Strike: Source,85237,3742,0.9431,2710,6138,100056
280,35,9.99,Valve,2004,4667,6,1.0,Half-Life: Source,4135,1120,0.7651,75,153,452
320,2,4.99,Valve,2004,4667,6,1.0,Half-Life 2: Deathmatch,6361,809,0.8604,113,255,2702
340,12,0.00,Valve,2005,4667,6,0.0,Half-Life 2: Lost Coast,6035,1045,0.8280,14,38,180
360,36,9.99,Valve,2006,4667,6,1.0,Half-Life Deathmatch: Source,1476,489,0.7255,13,27,79


In [12]:
G = constract_graph(list(edges['source']), list(edges['target']), list(edges['weight']))

In [11]:
on_sale

Unnamed: 0_level_0,discount
id,Unnamed: 1_level_1
10.0,1.0
50.0,1.0
70.0,1.0
130.0,1.0
220.0,1.0
240.0,1.0
280.0,1.0
320.0,1.0
340.0,0.0
360.0,1.0


Unnamed: 0,group,price,company,date,tag,ser_id,Name,Positive,Negative,Rating,Current,24h Peak,All-Time Peak
10,73,9.99,Valve,2000,1774,,Counter-Strike,133134,3541,96.06%,6299,14655,319586
50,48,4.99,Valve,1999,4667,6.0,Half-Life: Opposing Force,5670,314,91.49%,85,171,254
70,35,9.99,Valve,1998,4667,6.0,Half-Life,30813,1167,94.31%,461,933,2167
130,59,4.99,Valve,2001,4667,6.0,Half-Life: Blue Shift,4168,461,86.89%,44,113,174
220,59,9.99,Valve,2004,4667,6.0,Half-Life 2,72913,2488,95.11%,790,1502,12953
240,59,9.99,Valve,2004,1774,,Counter-Strike: Source,85237,3742,94.31%,2710,6138,100056
280,35,9.99,Valve,2004,4667,6.0,Half-Life: Source,4135,1120,76.51%,75,153,452
320,2,4.99,Valve,2004,4667,6.0,Half-Life 2: Deathmatch,6361,809,86.04%,113,255,2702
340,12,0.00,Valve,2005,4667,6.0,Half-Life 2: Lost Coast,6035,1045,82.80%,14,38,180
360,36,9.99,Valve,2006,4667,6.0,Half-Life Deathmatch: Source,1476,489,72.55%,13,27,79
