In [8]:
import bz2
import json
import pandas
import collections
import argparse

def filter_events(events, time_point=60*5):
    """Keep only the events that happened at or before ``time_point``.

    Args:
        events: iterable of mappings, each with a ``'time'`` entry.
        time_point: inclusive upper bound on ``event['time']``
            (defaults to 300).

    Returns:
        A new list with the matching events, in their original order.
    """
    kept = []
    for entry in events:
        if entry['time'] <= time_point:
            kept.append(entry)
    return kept

def get_comand(player_id):
    """Return the team prefix used in feature column names for a player slot.

    Slots 0-4 map to ``'r'`` and every other slot maps to ``'d'``
    (presumably Radiant / Dire -- confirm against the dataset description).

    Floor division is used explicitly so the result does not depend on
    whether ``/`` performs integer division (Python 2) or true division
    (Python 3 / ``from __future__ import division``).

    Args:
        player_id: 0-based integer index of the player within the match.

    Returns:
        ``'r'`` if ``player_id // 5 == 0``, otherwise ``'d'``.
    """
    if player_id // 5 == 0:
        return 'r'
    return 'd'

def extract_match_features(match, time_point=60*5):
    """Build per-player one-hot features for one match.

    For each of the 10 player slots three 0/1 columns are produced, named
    ``<team>_player<k>_towerkill``, ``<team>_player<k>_firstblood`` and
    ``<team>_player<k>_firstloose``, where ``<team>`` comes from
    ``get_comand(slot)`` and ``k`` is ``slot % 5 + 1``.

    * ``towerkill``  -- the slot equals the ``'player'`` of a ``'tower_kill'``
      objective whose time is within the window. If several qualify, the
      last one listed in ``match['objectives']`` wins.
    * ``firstblood`` -- the slot made the earliest kill (with a non-None
      victim) within the window.
    * ``firstloose`` -- the slot is the victim (``kill['player']``) of that
      earliest kill.

    The window is inclusive: events with ``time <= time_point`` count.

    NOTE(review): assumes objective records carry 'time', 'type' and
    'player' keys and that tower kills are typed exactly 'tower_kill' --
    confirm against the dataset.

    Args:
        match: mapping with ``'match_id'``, ``'objectives'`` and
            ``'players'`` (each player having a ``'kills_log'`` list).
        time_point: inclusive upper bound on event time (defaults to 300).

    Returns:
        ``collections.OrderedDict`` with ``'match_id'`` first, followed by
        the 30 feature columns in slot order.
    """
    feats = [
        ('match_id', match['match_id']),
    ]

    # Tower kill inside the window; -1 means "nobody".
    towerkill_player_id = -1
    for objective in match['objectives']:
        # Use the caller's window instead of a hard-coded 300 seconds.
        if objective['time'] <= time_point and objective['type'] == 'tower_kill':
            towerkill_player_id = objective['player']

    # Earliest kill inside the window; -1 means "nobody".
    first_blood_player = -1
    first_loser_player = -1
    min_time = None  # time of the earliest qualifying kill seen so far
    for player_id, player in enumerate(match['players']):
        for kill in filter_events(player['kills_log'], time_point):
            is_earlier = min_time is None or kill['time'] < min_time
            if is_earlier and kill['player'] is not None:
                # Remember the new minimum so later (in time) kills cannot
                # overwrite the real first blood.
                min_time = kill['time']
                first_blood_player = player_id
                first_loser_player = kill['player']

    for i in range(10):
        prefix = '%s_player%i' % (get_comand(i), (i % 5 + 1))
        feats += [
            (prefix + '_towerkill', int(i == towerkill_player_id)),
            (prefix + '_firstblood', int(i == first_blood_player)),
            (prefix + '_firstloose', int(i == first_loser_player)),
        ]
    return collections.OrderedDict(feats)


def iterate_matches(matches_filename):
    """Yield matches one by one from a bz2-compressed JSON-lines file.

    Each line of the decompressed file is parsed with ``json.loads`` and
    yielded. After every 1000th match has been handed out, a progress line
    is printed; the message appears when the consumer asks for the next
    item, because it follows the ``yield``.

    Args:
        matches_filename: path to the ``.bz2`` file.

    Yields:
        The decoded JSON value of each line.
    """
    with bz2.BZ2File(matches_filename) as f:
        for n, line in enumerate(f, 1):
            yield json.loads(line)
            if n % 1000 == 0:
                # Parenthesized single-argument form: valid and identical
                # in output on both Python 2 and Python 3.
                print('Processed %d matches' % n)

                
def create_table(matches_filename, time_point):
    """Build the feature table for every match in ``matches_filename``.

    Features are accumulated column-wise (one list per feature) to keep
    memory use low, then turned into a DataFrame whose columns follow the
    order produced by ``extract_match_features`` for the first match.

    Args:
        matches_filename: path passed to ``iterate_matches``.
        time_point: time window passed to ``extract_match_features``.

    Returns:
        ``pandas.DataFrame`` indexed by ``match_id`` and sorted by it. If
        the input contains no matches, an empty frame with an empty
        ``match_id`` index is returned.
    """
    fields = None
    columns = {}
    for match in iterate_matches(matches_filename):
        features = extract_match_features(match, time_point)
        if fields is None:
            # The first match fixes the column set and order.
            fields = list(features.keys())
            columns = {key: [] for key in fields}
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for key, value in features.items():
            columns[key].append(value)

    if fields is None:
        return pandas.DataFrame(index=pandas.Index([], name='match_id'))

    # `columns=fields` fixes the column order; `.ix` was removed from pandas.
    table = pandas.DataFrame(columns, columns=fields)
    return table.set_index('match_id').sort_index()

In [None]:
# Extract features from the first five minutes of every match ...
features_table = create_table("matches.jsonlines.bz2", time_point=5 * 60)
# ... and store them as CSV (the index, match_id, is written as the first column).
features_table.to_csv("data/tower_personal_and_firstblood_features.csv")

Processed 1000 matches
Processed 2000 matches
Processed 3000 matches
Processed 4000 matches
Processed 5000 matches
Processed 6000 matches
