In [134]:
import pandas as pd
import glob

In [135]:
class Ball(dict):
    # change to ordered dict
    def __init__(self):
        self['score'] = 0
        self['bat'] = 0
        self['wicket'] = False
        self['wide'] = False

In [188]:
class Innings(Ball):
    
    def __init__(self):
        self.bat_order = []
        self.ball_order = []
        
    def overs(self, balls = None):
        if balls == None:
            balls = len(self.balls())            
        return (balls / 6) + ((balls % 6) / 10.0) 

    def over_to_balls(self, over=None):
        if over == None:
            return max(ball_order)
        return int(round(int(over) * 6 + ((over % 1) * 10), 0))
        
    def score_at_time():
        pass
    
    
    def balls(self):
        return [b for b in self.values() if isinstance(b, Ball)]
    
    def runs(self):
        return [b['score'] for b in self.balls()]
    
    def wickets(self):
        return [b for b in self.balls() if b['wicket']]

    # batting metrics
    
    def bat_partnerships(self):
        return []
    
    def bat_score(self, batsman):        
        return sum(b['bat'] for b in self.balls() if (b['batsman'] == batsman))

    def bat_balls_faced(self, batsman):        
        return sum(1 for b in self.balls() if (b['batsman'] == batsman) & ~(b['wide']))
            
    
    def bat_out(self, batsman):
        return int(batsman in [b['player_out'] for b in self.wickets()])
    
    def bat_scorecard(self):
        score_board = []
        for batsman in self.bat_order:
            runs = self.bat_score(batsman)
            balls = self.bat_balls_faced(batsman)
            out = self.bat_out(batsman)
            score_board.append((batsman, runs, balls, out))
        
        cols = ['Batsmen', 'Runs', 'Balls Faced', 'Out']
        sb = pd.DataFrame(data=score_board, columns=cols)

        total = ('Total', sum(self.runs()), self.overlen(self.balls()), len(self.wickets()))
        extra = ('Extras', total[1] - sb['Runs'].sum(), total[2] - sb['Balls Faced'].sum(), 0)
        score_board.append(extra)
        score_board.append(total)
        
        sb = pd.DataFrame(data=score_board, columns=cols) 
        sb['Strike_Rate'] = sb['Runs']/sb['Balls Faced'] *100

        return sb

In [165]:
class Match(dict):    
    def __init__(self, match_id):
        self.match_id = match_id
        self.team = {}
        
    def bat_score(self, batsman):
        if batsman in self[1].bat_order:
            return self[1].bat_score(batsman)
        elif batsman in self[2].bat_order:
            return self[2].bat_score(batsman)
        else:
            return None
    
    def bat_out(self, batsman):
        return any([self[i].bat_out(batsman) for i in [1, 2]])
        
    def winner(self):
        pass       

In [166]:
class Player(dict):
    
    def __init__(self, name):
        self.name = name
        self.games = []
    
    def __repr__(self):
        return 'Bat Ave : {:0.1f}'.format(self.average())
    
    def total_runs(self):
        pass
    
    def average(self):
        # need to take into account not outs...
        total = [all_games[g].bat_score(self.name) for g in self.games]
        times_out = [int(all_games[g].bat_out(self.name)) for g in self.games]
        
        return sum(total) / sum(times_out)
    

In [167]:
def read_match(match_id):

    game = Match(match_id)
    f = open('IPL/{:s}.yaml'.format(match_id), 'r')

    for line in f.readlines():
        tabs = [a.strip('\n') for a in line.split('  ')]
        len_tabs = len(tabs)
        try:
            var, out = [a.strip() for a in tabs[-1].split(':')]
        except:
            var = None
            out = [a.strip() for a in tabs[-1].split('- ')]

        if len_tabs == 2:
            game[var] = out
            if (var == '- 1st innings'): 
                innings = game[1] = Innings()
            if (var == '- 2nd innings'):
                innings = game[2] = Innings()

        if len_tabs == 5:
            var = var.strip('- ')
            innings.ball_order.append(float(var))
            current_ball = innings[var] = Ball()

        if len_tabs == 7:
            if var == 'wicket':
                current_ball['wicket'] = True
            else:
                current_ball[var] = out

            if var == 'batsman':
                if out not in innings.bat_order:
                    innings.bat_order.append(out)       

        if len_tabs == 8:
            if (var == 'total'):
                current_ball['score'] = int(out)
            if (var == 'batsman'):
                current_ball['bat'] = int(out)
            if (var == 'wides'):
                current_ball['wide'] = True
            
            if current_ball['wicket']:
                current_ball[var] = out
                
    return game
        
            

In [168]:
a = read_match('335982')

In [169]:
a[1]['5.2']

{'bat': 0,
 'batsman': 'SC Ganguly',
 'bowler': 'Z Khan',
 'fielders': '',
 'kind': 'caught',
 'non_striker': 'BB McCullum',
 'player_out': 'SC Ganguly',
 'runs': '',
 'score': 0,
 'wicket': True,
 'wide': False}

In [170]:
a[1].bat_scorecard()

Unnamed: 0,Batsmen,Runs,Balls Faced,Out,Strike_Rate
0,SC Ganguly,10,12.0,1,83.333333
1,BB McCullum,158,73.0,0,216.438356
2,RT Ponting,20,20.0,1,100.0
3,DJ Hussey,12,12.0,1,100.0
4,Mohammad Hafeez,5,3.0,0,166.666667
5,Extras,17,-99.6,0,-17.068273
6,Total,222,20.4,3,1088.235294


In [115]:
all_games = {}
all_batsmen = {}

for file_name in glob.glob('IPL/*.yaml'):
    match_id = file_name.strip('IPL/').strip('.yaml')
    print ('reading in match id {:s}'.format(match_id))
    curr_match = all_games[match_id] = read_match(match_id)
    
    for bat in curr_match[1].bat_order + curr_match[1].bat_order:
        if bat not in all_batsmen.keys():
            all_batsmen[bat] = Player(bat) 
        all_batsmen[bat].games.append(match_id)
        

reading in match id \335982
reading in match id \335983
reading in match id \335984
reading in match id \335985
reading in match id \335986
reading in match id \335987
reading in match id \335988
reading in match id \335989
reading in match id \335990
reading in match id \335991
reading in match id \335992
reading in match id \335993
reading in match id \335994
reading in match id \335995
reading in match id \335996
reading in match id \335997
reading in match id \335998
reading in match id \335999
reading in match id \336000
reading in match id \336001
reading in match id \336002
reading in match id \336003
reading in match id \336004
reading in match id \336005
reading in match id \336006
reading in match id \336007
reading in match id \336008
reading in match id \336009
reading in match id \336010
reading in match id \336011
reading in match id \336012
reading in match id \336013
reading in match id \336014
reading in match id \336015
reading in match id \336016
reading in match id 

In [116]:
all_batsmen['BB McCullum']

Bat Ave : 30.0

In [117]:
all_games['\\335982'].bat_score('BB McCullum')

158

In [118]:
all_games['\\335982'][1].bat_scorecard()

Unnamed: 0,Batsmen,Runs,Balls Faced,Out
0,SC Ganguly,10,12,1
1,BB McCullum,158,73,0
2,RT Ponting,20,20,1
3,DJ Hussey,12,12,1
4,Mohammad Hafeez,5,3,0
5,Extras,17,4,0
6,Total,222,124,3
