In [1]:
from espncricinfo.match import Match
import json
from bs4 import BeautifulSoup
import pandas as pd
import lxml

In [2]:
# Read the year -> match-number lookup from its JSON file on disk.
year_matchnum_path = '/Users/jasonmyers/Documents/Coding_playground/cric/data/ipl_year_match_num.json'
with open(year_matchnum_path) as matchnum_fh:
    year_matchnum_dict = json.load(matchnum_fh)


In [108]:
# Example entry: index 4 under the '2008' key of the loaded lookup.
# It is passed to Match(...) further down as the match to summarise.
sample = year_matchnum_dict['2008'][4]

In [102]:
def get_bat_bowl_htmls(match_class):
    """
    Return the batting and bowling tables for the first two innings of a match.

    Parameters
    ----------
    match_class : object
        Match-like object whose ``html`` attribute holds the scorecard page.
        The attribute is converted with ``str()`` and parsed with the
        'lxml' parser.

    Returns
    -------
    tuple
        ``(bat_inn1, bat_inn2, bowl_inn1, bowl_inn2)`` -- the first and second
        ``<table class="table batsman">`` elements followed by the first and
        second ``<table class="table bowler">`` elements.

    Raises
    ------
    IndexError
        If the page has fewer than two batting or two bowling tables.
    """
    # Parse the page of the match passed in as the argument, not whatever
    # notebook-level `match` variable happens to exist in the kernel.
    soup = BeautifulSoup(str(match_class.html), 'lxml')

    # One search per table kind; each result is indexed for both innings.
    bat_tables = soup.find_all("table", {"class": "table batsman"})
    bowl_tables = soup.find_all("table", {"class": "table bowler"})

    return (
        bat_tables[0],
        bat_tables[1],
        bowl_tables[0],
        bowl_tables[1],
    )

In [103]:
def get_player_val_dict(html_script):
    """
    Map each player's displayed name to their player number.

    Parameters
    ----------
    html_script : object
        Parsed HTML fragment exposing ``find_all`` (for example one of the
        tables returned by ``get_bat_bowl_htmls``).

    Returns
    -------
    dict
        ``{name: player_num}`` in document order. ``player_num`` is the last
        path segment of the anchor's ``href`` with ``.html`` removed.
        Anchors that are empty or have no ``href`` are skipped.
    """
    player_dict = {}
    for player in html_script.find_all("a", {"class": "small"}):
        href = player.get('href')
        if not player.contents or href is None:
            # No name to key on, or no link to take an id from: skip instead
            # of raising IndexError or recording the bogus id 'None'.
            continue

        first_child = player.contents[0]
        try:
            # Name wrapped in an inner tag: use that tag's first child.
            k = first_child.contents[0]
        except AttributeError:
            # Plain text node sitting directly inside the anchor.
            k = first_child
        except IndexError:
            # Inner tag exists but is empty, so there is no name to record.
            continue

        player_num = str(href).split('/')[-1].replace('.html', '')
        player_dict[k] = player_num

    return player_dict

In [106]:
def get_match_summary_info(match):
    """
    Summarise a match: batsmen, bowlers and totals for each of the
    first two innings.

    Returns a dict with keys 'innings1' and 'innings2'; each value holds
    'batsmen' and 'bowlers' (name -> player number dicts) plus
    'total_runs' and 'total_wickets' taken from ``match.innings``.
    """
    first_totals, second_totals = match.innings[0], match.innings[1]

    bat_1, bat_2, bowl_1, bowl_2 = get_bat_bowl_htmls(match)

    # (label, innings totals, batting table, bowling table) per innings
    per_innings = (
        ('innings1', first_totals, bat_1, bowl_1),
        ('innings2', second_totals, bat_2, bowl_2),
    )

    summary = {}
    for label, totals, bat_table, bowl_table in per_innings:
        summary[label] = {
            'batsmen': get_player_val_dict(bat_table),
            'bowlers': get_player_val_dict(bowl_table),
            'total_runs': totals['runs'],
            'total_wickets': totals['wickets'],
        }
    return summary

In [109]:
# Build a Match for the sample entry; the summary dict returned by the last
# expression is what the cell displays.
match = Match(sample)
get_match_summary_info(match)

{'innings1': {'batsmen': {'L Ronchi': '7502',
   'ST Jayasuriya': '49209',
   'DJ Thornely': '8002',
   'RV Uthappa': '35582',
   'PR Shah': '34282',
   'AM Nayar': '32091',
   'SM Pollock': '46774',
   'Harbhajan Singh': '29264',
   'MA Khote': '30220',
   'A Nehra': '31820',
   'DS Kulkarni': '277955'},
  'bowlers': {'P Kumar': '30732',
   'Z Khan': '30102',
   'JH Kallis': '45789',
   'R Vinay Kumar': '35731',
   'B Akhil': '26206',
   'SB Joshi': '29725'},
  'total_runs': '165',
  'total_wickets': '6'},
 'innings2': {'batsmen': {'S Chanderpaul': '51469',
   'R Dravid': '28114',
   'LRPL Taylor': '38699',
   'JH Kallis': '45789',
   'V Kohli': '253802',
   'MV Boucher': '44111',
   'B Akhil': '26206',
   'P Kumar': '30732',
   'Z Khan': '30102',
   'SB Joshi': '29725',
   'R Vinay Kumar': '35731'},
  'bowlers': {'A Nehra': '31820',
   'SM Pollock': '46774',
   'DS Kulkarni': '277955',
   'ST Jayasuriya': '49209',
   'Harbhajan Singh': '29264',
   'AM Nayar': '32091'},
  'total_runs'