In [1]:
import json
import sys
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
from espncricinfo.match import Match

In [2]:
class PlayerNotFoundError(Exception):
    """Raised when the player page request is answered with HTTP 404."""


def get_html(player_id, timeout=30):
    """
    Download a player's ESPNcricinfo profile page and parse it.

    Parameters
    ----------
    player_id : int or str
        Identifier interpolated into the profile URL.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` may block indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the ``'html.parser'`` backend.

    Raises
    ------
    PlayerNotFoundError
        If the server responds with status 404.
    requests.HTTPError
        If the server responds with any other 4xx/5xx status.
    """
    url = "https://www.espncricinfo.com/ci/content/player/{0}.html".format(str(player_id))
    r = requests.get(url, timeout=timeout)
    if r.status_code == 404:
        raise PlayerNotFoundError(
            "No player page found for id {0}".format(player_id)
        )
    # Any other error response is not a player page; fail here rather than
    # handing an error document to the table parser downstream.
    r.raise_for_status()
    return BeautifulSoup(r.text, 'html.parser')

In [4]:
# Load the sample data. JSON is UTF-8 encoded, so the encoding is pinned
# instead of relying on the platform default (which differs across systems).
with open('../data/sample_data.json', encoding='utf-8') as json_file:
    sample_data = json.load(json_file)

In [5]:
# Batsmen recorded under 'innings1' of the sample data; as the displayed
# value shows, this is a dict mapping batsman name -> player-id string.
g_num = sample_data['innings1']['batsmen']
g_num

{'SC Ganguly': '28779',
 'BB McCullum': '37737',
 'RT Ponting': '7133',
 'DJ Hussey': '5766',
 'Mohammad Hafeez': '41434',
 'LR Shukla': '34019',
 'WP Saha': '279810',
 'AB Agarkar': '26184',
 'AB Dinda': '227712',
 'M Kartik': '30049',
 'I Sharma': '236779'}

In [6]:
def get_playing_stats(player_num):
    """
    Collect batting, bowling and fielding figures for one player.

    The player's page is fetched with ``get_html`` and every ``<table>`` with
    class ``engineTable`` is collected. The first table supplies the batting
    and fielding columns, the second the bowling columns. Only the last row
    of each table is kept; the code treats that row as the T20 record.
    NOTE(review): this relies on the page listing the T20 row last -- confirm
    for players whose tables contain a different set of formats.

    Parameters
    ----------
    player_num : int or str
        Player identifier, forwarded unchanged to ``get_html``.

    Returns
    -------
    pandas.Series
        Values labelled, in order: ``Mat``, ``bat_inns``, ``bat_ave``,
        ``bat_SR``, ``bat_bf``, ``Wkts``, ``bowl_ave``, ``bowl_econ``,
        ``bowl_SR``, ``bowl_balls``, ``Ct``, ``St``.

    Raises
    ------
    IndexError
        If the page contains fewer than two ``engineTable`` tables.
    KeyError
        If one of the expected columns is absent from a table.
    """
    player_html = get_html(player_num)
    tables = player_html.find_all('table', class_='engineTable')

    # pandas >= 2.1 deprecates passing literal HTML strings to read_html,
    # so the markup is wrapped in a file-like object.
    batting = pd.read_html(StringIO(str(tables[0])))[0]
    bowl = pd.read_html(StringIO(str(tables[1])))[0]

    # Batting columns get a 'bat_' prefix so they cannot collide with the
    # identically named bowling columns ('Ave', 'SR') after concatenation.
    batting_stats = batting[['Mat', 'Inns', 'Ave', 'SR', 'BF']].rename(
        columns={
            'Inns': 'bat_inns',
            'Ave': 'bat_ave',
            'SR': 'bat_SR',
            'BF': 'bat_bf',
        }
    )
    T20batting_stats = batting_stats.iloc[-1]

    # Catches and stumpings live in the same table as the batting figures.
    T20fielding_stats = batting[['Ct', 'St']].iloc[-1]

    bowling_stats = bowl[['Wkts', 'Ave', 'Econ', 'SR', 'Balls']].rename(
        columns={
            'Ave': 'bowl_ave',
            'SR': 'bowl_SR',
            'Econ': 'bowl_econ',
            'Balls': 'bowl_balls',
        }
    )
    T20bowling_stats = bowling_stats.iloc[-1]

    return pd.concat(
        [T20batting_stats, T20bowling_stats, T20fielding_stats]
    )

In [7]:
# Example call with player id '28779' (listed as 'SC Ganguly' in g_num).
get_playing_stats('28779')

Mat             77.00
bat_inns        73.00
bat_ave         25.01
bat_SR         107.00
bat_bf        1613.00
Wkts            29.00
bowl_ave        26.06
bowl_econ        7.91
bowl_SR         19.70
bowl_balls     573.00
Ct              28.00
St               0.00
Name: 4, dtype: float64