### Get DB Connection string and connect to Oracle DB

In [1]:
import os
import import_ipynb
import Connections as conn
import cx_Oracle
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
import time
import datetime
import re
import math

importing Jupyter notebook from Connections.ipynb


### Define functions

In [2]:
# Open a new session against the Oracle database
def orcl_conn():
    """Return a new cx_Oracle connection.

    The connect string is obtained from ``conn.conn_string()`` (the imported
    Connections notebook). This function does not close the connection; that
    is left to the caller.
    """
    return cx_Oracle.connect(conn.conn_string())

# Collect match ids typed in by the user
def match_list_input():
    """Prompt for Cricinfo match ids, one per line, until an empty line is entered.

    Returns:
        list of the entered strings, in the order they were typed.
    """
    print('Enter the Cricinfo matchids for the required matches. To stop, please press Enter')
    # iter(input, '') keeps calling input() and stops at the first empty line
    return list(iter(input, ''))

# Fetch matches list based on files present in directory
def match_list_file(input_path=r'C:/Users/ninju/OneDrive/Desktop/Cricket_Analysis/Cricsheet data/Input folder'):
    """Return the match ids implied by the entry names found in ``input_path``.

    Each directory entry contributes the part of its name before the first
    '.', so ``1275271.json`` yields ``'1275271'``.

    Args:
        input_path: directory to scan. Defaults to the folder that used to be
            hard-coded here, so existing zero-argument calls behave as before.

    Returns:
        list of str, in the order ``os.listdir`` reports the entries.
    """
    return [entry.split('.')[0] for entry in os.listdir(input_path)]

# Reprocess match ids which have been logged into error log table
def match_list_error_reprocess():
    """Return the distinct match ids recorded in ``tgt_t20_dbo.error_log``.

    Opens its own connection through ``orcl_conn()``. The cursor and the
    connection are released in ``finally`` blocks so a failing query does not
    leave them open.

    Returns:
        list holding the first column of every row returned by the query.
    """
    connection = orcl_conn()
    try:
        print('Reprocessing match ids which didnt get loaded due to errors')
        cursor = connection.cursor()
        try:
            cursor.execute("select distinct match_id from tgt_t20_dbo.error_log")
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        connection.close()
    
# Log match ids for which execution didn't complete due to errors
def error_log(mid,err):
    """Record a failed match load in ``tgt_t20_dbo.error_log``.

    This function does not take or open a connection: it uses the
    module-level ``connection``, which must already be assigned when it is
    called.

    Args:
        mid: match id written to the ``match_id`` column.
        err: exception or message; stored as ``str(err)`` in ``error_msg``.
    """
    print('Error while loading match {match} data'.format(match=mid))
    print('Error logging begins for match {match}'.format(match=mid))
    cursor = connection.cursor()
    try:
        sql_qry = "insert into tgt_t20_dbo.error_log (match_id,error_msg) values (:1,:2)"
        cursor.execute(sql_qry,(mid,str(err)))
        connection.commit()
    finally:
        # Release the cursor even when the insert or commit fails
        cursor.close()
    print('Error logging successful')
    print(' ')
    
# Check players table if player data already exists
def check_player_exists(player_id):
    """Count the rows in ``tgt_t20_dbo.players`` that carry ``player_id``.

    Uses the module-level ``connection``.

    Args:
        player_id: value bound to the ``player_id`` filter.

    Returns:
        The ``fetchall()`` result of the ``count(*)`` query (a list holding
        the single count row).
    """
    cursor = connection.cursor()
    try:
        sql_qry = "select count(*) from tgt_t20_dbo.players where player_id = :1"
        # Bind parameters must be a sequence: (player_id) is just player_id,
        # the trailing comma makes it a one-element tuple.
        cursor.execute(sql_qry,(player_id,))
        return cursor.fetchall()
    finally:
        cursor.close()

def load_api_response(mid):
    """Fetch the two scorecard payloads for a match through a Chrome session.

    Args:
        mid: Cricinfo match id substituted into both API URLs.

    Returns:
        ``(ha_scr, hca_scr, driver)``: the parsed JSON of the hsapi scoreboard
        page, the parsed JSON of the hs-consumer-api scorecard page, and the
        still-open webdriver. This function leaves the driver open on
        success, so the caller has to quit it.

    Raises:
        Whatever the driver or ``json.loads`` raises; the driver is quit
        before the exception propagates so no browser is left running.
    """
    driver = webdriver.Chrome("../chromedriver_win32/chromedriver.exe")
    try:
        url_page_ha_scr = 'https://hsapi.espncricinfo.com/v1/pages/match/scoreboard?lang=en&leagueId=8048&eventId={match}&liveTest=false&qaTest=false'.format(match=mid)
        ha_scr = _load_json_page(driver, url_page_ha_scr)

        url_page_hca_scr = 'https://hs-consumer-api.espncricinfo.com/v1/pages/match/scorecard?seriesId=366616&matchId={match}'.format(match=mid)
        hca_scr = _load_json_page(driver, url_page_hca_scr)
    except BaseException:
        # The caller never receives the driver on failure (including an
        # interrupt), so it has to be released here.
        driver.quit()
        raise

    return (ha_scr,hca_scr,driver)


def _load_json_page(driver, url):
    """Open ``url`` in ``driver`` and parse the text of the rendered page as JSON."""
    driver.get(url)
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is installed
    return json.loads(BeautifulSoup(driver.page_source).text)
    
def load_bbb_api_response(mid,inns,page):
    """Fetch one page of the ball-by-ball commentary payload for an innings.

    Args:
        mid: Cricinfo match id (``eventId`` in the URL).
        inns: innings number (``period`` in the URL).
        page: page number of the commentary listing.

    Returns:
        ``(ha_bbb, driver)``: the parsed JSON payload and the still-open
        webdriver. This function leaves the driver open on success, so the
        caller has to quit it.

    Raises:
        Whatever the driver or ``json.loads`` raises; the driver is quit
        before the exception propagates so no browser is left running.
    """
    driver = webdriver.Chrome("../chromedriver_win32/chromedriver.exe")
    try:
        url_page_ha_bbb = 'https://hsapi.espncricinfo.com/v1/pages/match/comments?lang=en&leagueId=8048&eventId={match}&liveTest=false&period={inns}&page={page}'.format(match=mid,inns=inns,page=page)
        driver.get(url_page_ha_bbb)
        content_ha_bbb = driver.page_source
        page_ha_bbb = BeautifulSoup(content_ha_bbb)
        ha_bbb=json.loads(page_ha_bbb.text)
    except BaseException:
        # The caller never receives the driver on failure (including an
        # interrupt), so it has to be released here.
        driver.quit()
        raise

    return (ha_bbb,driver)
    
def get_match_teams(connection,mid):
    """Look up the home and away team names stored for a match.

    Args:
        connection: open database connection; only ``cursor()`` is used.
        mid: match id to look up in ``tgt_t20_dbo.matches``.

    Returns:
        The ``fetchall()`` result: a list of
        ``(HOME_TEAM_NAME, AWAY_TEAM_NAME)`` rows.
    """
    cursor = connection.cursor()
    try:
        # Bind the id instead of formatting it into the SQL text
        sql_qry = "select HOME_TEAM_NAME, AWAY_TEAM_NAME from tgt_t20_dbo.matches where match_id = :1"
        cursor.execute(sql_qry,(mid,))
        return cursor.fetchall()
    finally:
        cursor.close()
    

### Debug Matches code

In [15]:
connection = orcl_conn()
for mid in [1275271]:
        match_row_dict = {}

        # Fetch both scorecard payloads. A payload carrying a 'status' key is
        # treated as a failed fetch: the browser is closed and the request is
        # retried, giving up after the third retry.
        loop_count = 1
        api_available = True
        ha_scr,hca_scr,driver = load_api_response(mid)
        while ('status' in ha_scr) or ('status' in hca_scr):
            driver.quit()
            if loop_count > 3:
                # error_log takes (mid, err) and uses the module-level connection
                error_log(mid,'not able to access api')
                api_available = False
                break

            ha_scr,hca_scr,driver = load_api_response(mid)
            loop_count = loop_count + 1

        if not api_available:
            # Nothing usable was fetched, so skip this match instead of
            # parsing an error payload below.
            continue

        print('Extracting Match {} data'.format(mid))
        start_time = time.time()

        """ Matches data """
        match_id = mid

        # Short handles for the sub-documents that are read repeatedly below
        hca_match = hca_scr['match']
        ha_header = ha_scr['header']
        ha_event = ha_header['matchEvent']
        competitors = ha_event['competitors']

        def _official_fields(official):
            # (name, country, gender) of one umpire / referee entry
            return (official['player']['longName'],
                    official['team']['longName'],
                    official['player']['gender'])

        series_name = hca_match['series']['longName']
        match_format = ha_event['generalClassName']
        season = ha_event['season']
        series_match_no = hca_match['title']
        # NOTE: the match date is a fixed placeholder; the daysInfo-based parsing below is disabled
        #match_date = datetime.datetime.strptime(hca_scr['match']['daysInfo'], '%d %B %Y').strftime('%d-%b-%Y')
        match_date = '02-OCT-2019'
        match_time = hca_match['floodlit']
        ground = hca_match['ground']
        venue = ground['name']
        city = ground['town']['name']
        country = ground['country']['name']

        # The event name reads "<home> v <away>"
        event_sides = ha_event['name'].split(' v ')
        home_team_name = event_sides[0]
        away_team_name = event_sides[1]

        # Work out which of the two scorecard team entries is the home side;
        # when neither name matches, the abbreviation/points names are left untouched.
        hca_teams = hca_match['teams']
        team_slots = None
        if hca_teams[0]['team']['longName'] == home_team_name:
            team_slots = (0, 1)
        elif hca_teams[0]['team']['longName'] == away_team_name:
            team_slots = (1, 0)
        if team_slots is not None:
            home_slot, away_slot = team_slots
            home_team_abb = hca_teams[home_slot]['team']['abbreviation']
            away_team_abb = hca_teams[away_slot]['team']['abbreviation']
            home_team_match_points = hca_teams[home_slot]['points']
            away_team_match_points = hca_teams[away_slot]['points']

        # Captains are only recorded when both competitors carry a display name
        if ('displayName' not in competitors[0]['captain']) or ('displayName' not in competitors[1]['captain']):
            home_team_captain = None
            away_team_captain = None
        elif competitors[0]['name'] == home_team_name:
            home_team_captain = competitors[0]['captain']['displayName']
            away_team_captain = competitors[1]['captain']['displayName']
        elif competitors[0]['name'] == away_team_name:
            home_team_captain = competitors[1]['captain']['displayName']
            away_team_captain = competitors[0]['captain']['displayName']

        # Innings totals: anything belonging to an innings that was not played stays None
        innings_cards = hca_scr['content']['scorecard']['innings']
        innings_played = len(innings_cards)
        innings_total_keys = ('runs','wickets','overs','minutes','extras','byes','legbyes','wides','noballs','penalties')
        no_totals = [None] * len(innings_total_keys)

        if innings_played >= 1:
            inns1_team = innings_cards[0]['team']['longName']
            inns1_totals = [innings_cards[0][key] for key in innings_total_keys]
        else:
            inns1_team = None
            inns1_totals = no_totals
        (inns1_runs,inns1_wkts,inns1_overs,inns1_mins,inns1_extras,inns1_byes,
         inns1_leg_byes,inns1_wides,inns1_no_balls,inns1_penalties) = inns1_totals

        if innings_played >= 2:
            inns2_team = innings_cards[1]['team']['longName']
            inns2_totals = [innings_cards[1][key] for key in innings_total_keys]
            inns2_target = innings_cards[1]['target']
            inns2_total_overs = innings_cards[1]['totalOvers']
        else:
            inns2_team = None
            inns2_totals = no_totals
            inns2_target = None
            inns2_total_overs = None
        (inns2_runs,inns2_wkts,inns2_overs,inns2_mins,inns2_extras,inns2_byes,
         inns2_leg_byes,inns2_wides,inns2_no_balls,inns2_penalties) = inns2_totals

        # Result
        is_super_over = ha_header['isSuperOver']
        if str(competitors[0]['isWinner'])=='True':
            winner = competitors[0]['name']
        elif str(competitors[1]['isWinner'])=='True':
            winner = competitors[1]['name']
        else:
            winner = 'Tie/NR'

        status_text = ha_event['statusText']
        won_by_parts = status_text.split(' won by ')
        if str(ha_header['isSuperOver'])=='True':
            # Super over: keep the text inside the first pair of parentheses
            result_type = status_text.split(' (')[1].split(')')[0]
        elif len(won_by_parts) == 2:
            result_type = won_by_parts[1]
        else:
            result_type = status_text

        # Toss text is split on ' , ' into winner and decision; anything else gives None for both
        about = ha_scr['content']['about']
        toss_parts = about['toss'].split(' , ') if 'toss' in about else []
        if len(toss_parts) > 1:
            toss_winner = toss_parts[0]
            toss_decision = toss_parts[1]
        else:
            toss_winner = None
            toss_decision = None

        if 'bestPlayer' in ha_header:
            player_of_match = ha_header['bestPlayer']['name']
            player_of_match_team = ha_header['bestPlayer']['teamName']
        else:
            player_of_match = None
            player_of_match_team = None

        series_awards = about['playerOfSeries']
        player_of_series = series_awards[0]['text'] if len(series_awards) > 0 else None

        # Match officials
        umpires = hca_match['umpires']
        if umpires is None:
            umpire1_name = umpire1_country = umpire1_gender = None
            umpire2_name = umpire2_country = umpire2_gender = None
        else:
            umpire1_name, umpire1_country, umpire1_gender = _official_fields(umpires[0])
            umpire2_name, umpire2_country, umpire2_gender = _official_fields(umpires[1])

        tv_umpires = hca_match['tvUmpires']
        if tv_umpires is None:
            tv_umpire_name = tv_umpire_country = tv_umpire_gender = None
        else:
            tv_umpire_name, tv_umpire_country, tv_umpire_gender = _official_fields(tv_umpires[0])

        reserve_umpires = hca_match['reserveUmpires']
        if reserve_umpires is None:
            reserve_umpire_name = reserve_umpire_country = reserve_umpire_gender = None
        else:
            reserve_umpire_name, reserve_umpire_country, reserve_umpire_gender = _official_fields(reserve_umpires[0])

        match_referees = hca_match['matchReferees']
        if match_referees is None:
            match_referee_name = match_referee_country = match_referee_gender = None
        else:
            match_referee_name, match_referee_country, match_referee_gender = _official_fields(match_referees[0])

        # Column order must follow the table definition: the INSERT below is positional
        match_col_list = [
            series_name,match_format,season,series_match_no,match_date,match_time,venue,city,country,
            home_team_name,home_team_abb,home_team_captain,home_team_match_points,
            away_team_name,away_team_abb,away_team_captain,away_team_match_points,
            inns1_team,inns1_runs,inns1_wkts,inns1_overs,inns1_mins,inns1_extras,inns1_byes,
            inns1_leg_byes,inns1_wides,inns1_no_balls,inns1_penalties,
            inns2_team,inns2_target,inns2_total_overs,
            inns2_runs,inns2_wkts,inns2_overs,inns2_mins,inns2_extras,inns2_byes,inns2_leg_byes,
            inns2_wides,inns2_no_balls,inns2_penalties,
            is_super_over,winner,result_type,toss_winner,toss_decision,
            player_of_match,player_of_match_team,
            umpire1_name,umpire1_country,umpire1_gender,umpire2_name,umpire2_country,umpire2_gender,
            tv_umpire_name,tv_umpire_country,tv_umpire_gender,
            reserve_umpire_name,reserve_umpire_country,reserve_umpire_gender,
            match_referee_name,match_referee_country,match_referee_gender,
            None,None,player_of_series,
        ]
        match_row_dict[mid] = match_col_list

        # The dict key becomes the leading match_id column; missing values are turned into None before binding
        matches_df = pd.DataFrame.from_dict(match_row_dict, orient='index')
        matches_df = matches_df.where(pd.notnull(matches_df), None)
        matches_df = matches_df.rename_axis('match_id').reset_index()
        matches_df['match_id'] = matches_df['match_id'].astype(str).astype(int)
        rows = [tuple(record) for record in matches_df.values]
        sql_qry = ("INSERT INTO tgt_t20_dbo.matches VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17,:18,:19,"
                   ":20,:21,:22,:23,:24,:25,:26,:27,:28,:29,:30,:31,:32,:33,:34,:35,:36,:37,:38,:39,:40,:41,:42,:43,:44,:45,:46,"
                   ":47,:48,:49,:50,:51,:52,:53,:54,:55,:56,:57,:58,:59,:60,:61,:62,:63,:64,:65,:66,:67)")
        cursor = connection.cursor()
        cursor.executemany(sql_qry,rows)
        connection.commit()
        cursor.close()

        """ Players data """
        # One MERGE per listed player: a row is inserted only when the player_id is not already present
        match_players = hca_scr['content']['matchPlayers']
        if match_players is not None:
            for team_entry in match_players['teamPlayers']:
                for players in team_entry['players']:
                    profile = players['player']
                    player_row_dict = {}

                    player_id = profile['objectId']
                    player_name = profile['longName']
                    # Stored as delivered; a None stays None
                    gender = profile['gender']
                    playingRole = profile['playingRole']

                    # Only the first listed style is kept
                    batting_styles = profile['longBattingStyles']
                    longBattingStyles = batting_styles[0] if len(batting_styles) > 0 else None
                    bowling_styles = profile['longBowlingStyles']
                    longBowlingStyles = bowling_styles[0] if len(bowling_styles) > 0 else None

                    born = profile['dateOfBirth']
                    if born is None:
                        birth_year = birth_month = birth_date = None
                    else:
                        birth_year = born['year']
                        birth_month = born['month']
                        birth_date = born['date']

                    died = profile['dateOfDeath']
                    if died is None:
                        death_year = death_month = death_date = None
                    else:
                        death_year = died['year']
                        death_month = died['month']
                        death_date = died['date']

                    player_col_list = [player_name,gender,playingRole,longBattingStyles,longBowlingStyles,
                                       birth_year,birth_month,birth_date,
                                       death_year,death_month,death_date,None,None]
                    player_row_dict[player_id] = player_col_list

                    # The dict key becomes the leading player_id column
                    players_df = pd.DataFrame.from_dict(player_row_dict, orient='index')
                    players_df = players_df.where(pd.notnull(players_df), None)
                    players_df = players_df.rename_axis('player_id').reset_index()
                    players_df['player_id'] = players_df['player_id'].astype(str).astype(int)
                    rows = [tuple(record) for record in players_df.values]
                    sql_qry = ("MERGE INTO tgt_t20_dbo.players using DUAL on (player_id = {pid})" 
                               "WHEN NOT MATCHED THEN INSERT VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14)".format(pid=rows[0][0]))
                    cursor = connection.cursor()
                    cursor.executemany(sql_qry,rows)
                    connection.commit()
                    cursor.close()

        """ Batting scorecard """
        # One tgt_t20_dbo.batting_scorecard row per batter, numbered in scorecard order within each innings
        for inns in hca_scr['content']['scorecard']['innings']:
            position_index = 1
            match_id = int(mid)
            innings = inns['inningNumber']
            # The batting side is read from the innings itself (as the
            # Partnerships section does) instead of assuming the home team
            # batted first.
            team = inns['team']['longName']

            for batters in inns['inningBatsmen']:
                batter_row_dict = {}

                player_id = batters['player']['objectId']
                player_name = batters['player']['longName']
                is_batted = batters['battedType']
                runs = batters['runs']
                balls = batters['balls']
                minutes = batters['minutes']
                fours = batters['fours']
                sixes = batters['sixes']
                strikerate = batters['strikerate']
                isOut = batters['isOut']

                # Dismissal details stay None unless the batter was out
                dismissal_bowler_id = None
                dismissal_bowler_name = None
                dismissal_type = None
                dismissal_fielder_id = None
                dismissal_fielder_name = None
                is_keeper_dismissal = None
                is_substitute_dismissal = None
                fow_wicket_num = None
                fow_runs = None
                fow_overs = None

                if str(isOut) == 'True':
                    bowler = batters['dismissalBowler']
                    if bowler is not None:
                        dismissal_bowler_id = bowler['objectId']
                        dismissal_bowler_name = bowler['longName']
                    dismissal_type = batters['dismissalText']['short']

                    # Truthiness covers both a missing (None) and an empty fielder list
                    fielders = batters['dismissalFielders']
                    if fielders:
                        lead_fielder = fielders[0]
                        if lead_fielder['player'] is not None:
                            dismissal_fielder_id = lead_fielder['player']['objectId']
                            dismissal_fielder_name = lead_fielder['player']['longName']
                        is_keeper_dismissal = lead_fielder['isKeeper']
                        is_substitute_dismissal = lead_fielder['isSubstitute']

                    fow_wicket_num = batters['fowWicketNum']
                    fow_runs = batters['fowRuns']
                    fow_overs = batters['fowOvers']

                # Column order must follow the table definition: the INSERT is positional
                batter_col_list = [match_id,innings,team,position_index,player_id,player_name,is_batted,runs,balls,minutes,fours,sixes,
                                   strikerate,isOut,dismissal_bowler_id,dismissal_bowler_name,dismissal_type,dismissal_fielder_id,
                                   dismissal_fielder_name,is_keeper_dismissal,is_substitute_dismissal,fow_wicket_num,fow_runs,
                                   fow_overs,None,None]
                batter_row_dict[1] = batter_col_list
                batters_df = pd.DataFrame.from_dict(batter_row_dict, orient='index')
                batters_df = batters_df.where(pd.notnull(batters_df), None)
                rows = [tuple(x) for x in batters_df.values]
                sql_qry = ("INSERT INTO tgt_t20_dbo.batting_scorecard VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,"
                           ":14,:15,:16,:17,:18,:19,:20,:21,:22,:23,:24,:25,:26)")
                cursor = connection.cursor()
                try:
                    cursor.executemany(sql_qry,rows)
                    connection.commit()
                finally:
                    cursor.close()

                position_index = position_index + 1

        """ Bowling scorecard """ 
        # One tgt_t20_dbo.bowling_scorecard row per bowler, numbered in scorecard order within each innings
        bowling_innings = hca_scr['content']['scorecard']['innings']
        if len(bowling_innings) > 0:
            # The stored home/away names do not change between innings, so look them up once
            db_home_team,db_away_team = get_match_teams(connection,mid)[0]

            for inns in bowling_innings:
                position_index = 1
                match_id = int(mid)
                innings = inns['inningNumber']

                # The bowling side is whichever stored team is NOT batting in
                # this innings; the home side does not necessarily bat first.
                batting_team = inns['team']['longName']
                if batting_team == db_home_team:
                    team = db_away_team
                elif batting_team == db_away_team:
                    team = db_home_team
                elif innings == 1:
                    # Names do not line up: fall back to the previous
                    # positional guess (home side bats first).
                    team = db_away_team
                else:
                    team = db_home_team

                for bowlers in inns['inningBowlers']:
                    bowler_row_dict = {}

                    player_id = bowlers['player']['objectId']
                    player_name = bowlers['player']['longName']
                    overs = bowlers['overs']
                    maidens = bowlers['maidens']
                    runs_conceded = bowlers['conceded']
                    wickets = bowlers['wickets']
                    economy = bowlers['economy']
                    dots = bowlers['dots']
                    fours = bowlers['fours']
                    sixes = bowlers['sixes']
                    wides = bowlers['wides']
                    noballs = bowlers['noballs']

                    # Column order must follow the table definition: the INSERT is positional
                    bowler_col_list = [match_id,innings,team,position_index,player_id,player_name,overs,maidens,runs_conceded,wickets,economy,
                                       dots,fours,sixes,wides,noballs,None,None]
                    bowler_row_dict[1] = bowler_col_list
                    bowlers_df = pd.DataFrame.from_dict(bowler_row_dict, orient='index')
                    bowlers_df = bowlers_df.where(pd.notnull(bowlers_df), None)
                    rows = [tuple(x) for x in bowlers_df.values]
                    sql_qry = ("INSERT INTO tgt_t20_dbo.bowling_scorecard VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,"
                               ":14,:15,:16,:17,:18)")
                    cursor = connection.cursor()
                    try:
                        cursor.executemany(sql_qry,rows)
                        connection.commit()
                    finally:
                        cursor.close()

                    position_index = position_index + 1

        """ Partnerships """
        # One tgt_t20_dbo.partnerships row per stand, numbered in scorecard order within each innings
        innings_cards = hca_scr['content']['scorecard']['innings']
        if len(innings_cards) > 0:
            for inns in innings_cards:
                position_index = 1
                match_id = int(mid)
                innings = inns['inningNumber']
                team = inns['team']['longName']

                for partnerships in inns['inningPartnerships']:
                    first_batter = partnerships['player1']
                    second_batter = partnerships['player2']

                    runs = partnerships['runs']
                    balls = partnerships['balls']
                    player1_id = first_batter['id']
                    player1_objid = first_batter['objectId']
                    player1_name = first_batter['longName']
                    player2_id = second_batter['id']
                    player2_objid = second_batter['objectId']
                    player2_name = second_batter['longName']
                    outPlayerId = partnerships['outPlayerId']
                    player1Runs = partnerships['player1Runs']
                    player1Balls = partnerships['player1Balls']
                    player2Runs = partnerships['player2Runs']
                    player2Balls = partnerships['player2Balls']

                    # Column order must follow the table definition: the INSERT is positional
                    partnership_col_list = [
                        match_id,innings,team,position_index,runs,balls,
                        player1_id,player1_objid,player1_name,
                        player2_id,player2_objid,player2_name,
                        outPlayerId,player1Runs,player1Balls,player2Runs,player2Balls,
                        None,None,
                    ]
                    partnership_row_dict = {1: partnership_col_list}
                    partnership_df = pd.DataFrame.from_dict(partnership_row_dict, orient='index')
                    partnership_df = partnership_df.where(pd.notnull(partnership_df), None)
                    rows = [tuple(record) for record in partnership_df.values]
                    sql_qry = ("INSERT INTO tgt_t20_dbo.partnerships VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,"
                               ":14,:15,:16,:17,:18,:19)")
                    cursor = connection.cursor()
                    cursor.executemany(sql_qry,rows)
                    connection.commit()
                    cursor.close()

                    position_index += 1

        """ Debutants """
        # One tgt_t20_dbo.debutants row per player listed under debutPlayers
        debut_players = hca_scr['match']['debutPlayers']
        if debut_players is not None:
            for debutants in debut_players:
                match_id = int(mid)
                team_name = debutants['team']['longName']
                player_id = debutants['player']['objectId']
                player_name = debutants['player']['longName']

                # Column order must follow the table definition: the INSERT is positional
                debutant_col_list = [match_id,team_name,player_id,player_name,None,None]
                debutant_row_dict = {1: debutant_col_list}
                debutant_df = pd.DataFrame.from_dict(debutant_row_dict, orient='index')
                debutant_df = debutant_df.where(pd.notnull(debutant_df), None)
                rows = [tuple(record) for record in debutant_df.values]
                sql_qry = ("INSERT INTO tgt_t20_dbo.debutants VALUES (:1,:2,:3,:4,:5,:6)")
                cursor = connection.cursor()
                cursor.executemany(sql_qry,rows)
                connection.commit()
                cursor.close()



        driver.quit()
        end_time = time.time()
        print('Extracted Match {} data'.format(mid))
        print('Time taken to load match data (secs):',end_time - start_time)
        print(' ')

print(' ')
print('Matches data Extraction over')
connection.close() 


Extracting Match 1275271 data
Extracted Match 1275271 data
Time taken to load match data (secs): 2.3290889263153076
 
 
Matches data Extraction over


### Debug Ball-by-ball code

In [16]:
# Debug cell: re-extract ball-by-ball (BBB) commentary for a fixed list of
# match ids and insert one row per delivery into tgt_t20_dbo.bbb_data.
#
# For every innings the first page is requested once just to read the page
# count, then every page is requested in turn and each delivery on it is
# flattened into a 50-column row.
connection = orcl_conn()

# Positional binds :1..:50 must stay in the same order as bbb_col_list below.
sql_qry = ("INSERT INTO tgt_t20_dbo.bbb_data VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,"
           ":14,:15,:16,:17,:18,:19,:20,:21,:22,:23,:24,:25,:26,:27,:28,:29,:30,:31,:32,:33,:34,"
           ":35,:36,:37,:38,:39,:40,:41,:42,:43,:44,:45,:46,:47,:48,:49,:50)")

try:
    for mid in [1275271]:
        match_id = int(mid)
        inns1_team,inns2_team = get_match_teams(connection,mid)[0]
        start_time = time.time()

        for inns in np.arange(1,3):
            # The batting/bowling sides are fixed for a whole innings.
            if inns == 1:
                batting_team, bowling_team = inns1_team, inns2_team
            else:
                batting_team, bowling_team = inns2_team, inns1_team

            # Probe page 1 only to learn how many pages this innings has.
            # A response carrying a 'status' key is retried (presumably an
            # error payload - confirm against load_bbb_api_response).
            # NOTE(review): the retry is unbounded, as in the original.
            ha_bbb,driver = load_bbb_api_response(mid,inns,1)
            while 'status' in ha_bbb:
                driver.quit()
                ha_bbb,driver = load_bbb_api_response(mid,inns,1)
            driver.quit()
            total_pages = ha_bbb['pagination']['pageCount']

            for page in np.arange(total_pages):
                ha_bbb,driver = load_bbb_api_response(mid,inns,page+1)
                while 'status' in ha_bbb:
                    driver.quit()
                    ha_bbb,driver = load_bbb_api_response(mid,inns,page+1)

                # try/finally so the browser is released even when parsing
                # or the insert fails part-way through the page.
                try:
                    print('Extracting_{match}_Inns{inns}_Page{page} content'.format(match=mid,inns=inns,page=page+1))

                    for ball_data in ha_bbb['comments']:
                        # Only entries with a positive ball number are loaded.
                        if not (ball_data['ball'] > 0):
                            continue

                        # Presumably the feed's over number is zero-based - TODO confirm.
                        over = ball_data['over']+1
                        ball = ball_data['ball']

                        iswide = ball_data['isWide']
                        isNoball = ball_data['isNoball']
                        isRetiredHurt = ball_data['isRetiredHurt']
                        isBoundary = ball_data['isBoundary']

                        # Split the delivery's runs into exactly one of:
                        # wides, no-ball (+ bat runs), byes/leg-byes, bat runs.
                        total_ball_runs = ball_data['runs']
                        wide_runs = no_ball_runs = bye_legbye_runs = batter_runs = 0
                        if str(iswide) == 'True':
                            wide_runs = total_ball_runs
                        elif str(isNoball) == 'True':
                            if re.search('[(]no ball[)]', ball_data['shortText']):
                                # "(no ball)" in the text: one penalty run,
                                # the remainder is credited to the batter.
                                no_ball_runs = 1
                                batter_runs = total_ball_runs - 1
                            else:
                                no_ball_runs = total_ball_runs
                        else:
                            # The outcome is the second comma-separated part of
                            # the short text; tolerate texts without a comma
                            # instead of raising IndexError.
                            text_parts = ball_data['shortText'].split(', ')
                            outcome_text = text_parts[1] if len(text_parts) > 1 else ''
                            if 'bye' in outcome_text:
                                bye_legbye_runs = total_ball_runs
                            else:
                                batter_runs = total_ball_runs
                        total_extras_runs = wide_runs + no_ball_runs + bye_legbye_runs

                        # Batters: the feed uses the literal 'undefined' for a missing id.
                        striker = ball_data['currentBatsmen'][0]
                        striker_batter_id = None if striker['id'] == 'undefined' else int(striker['id'])
                        striker_batter_name = striker['name']
                        striker_batter_runs = striker['runs']
                        striker_batter_balls = striker['balls']

                        non_striker = ball_data['currentBatsmen'][1]
                        non_striker_batter_id = None if non_striker['id'] == 'undefined' else int(non_striker['id'])
                        non_striker_batter_name = non_striker['name']
                        non_striker_batter_runs = non_striker['runs']
                        non_striker_batter_balls = non_striker['balls']

                        # Bowlers: id/name may be absent from the entry; the figures are read unconditionally.
                        current_bowler = ball_data['currentBowlers'][0]
                        current_bowler_id = int(current_bowler['id']) if 'id' in current_bowler else None
                        current_bowler_name = current_bowler['name'] if 'name' in current_bowler else None
                        current_bowler_overs = current_bowler['overs']
                        current_bowler_maidens = current_bowler['maidens']
                        current_bowler_runs = current_bowler['runs']
                        current_bowler_wickets = current_bowler['wickets']

                        partner_bowler = ball_data['currentBowlers'][1]
                        partner_bowler_id = int(partner_bowler['id']) if 'id' in partner_bowler else None
                        partner_bowler_name = partner_bowler['name'] if 'name' in partner_bowler else None
                        partner_bowler_overs = partner_bowler['overs']
                        partner_bowler_maidens = partner_bowler['maidens']
                        partner_bowler_runs = partner_bowler['runs']
                        partner_bowler_wickets = partner_bowler['wickets']

                        current_inns_runs = ball_data['currentInning']['runs']
                        current_inns_balls = ball_data['currentInning']['balls']
                        current_inns_wickets = ball_data['currentInning']['wickets']

                        # Over summary is only attached to some deliveries.
                        if 'matchOver' in ball_data:
                            match_over = ball_data['matchOver']
                            over_maiden = match_over['maiden']
                            over_runs = match_over['runs']
                            over_wickets = match_over['wickets']
                            runRate = match_over['runRate']
                            requiredRunRate = match_over['requiredRunRate']
                            remainingBalls = match_over['remainingBalls']
                            remainingRuns = match_over['remainingRuns']
                        else:
                            over_maiden = None
                            over_runs = None
                            over_wickets = None
                            runRate = None
                            requiredRunRate = None
                            remainingBalls = None
                            remainingRuns = None

                        # Reset the dismissal fields for every delivery. Without
                        # this, a 'matchWicket' entry lacking an 'id' left them
                        # unassigned, so the row silently reused the previous
                        # ball's dismissal (or raised NameError on the first ball).
                        is_wicket = None
                        dismissed_batter_id = None
                        dismissed_batter_name = None
                        is_bowler_wicket = None
                        if 'matchWicket' in ball_data and 'id' in ball_data['matchWicket']:
                            match_wicket = ball_data['matchWicket']
                            is_wicket = 1
                            dismissed_batter_id = int(match_wicket['id'])
                            dismissed_batter_name = match_wicket['batsmanName']
                            # Run outs and obstructing the field are not credited to the bowler.
                            if re.search('(run out|obstructing the field)',match_wicket['text']):
                                is_bowler_wicket = 0
                            else:
                                is_bowler_wicket = 1

                        bbb_col_list = [match_id,inns,batting_team,bowling_team,over,ball,iswide,isNoball,isRetiredHurt,
                                        isBoundary,total_ball_runs,batter_runs,wide_runs,no_ball_runs,bye_legbye_runs,
                                        total_extras_runs,striker_batter_id,striker_batter_name,striker_batter_runs,
                                        striker_batter_balls,non_striker_batter_id,non_striker_batter_name,non_striker_batter_runs,
                                        non_striker_batter_balls,current_bowler_id,current_bowler_name,current_bowler_overs,
                                        current_bowler_maidens,current_bowler_runs,current_bowler_wickets,partner_bowler_id,
                                        partner_bowler_name,partner_bowler_overs,partner_bowler_maidens,partner_bowler_runs,
                                        partner_bowler_wickets,current_inns_runs,current_inns_balls,current_inns_wickets,
                                        over_maiden,over_runs,over_wickets,runRate,requiredRunRate,remainingBalls,remainingRuns,
                                        is_wicket,dismissed_batter_id,dismissed_batter_name,is_bowler_wicket]

                        # Round-trip through a one-row DataFrame so null/NaN
                        # values reach the driver as None.
                        # NOTE(review): this round-trip also appears to be what
                        # turns the numpy `inns` value into a plain int before
                        # binding - confirm before replacing it.
                        bbb_row_dict = {1: bbb_col_list}
                        bbb_df = pd.DataFrame.from_dict(bbb_row_dict, orient='index')
                        bbb_df = bbb_df.where(pd.notnull(bbb_df), None)
                        rows = [tuple(x) for x in bbb_df.values]

                        # One insert + commit per delivery, as before; the
                        # cursor is now closed even if the insert raises.
                        cursor = connection.cursor()
                        try:
                            cursor.executemany(sql_qry,rows)
                            connection.commit()
                        finally:
                            cursor.close()
                finally:
                    driver.quit()

        end_time = time.time()
        print('Extracted BBB data for Match {}'.format(mid))
        print('Time taken to load BBB data (secs):',end_time - start_time)
        print(' ')


    print(' ')
    print('BBB data Extraction over')
finally:
    # Always release the Oracle connection, including on failure.
    connection.close()

Extracting_1275271_Inns1_Page1 content
Extracting_1275271_Inns1_Page2 content
Extracting_1275271_Inns1_Page3 content
Extracting_1275271_Inns1_Page4 content
Extracting_1275271_Inns1_Page5 content
Extracting_1275271_Inns1_Page6 content
Extracting_1275271_Inns2_Page1 content
Extracting_1275271_Inns2_Page2 content
Extracting_1275271_Inns2_Page3 content
Extracting_1275271_Inns2_Page4 content
Extracted BBB data for Match 1275271
Time taken to load BBB data (secs): 45.8434534072876
 
 
BBB data Extraction over
