In [5]:
import os
import import_ipynb
import Connections as conn
import cx_Oracle
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
import time
import datetime
import yaml

### Define functions

In [6]:
# Establish connection with Oracle Database
def orcl_conn():
    """Open and return a new Oracle connection.

    The connect string is obtained from ``conn.conn_string()`` and handed
    straight to ``cx_Oracle.connect``. The caller owns the returned
    connection and is responsible for closing it.
    """
    return cx_Oracle.connect(conn.conn_string())

# Fetch matches list based on files present in directory
def match_list_file(input_path=r'C:/Users/ninju/OneDrive/Desktop/Cricket_Analysis/Cricsheet data/Downloads/Test'):
    """List the entries of the match-file directory.

    Parameters
    ----------
    input_path : str, optional
        Directory to scan. Defaults to the location this notebook was
        originally written against, so existing zero-argument calls keep
        working; pass another directory to run the notebook elsewhere.

    Returns
    -------
    tuple
        ``(input_path, match_list)`` where ``match_list`` is the list of
        entry names found directly inside ``input_path``.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when the directory cannot be read.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order reproducible from run to run.
    match_list = sorted(os.listdir(input_path))
    return (input_path,match_list)

# Log match ids for which execution didn't complete due to errors
def error_log(mid,err):
    """Record a failed match load in ``temp_tgt_dbo.error_log``.

    Parameters
    ----------
    mid
        Identifier of the match whose processing failed; stored in the
        ``match_id`` column.
    err
        The error that was raised; ``str(err)`` is stored in the
        ``error_msg`` column.

    A fresh connection is opened through ``orcl_conn()`` for each call.
    Any exception raised while inserting or committing propagates to the
    caller, but the cursor and connection are always released first.
    """
    print('Error while loading match {match} data'.format(match=mid))
    print('Error logging begins for match {match}'.format(match=mid))
    connection = orcl_conn()
    try:
        cursor = connection.cursor()
        try:
            # Bind variables keep the error text from being interpreted as SQL.
            sql_qry = "insert into temp_tgt_dbo.error_log (match_id,error_msg) values (:1,:2)"
            cursor.execute(sql_qry,(mid,str(err)))
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Close even when the insert/commit fails so connections do not leak.
        connection.close()
    print('Error logging successful')
    print(' ')

### Read YAML files and parse them into pandas DataFrames

In [14]:
# Walk every file returned by match_list_file(), parse its YAML 'info'
# section into a flat list of match attributes, and collect those lists in
# matches_row_dict keyed by match id. Files that fail are reported through
# error_log() and the loop moves on to the next file.
input_path,match_list=match_list_file()
matches_row_dict = {}
bbb_row_dict = {}
for match in match_list:
    # Derive the id before the try block so the except handler can always
    # reference it, even when opening or parsing the file is what failed.
    match_id=match.split('.')[0]
    try:
        # The with-block closes the file handle as soon as parsing is done.
        # NOTE(review): FullLoader can build more than plain data types; if
        # these files ever come from an untrusted source, prefer yaml.safe_load.
        with open(r'{path}/{match}'.format(path=input_path,match=match)) as yaml_file:
            yaml_dict = yaml.load(yaml_file, Loader=yaml.FullLoader)
        print('Parsing file {match}'.format(match=match))
        
        # Parsing Matches data
        info=yaml_dict['info']
        tournament=info['competition']
        gender=info['gender']
        match_type=info['match_type']
        overs=info['overs']
        
        # PyYAML may already have converted an unquoted YYYY-MM-DD value to a
        # datetime.date; only a plain string needs to go through strptime.
        first_date=info['dates'][0]
        if isinstance(first_date, str):
            first_date=datetime.datetime.strptime(first_date, '%Y-%m-%d')
        match_date=first_date.strftime('%d-%b-%Y')
        
        team1=info['teams'][0]
        team2=info['teams'][1]
        venue=info['venue']
        city=info['city']
        
        outcome=info['outcome']
        if 'winner' in outcome:
            winner=outcome['winner']
            # NOTE(review): these two are lists here but scalars in the
            # 'eliminator' branch below - confirm which shape consumers expect.
            margin_type=list(outcome['by'].keys())
            margin_number=list(outcome['by'].values())
        else:
            winner=outcome['result']
            if 'eliminator' in outcome:
                margin_type='eliminator'
                margin_number=outcome['eliminator']
            else:
                margin_type=None
                margin_number=None
        
        if 'player_of_match' in info:
            player_of_match=info['player_of_match'][0]
        else:
            player_of_match=None
            
        toss_winner=info['toss']['winner']
        toss_decision=info['toss']['decision']
        umpire1=info['umpires'][0]
        umpire2=info['umpires'][1]
        
        matches_col_list=[tournament,gender,match_type,overs,match_date,team1,team2,venue,city,winner,margin_type,margin_number,
                          player_of_match,toss_winner,toss_decision,umpire1,umpire2]
        matches_row_dict[match_id] = matches_col_list
        
        
        #Parsing BBB data
        
    except Exception as err:
        # Best-effort: record the failure against this match and keep going.
        error_log(match_id,err)