In [163]:
import numpy as np
import os
import pandas as pd
from tqdm import tqdm
import copy
import pickle


In [153]:
# Configuration: directory (one level above the working directory) holding the cleaned CSVs
clean_data_path = os.path.join(os.pardir, "clean_data")

In [154]:
# Read the player table, then keep only columns whose name does not start with "Unnamed".
player_csv_path = os.path.join(clean_data_path, "player.csv")
df_player = pd.read_csv(player_csv_path)
player_unnamed_cols = df_player.columns.str.contains('^Unnamed')
df_player = df_player.loc[:, ~player_unnamed_cols]

In [155]:
# Read the ball-by-ball table, then keep only columns whose name does not start with "Unnamed".
ball_csv_path = os.path.join(clean_data_path, "ball.csv")
df_ball = pd.read_csv(ball_csv_path)
ball_unnamed_cols = df_ball.columns.str.contains('^Unnamed')
df_ball = df_ball.loc[:, ~ball_unnamed_cols]

In [156]:
# Template of zeroed counters for one player in one match.
# It is deep-copied for every (player, match) pair, so it must never be mutated per-row.
empty_match_obj = {
    # batting
    "runs_scored": {},  # filled in later with one zeroed counter per bowling style
    "balls_faced": 0,
    "num_4s": 0,
    "num_6s": 0,
    # bowling: wickets are split by the dismissed batsman's batting style
    "wickets_taken": dict.fromkeys(("Left-hand bat", "Right-hand bat"), 0),
    "balls_bowled": 0,
    # fielding
    "catches": 0,
    "direct_runouts": 0,
    "indirect_runouts": 0,
    "stumping": 0,
    # overall score
    "fantasy_points": 0,
}

In [157]:
# Dismissal types for which the bowler is credited with the wicket
# (note that "run out" is deliberately absent from this list).
bowler_dismissal_type = [
    'bowled',
    'caught',
    'caught and bowled',
    'hit wicket',
    'lbw',
    'obstructing the field',
    'stumped',
]

In [158]:
# One zeroed runs counter per distinct bowling style found in the player table.
# Non-string values (e.g. NaN for a missing style) are skipped.
bowling_styles = set(df_player["bowling_style"])
empty_runs_scored_obj = {
    style_name: 0
    for style_name in bowling_styles
    if type(style_name) is str
}
# Install the per-style counters into the match template.
empty_match_obj["runs_scored"] = empty_runs_scored_obj

In [159]:
# Accumulator for the aggregated stats, keyed by player id and then by match id.
fantasy_obj = dict()

In [160]:
# Build per-player, per-match statistics from the ball-by-ball table.
# Result shape: fantasy_obj[player_id][match_id] -> a deep copy of empty_match_obj
# whose counters are incremented as each delivery is processed.

# Resolve every player's styles once, instead of scanning df_player with a boolean
# mask for each delivery. drop_duplicates keeps the first row per player_id, which
# matches what the previous per-row `.values[0]` lookup returned.
_players_unique = df_player.drop_duplicates(subset="player_id")
bowling_style_by_player = dict(zip(_players_unique["player_id"], _players_unique["bowling_style"]))
batting_style_by_player = dict(zip(_players_unique["player_id"], _players_unique["batting_style"]))


def _match_stats(player_id, match_id):
    """Return the stats dict for (player_id, match_id), creating it on first use.

    New entries are deep copies of ``empty_match_obj`` so the nested counter
    dicts are never shared between players or matches.
    """
    per_player = fantasy_obj.setdefault(player_id, {})
    if match_id not in per_player:
        per_player[match_id] = copy.deepcopy(empty_match_obj)
    return per_player[match_id]


for i, row in tqdm(df_ball.iterrows(), total=len(df_ball)):
    match_id = row["match_id"]
    dismissal = row["dismissal_type"]

    # Make sure batsman and bowler both have an entry for this match, even if
    # nothing ends up being credited to them on this delivery.
    batsman_stats = _match_stats(row["batsman"], match_id)
    bowler_stats = _match_stats(row["bowler"], match_id)

    # "fielders" is a comma-separated string when present; anything else
    # (e.g. NaN) means no fielder was involved.
    # NOTE(review): the split values are strings and are used as player keys as-is;
    # confirm they have the same type/format as the batsman/bowler ids.
    fielders = row["fielders"].split(",") if isinstance(row["fielders"], str) else []

    # batsman: runs are bucketed by the style of the bowler who delivered the ball.
    # A bowler missing from df_player, or one whose style is not a key of the
    # runs_scored template, raises KeyError here.
    batsman_stats["runs_scored"][bowling_style_by_player[row["bowler"]]] += row["batsman_runs"]
    if row["wide_runs"] == 0:
        batsman_stats["balls_faced"] += 1

    # bowler: wickets are bucketed by the dismissed batsman's batting style.
    if dismissal in bowler_dismissal_type:
        bowler_stats["wickets_taken"][batting_style_by_player[row["batsman"]]] += 1
    # Only deliveries with neither wide runs nor no-ball runs count as balls bowled.
    # This must be `and`: with `&` the expression parsed as the chained comparison
    # `wide_runs == (0 & noball_runs) == 0`, which silently ignored the no-ball check.
    if row["wide_runs"] == 0 and row["noball_runs"] == 0:
        bowler_stats["balls_bowled"] += 1

    # fielders: every listed fielder is credited according to the dismissal type.
    for fielder in fielders:
        fielder_stats = _match_stats(fielder, match_id)
        if dismissal == "run out":
            # A single listed fielder is treated as a direct hit; several share an indirect one.
            credit = "direct_runouts" if len(fielders) == 1 else "indirect_runouts"
        elif dismissal == "stumped":
            credit = "stumping"
        else:
            # Any other dismissal that lists fielders is counted as a catch.
            credit = "catches"
        fielder_stats[credit] += 1

193468it [02:54, 1105.59it/s]


In [168]:
# Persist the aggregated stats next to the cleaned CSVs.
# The context manager guarantees the file is flushed and closed even if dump() raises;
# the previous bare open() never closed the handle.
with open(os.path.join(clean_data_path, 'fantasy.pkl'), 'wb') as fantasy_pi:
    pickle.dump(fantasy_obj, fantasy_pi, pickle.HIGHEST_PROTOCOL)