In [1]:
import pandas as pd 
import json 
import os 

In [61]:
#function which flattens json data 
def flatten_json(json_data):
        # Extract the basic match info
    match_info = json_data.get('info', {})
    match_date = match_info.get('dates', [None])[0]
    venue = match_info.get('venue')
    teams = match_info.get('teams')

    # Toss details
    toss = match_info.get('toss', {})
    toss_winner = toss.get('winner')
    toss_decision = toss.get('decision')

    # Outcome details
    outcome = match_info.get('outcome', {})
    winner = outcome.get('winner')
    margin = outcome.get('by', None)

    # Get the team which won the toss
    if teams[0] == toss_winner:
        toss_loser = teams[1]
    else:
        toss_loser = teams[0]

    # Get the team which is chasing
    if toss_decision == "field":
        chasing_team = toss_winner
    else:
        chasing_team = toss_loser

    target_runs = None

    # Iterate through all innings to find the target runs
    for inning in json_data.get('innings', []):
        if inning.get('team') == chasing_team:
            target_info = inning.get('target', {})
            target_runs = target_info.get('runs', None)
            break  # Exit the loop once the correct inning is found

    return {
        'match_date': match_date,
        'venue': venue,
        'winner': winner,
        'margin': margin,
        'chasing_team': chasing_team,
        'toss_winner': toss_winner,
        'toss_loser': toss_loser,
        'toss_decision': toss_decision,
        'target_runs': target_runs
}

    
    
    

In [67]:
folder_path = '/Users/dankhan/Documents/cricketPredictor/ipl_json'
flattened_data_list = []

for filename in os.listdir(folder_path):
    if filename.endswith('.json'):  # Only process JSON files
        with open(os.path.join(folder_path, filename), 'r') as file:
            json_data = json.load(file)
            flattened_data = flatten_json(json_data)
            flattened_data_list.append(flattened_data)
            
            
matches = pd.DataFrame(flattened_data_list)
matches.shape
matches

Unnamed: 0,match_date,venue,winner,margin,chasing_team,toss_winner,toss_loser,toss_decision,target_runs
0,2024-04-09,Maharaja Yadavindra Singh International Cricke...,Sunrisers Hyderabad,{'runs': 2},Punjab Kings,Punjab Kings,Sunrisers Hyderabad,field,183.0
1,2023-04-23,"Eden Gardens, Kolkata",Chennai Super Kings,{'runs': 49},Kolkata Knight Riders,Kolkata Knight Riders,Chennai Super Kings,field,236.0
2,2009-05-09,De Beers Diamond Oval,Chennai Super Kings,{'wickets': 7},Chennai Super Kings,Rajasthan Royals,Chennai Super Kings,bat,141.0
3,2021-10-02,Sharjah Cricket Stadium,Delhi Capitals,{'wickets': 4},Delhi Capitals,Delhi Capitals,Mumbai Indians,field,130.0
4,2015-05-19,Wankhede Stadium,Mumbai Indians,{'runs': 25},Chennai Super Kings,Mumbai Indians,Chennai Super Kings,bat,188.0
...,...,...,...,...,...,...,...,...,...
1090,2010-03-22,Brabourne Stadium,Mumbai Indians,{'wickets': 7},Mumbai Indians,Kolkata Knight Riders,Mumbai Indians,bat,156.0
1091,2020-10-10,Dubai International Cricket Stadium,Royal Challengers Bangalore,{'runs': 37},Chennai Super Kings,Royal Challengers Bangalore,Chennai Super Kings,bat,170.0
1092,2019-04-07,Sawai Mansingh Stadium,Kolkata Knight Riders,{'wickets': 8},Kolkata Knight Riders,Kolkata Knight Riders,Rajasthan Royals,field,140.0
1093,2013-04-13,"MA Chidambaram Stadium, Chepauk",Chennai Super Kings,{'wickets': 4},Chennai Super Kings,Chennai Super Kings,Royal Challengers Bangalore,field,166.0


In [68]:
matches['toss_winner'].value_counts()

toss_winner
Mumbai Indians                 143
Chennai Super Kings            122
Kolkata Knight Riders          122
Rajasthan Royals               120
Royal Challengers Bangalore    113
Sunrisers Hyderabad             88
Kings XI Punjab                 85
Delhi Daredevils                80
Delhi Capitals                  50
Deccan Chargers                 43
Punjab Kings                    24
Gujarat Titans                  22
Pune Warriors                   20
Lucknow Super Giants            19
Gujarat Lions                   15
Kochi Tuskers Kerala             8
Royal Challengers Bengaluru      8
Rising Pune Supergiants          7
Rising Pune Supergiant           6
Name: count, dtype: int64

In [69]:
matches["toss_loser"].value_counts()

toss_loser
Kolkata Knight Riders          129
Royal Challengers Bangalore    127
Mumbai Indians                 118
Chennai Super Kings            116
Kings XI Punjab                105
Rajasthan Royals               101
Sunrisers Hyderabad             94
Delhi Daredevils                81
Delhi Capitals                  41
Punjab Kings                    32
Deccan Chargers                 32
Pune Warriors                   26
Lucknow Super Giants            25
Gujarat Titans                  23
Gujarat Lions                   15
Rising Pune Supergiant          10
Rising Pune Supergiants          7
Royal Challengers Bengaluru      7
Kochi Tuskers Kerala             6
Name: count, dtype: int64

In [65]:
#pune warriors, rising pune supergiant/s
#deccan chargers
#gujurat lions 
#rcb is times 2 
#dehli capitals and daredevils = same team
#Kochi Tuskers Kerala 



# Chennai Super Kings

# Delhi Capitals

# Gujarat Titans

# Kolkata Knight Riders

# Lucknow Super Giants

# Mumbai Indians

# Punjab Kings

# Rajasthan Royals

# Royal Challengers Bangalore

# Sunrisers Hyderabad


In [66]:
df.dtypes

match_date        object
venue             object
winner            object
margin            object
chasing_team      object
toss_winner       object
toss_loser        object
toss_decision     object
target_runs      float64
dtype: object

# Cleaning Data for ML 

In [70]:
matches["match_date"] = pd.to_datetime(matches['match_date']) 
matches

Unnamed: 0,match_date,venue,winner,margin,chasing_team,toss_winner,toss_loser,toss_decision,target_runs
0,2024-04-09,Maharaja Yadavindra Singh International Cricke...,Sunrisers Hyderabad,{'runs': 2},Punjab Kings,Punjab Kings,Sunrisers Hyderabad,field,183.0
1,2023-04-23,"Eden Gardens, Kolkata",Chennai Super Kings,{'runs': 49},Kolkata Knight Riders,Kolkata Knight Riders,Chennai Super Kings,field,236.0
2,2009-05-09,De Beers Diamond Oval,Chennai Super Kings,{'wickets': 7},Chennai Super Kings,Rajasthan Royals,Chennai Super Kings,bat,141.0
3,2021-10-02,Sharjah Cricket Stadium,Delhi Capitals,{'wickets': 4},Delhi Capitals,Delhi Capitals,Mumbai Indians,field,130.0
4,2015-05-19,Wankhede Stadium,Mumbai Indians,{'runs': 25},Chennai Super Kings,Mumbai Indians,Chennai Super Kings,bat,188.0
...,...,...,...,...,...,...,...,...,...
1090,2010-03-22,Brabourne Stadium,Mumbai Indians,{'wickets': 7},Mumbai Indians,Kolkata Knight Riders,Mumbai Indians,bat,156.0
1091,2020-10-10,Dubai International Cricket Stadium,Royal Challengers Bangalore,{'runs': 37},Chennai Super Kings,Royal Challengers Bangalore,Chennai Super Kings,bat,170.0
1092,2019-04-07,Sawai Mansingh Stadium,Kolkata Knight Riders,{'wickets': 8},Kolkata Knight Riders,Kolkata Knight Riders,Rajasthan Royals,field,140.0
1093,2013-04-13,"MA Chidambaram Stadium, Chepauk",Chennai Super Kings,{'wickets': 4},Chennai Super Kings,Chennai Super Kings,Royal Challengers Bangalore,field,166.0


In [71]:
matches.dtypes

match_date       datetime64[ns]
venue                    object
winner                   object
margin                   object
chasing_team             object
toss_winner              object
toss_loser               object
toss_decision            object
target_runs             float64
dtype: object

In [None]:
# base winning a match on the toss decision - whether they chose to bowl or bat 