In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
import json

In [2]:
# Load the match-results JSON and build a frame from the "matchSummary"
# list held by the first element of the file.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open("t20_wc_match_results.json", encoding="utf-8") as f:
    data = json.load(f)
df_results = pd.DataFrame(data[0]["matchSummary"])

In [3]:
# Load the player-info JSON; the parsed object is handed to DataFrame as-is.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open("t20_wc_player_info.json", encoding="utf-8") as f:
    data = json.load(f)
df_player = pd.DataFrame(data)

In [4]:
# Load the batting JSON. Each element of the file carries a "battingSummary"
# list; all of those lists are concatenated into one flat list of records.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open("t20_wc_batting_summary.json", encoding="utf-8") as f:
    data = json.load(f)
all_matches = [record for entry in data for record in entry["battingSummary"]]
df_batting = pd.DataFrame(all_matches)

In [7]:
# Load the bowling JSON. Each element of the file carries a "bowlingSummary"
# list; all of those lists are concatenated into one flat list of records.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
with open("t20_wc_bowling_summary.json", encoding="utf-8") as f:
    data = json.load(f)
all_matches = [record for entry in data for record in entry["bowlingSummary"]]
df_bowling = pd.DataFrame(all_matches)

In [8]:
# Write each frame to CSV without the index column.
# NOTE(review): in file order this cell sits before the cleaning cells
# further down, so a top-to-bottom run writes the frames exactly as loaded.
# Confirm whether the cleaned frames should be exported instead.
df_results.to_csv("results.csv", index=False)
df_player.to_csv("player.csv", index=False)
# File name corrected from the typo "battling.csv"; update any reader of the old name.
df_batting.to_csv("batting.csv", index=False)
df_bowling.to_csv("bowling.csv", index=False)

### **Result DataFrame**

In [124]:
# Preview the first five rows of the results frame.
df_results.head(n=5)

Unnamed: 0,team1,team2,winner,margin,ground,matchDate,scorecard
0,Namibia,Sri Lanka,Namibia,55 runs,Geelong,"Oct 16, 2022",T20I # 1823
1,Netherlands,U.A.E.,Netherlands,3 wickets,Geelong,"Oct 16, 2022",T20I # 1825
2,Scotland,West Indies,Scotland,42 runs,Hobart,"Oct 17, 2022",T20I # 1826
3,Ireland,Zimbabwe,Zimbabwe,31 runs,Hobart,"Oct 17, 2022",T20I # 1828
4,Namibia,Netherlands,Netherlands,5 wickets,Geelong,"Oct 18, 2022",T20I # 1830


In [125]:
# The "scorecard" column becomes "match_id"; the frame is modified in place.
results_column_names = {"scorecard": "match_id"}
df_results.rename(columns=results_column_names, inplace=True)

In [126]:
def _month_name_to_number(date_text):
    """Swap the month abbreviation for its number plus a comma.

    "Oct" becomes "10,"; when "Oct" is absent, "Nov" becomes "11,".
    Text containing neither abbreviation is returned unchanged.
    """
    if "Oct" in date_text:
        return date_text.replace("Oct", "10,")
    return date_text.replace("Nov", "11,")


df_results["matchDate"] = df_results["matchDate"].apply(_month_name_to_number)

In [129]:
# Label each match by date: before 22 Oct 2022 -> "Qualifier", otherwise "Super 12".
# The "10, 16, 2022"-style text is parsed into real dates before comparing.
# Comparing the raw strings is lexicographic and misorders single-digit days
# (for example "10, 9, 2022" sorts after "10, 22, 2022").
# NOTE(review): every match on or after the cutoff is labelled "Super 12",
# which would include any knockout fixtures present in the data. Confirm intended.
stage_cutoff = pd.Timestamp(2022, 10, 22)
match_dates = pd.to_datetime(df_results["matchDate"], format="%m, %d, %Y")
df_results["stage"] = (match_dates < stage_cutoff).map({True: "Qualifier", False: "Super 12"})

In [131]:
# Preview the results frame after the rename, date rewrite and stage column.
df_results.head(n=5)

Unnamed: 0,team1,team2,winner,margin,ground,matchDate,match_id,stage
0,Namibia,Sri Lanka,Namibia,55 runs,Geelong,"10, 16, 2022",T20I # 1823,Qualifier
1,Netherlands,U.A.E.,Netherlands,3 wickets,Geelong,"10, 16, 2022",T20I # 1825,Qualifier
2,Scotland,West Indies,Scotland,42 runs,Hobart,"10, 17, 2022",T20I # 1826,Qualifier
3,Ireland,Zimbabwe,Zimbabwe,31 runs,Hobart,"10, 17, 2022",T20I # 1828,Qualifier
4,Namibia,Netherlands,Netherlands,5 wickets,Geelong,"10, 18, 2022",T20I # 1830,Qualifier


In [9]:
# Lookup from "<team> Vs <team>" to match_id. Both team orderings are
# registered so a label resolves whichever side is named first.
match_ids_dict = {}
fixtures = zip(df_results["team1"], df_results["team2"], df_results["match_id"])
for first_team, second_team, fixture_id in fixtures:
    match_ids_dict[" Vs ".join((first_team, second_team))] = fixture_id
    match_ids_dict[" Vs ".join((second_team, first_team))] = fixture_id

### **Batting DataFrame**

In [10]:
# Preview the first five rows of the batting frame.
df_batting.head(n=5)

Unnamed: 0,match,teamInnings,battingPos,batsmanName,dismissal,runs,balls,4s,6s,SR
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,c Pramod Madushan b Chameera,3,6,0,0,50.0
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,c Shanaka b Pramod Madushan,9,9,1,0,100.0
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie-Eaton,c †Mendis b Karunaratne,20,12,1,2,166.66
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,c DM de Silva b Pramod Madushan,26,24,2,0,108.33
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus(c),c Gunathilaka b PWH de Silva,20,24,0,0,83.33


In [11]:
# A non-empty dismissal text marks the innings as "out"; an empty one as "not_out".
df_batting["out/not_out"] = df_batting["dismissal"].apply(
    lambda dismissal_text: "out" if len(dismissal_text) > 0 else "not_out"
)
df_batting.drop("dismissal", axis=1, inplace=True)

# Collapse every run of characters other than letters, spaces and parentheses
# into a single space, then trim the ends.
# regex=True is passed explicitly: without it newer pandas treats the pattern
# as literal text and the names are left uncleaned.
# NOTE(review): this also turns hyphens into spaces ("Loftie-Eaton" ->
# "Loftie Eaton"), while df_player keeps the hyphen. Confirm before joining on name.
df_batting["batsmanName"] = (
    df_batting["batsmanName"]
    .str.replace(r"[^a-zA-Z ()]+", " ", regex=True)
    .str.strip()
)

  df_batting["batsmanName"] = df_batting["batsmanName"].str.replace(r"[^a-zA-Z ()]+", " ").str.strip()


In [12]:
# Preview the batting frame after the dismissal flag and name clean-up.
df_batting.head(n=5)

Unnamed: 0,match,teamInnings,battingPos,batsmanName,runs,balls,4s,6s,SR,out/not_out
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,3,6,0,0,50.0,out
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,9,9,1,0,100.0,out
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie Eaton,20,12,1,2,166.66,out
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,26,24,2,0,108.33,out
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus(c),20,24,0,0,83.33,out


In [13]:
# Translate each "match" label into its match_id through match_ids_dict;
# labels missing from the dict come back as NaN.
batting_match_ids = df_batting["match"].map(match_ids_dict)
df_batting["match_id"] = batting_match_ids

Unnamed: 0,match,teamInnings,battingPos,batsmanName,runs,balls,4s,6s,SR,out/not_out,match_id
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,3,6,0,0,50.0,out,T20I # 1823
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,9,9,1,0,100.0,out,T20I # 1823
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie Eaton,20,12,1,2,166.66,out,T20I # 1823
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,26,24,2,0,108.33,out,T20I # 1823
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus(c),20,24,0,0,83.33,out,T20I # 1823


In [22]:
# Keep only the text before the first "(c)" marker; names without the marker
# are left unchanged.
df_batting["batsmanName"] = df_batting["batsmanName"].apply(
    lambda full_name: full_name.partition("(c)")[0]
)

In [24]:
# Encode the flag numerically: "out" -> 1, anything else -> 0.
df_batting["out/not_out"] = df_batting["out/not_out"].apply(
    lambda status: int(status == "out")
)

In [29]:
# Give the batting columns identifier-friendly names; the frame is modified in place.
batting_column_names = {
    "teamInnings": "team",
    "4s": "fours",
    "6s": "sixes",
    "out/not_out": "out",
}
df_batting.rename(columns=batting_column_names, inplace=True)

In [30]:
# Preview the batting frame with its renamed columns.
df_batting.head(n=5)

Unnamed: 0,match,team,battingPos,batsmanName,runs,balls,fours,sixes,SR,out,match_id
0,Namibia Vs Sri Lanka,Namibia,1,Michael van Lingen,3,6,0,0,50.0,1,T20I # 1823
1,Namibia Vs Sri Lanka,Namibia,2,Divan la Cock,9,9,1,0,100.0,1,T20I # 1823
2,Namibia Vs Sri Lanka,Namibia,3,Jan Nicol Loftie Eaton,20,12,1,2,166.66,1,T20I # 1823
3,Namibia Vs Sri Lanka,Namibia,4,Stephan Baard,26,24,2,0,108.33,1,T20I # 1823
4,Namibia Vs Sri Lanka,Namibia,5,Gerhard Erasmus,20,24,0,0,83.33,1,T20I # 1823


### **Bowling DataFrame**

In [81]:
# Preview the first five rows of the bowling frame.
df_bowling.head(n=5)

Unnamed: 0,match,bowlingTeam,bowlerName,overs,maiden,runs,wickets,economy,0s,4s,6s,wides,noBalls
0,Namibia Vs Sri Lanka,Sri Lanka,Maheesh Theekshana,4,0,23,1,5.75,7,0,0,2,0
1,Namibia Vs Sri Lanka,Sri Lanka,Dushmantha Chameera,4,0,39,1,9.75,6,3,1,2,0
2,Namibia Vs Sri Lanka,Sri Lanka,Pramod Madushan,4,0,37,2,9.25,6,3,1,0,0
3,Namibia Vs Sri Lanka,Sri Lanka,Chamika Karunaratne,4,0,36,1,9.0,7,3,1,1,0
4,Namibia Vs Sri Lanka,Sri Lanka,Wanindu Hasaranga de Silva,4,0,27,1,6.75,8,1,1,0,0


In [82]:
# Give the bowling columns identifier-friendly names; the frame is modified in place.
# The former "out/not_out" -> "out" entry was dropped: it was carried over
# from the batting rename and df_bowling has no such column, so it did nothing.
bowling_column_names = {
    "bowlingTeam": "team",
    "0s": "zeros",
    "4s": "fours",
    "6s": "sixes",
}
df_bowling.rename(columns=bowling_column_names, inplace=True)

In [83]:
# Split the "overs" text on "." into the part before the dot (overs1) and
# the part after it (overs2), and put both where "overs" used to sit.
# reindex guarantees a second column even when no value contains a ".";
# without it the split yields a single column and the assignment fails.
overs_parts = df_bowling["overs"].str.split(".", expand=True).reindex(columns=[0, 1])

# Record the position by name rather than relying on hard-coded column indices.
overs_position = df_bowling.columns.get_loc("overs")
df_bowling = df_bowling.drop(columns="overs")
df_bowling.insert(overs_position, "overs1", overs_parts[0])
# Values without a "." have no second part; fill those with 0.
# The filled Series is assigned back explicitly, because an in-place fillna
# on a column selection is not guaranteed to update the frame.
df_bowling.insert(overs_position + 1, "overs2", overs_parts[1].fillna(0))

In [94]:
# Cast both overs columns to integers so they can be used in arithmetic.
for overs_column in ("overs1", "overs2"):
    df_bowling[overs_column] = df_bowling[overs_column].astype("int")

In [99]:
# Total for "ball": six per unit of overs1 plus the overs2 remainder.
balls_from_full_overs = df_bowling["overs1"] * 6
df_bowling["ball"] = balls_from_full_overs + df_bowling["overs2"]

In [101]:
# Preview the bowling frame with the split overs columns and the ball total.
df_bowling.head(n=5)

Unnamed: 0,match,team,bowlerName,overs1,overs2,maiden,runs,wickets,economy,zeros,fours,sixes,wides,noBalls,ball
0,Namibia Vs Sri Lanka,Sri Lanka,Maheesh Theekshana,4,0,0,23,1,5.75,7,0,0,2,0,24
1,Namibia Vs Sri Lanka,Sri Lanka,Dushmantha Chameera,4,0,0,39,1,9.75,6,3,1,2,0,24
2,Namibia Vs Sri Lanka,Sri Lanka,Pramod Madushan,4,0,0,37,2,9.25,6,3,1,0,0,24
3,Namibia Vs Sri Lanka,Sri Lanka,Chamika Karunaratne,4,0,0,36,1,9.0,7,3,1,1,0,24
4,Namibia Vs Sri Lanka,Sri Lanka,Wanindu Hasaranga de Silva,4,0,0,27,1,6.75,8,1,1,0,0,24


### **Player DataFrame**

In [14]:
# Preview the first five rows of the player frame.
df_player.head(n=5)

Unnamed: 0,name,team,battingStyle,bowlingStyle,playingRole,description
0,Michael van Lingen,Namibia,Left hand Bat,Left arm Medium,Bowling Allrounder,
1,Divan la Cock,Namibia,Right hand Bat,Legbreak,Opening Batter,
2,Jan Nicol Loftie-Eaton,Namibia,Left hand Bat,"Right arm Medium, Legbreak",Batter,
3,Stephan Baard,Namibia,Right hand Bat,Right arm Medium fast,Batter,
4,Gerhard Erasmus(c),Namibia,Right hand Bat,Right arm Offbreak,Allrounder,


In [20]:
# Keep only the text before the first "(c)" marker; names without the marker
# are left unchanged.
df_player["name"] = df_player["name"].apply(
    lambda full_name: full_name.partition("(c)")[0]
)

In [21]:
# Preview the player frame after the "(c)" marker was removed from names.
df_player.head(n=5)

Unnamed: 0,name,team,battingStyle,bowlingStyle,playingRole,description
0,Michael van Lingen,Namibia,Left hand Bat,Left arm Medium,Bowling Allrounder,
1,Divan la Cock,Namibia,Right hand Bat,Legbreak,Opening Batter,
2,Jan Nicol Loftie-Eaton,Namibia,Left hand Bat,"Right arm Medium, Legbreak",Batter,
3,Stephan Baard,Namibia,Right hand Bat,Right arm Medium fast,Batter,
4,Gerhard Erasmus,Namibia,Right hand Bat,Right arm Offbreak,Allrounder,
