In [None]:
import pandas as pd

# Read the WC23 match-results file from the working directory.
df = pd.read_csv(filepath_or_buffer="Cricket_WC23.csv")

# Preview the first five rows.
df.head(5)


In [None]:
# Classify each result as a win by "runs" or by "wickets".
# The unit is matched in singular or plural form so that a one-run or
# one-wicket margin is not left unclassified, then normalised to the plural
# label the later cells compare against. Margins that mention neither unit
# (or are missing) end up as NaN.
df["Win_Type"] = (
    df["Margin"]
    .str.lower()
    .str.extract(r"(run|wicket)s?", expand=False)  # expand=False -> Series
    .map({"run": "runs", "wicket": "wickets"})
)


In [None]:
# Create both batting-order columns empty; they are filled in by win type below.
df["Team1_Batting"] = df["Team2_Batting"] = None


In [None]:
# Which side won each match.
team1_won = df["Winner"] == df["Team 1"]
team2_won = df["Winner"] == df["Team 2"]

# A win by wickets means the winner batted second (chased);
# a win by runs means the winner batted first (defended).
mask_wickets = df["Win_Type"].eq("wickets")
mask_runs = df["Win_Type"].eq("runs")

# (rows to update, label for Team 1, label for Team 2)
batting_assignments = [
    (mask_wickets & team1_won, "Batted Second", "Batted First"),
    (mask_wickets & team2_won, "Batted First", "Batted Second"),
    (mask_runs & team1_won, "Batted First", "Batted Second"),
    (mask_runs & team2_won, "Batted Second", "Batted First"),
]

for rows, team1_label, team2_label in batting_assignments:
    df.loc[rows, "Team1_Batting"] = team1_label
    df.loc[rows, "Team2_Batting"] = team2_label


In [None]:
# Spot-check the derived batting-order columns against winner and margin.
df.loc[
    :,
    ["Team 1", "Team1_Batting", "Team 2", "Team2_Batting", "Winner", "Margin"],
].head(5)


In [None]:
# Reshape from one row per match to one row per (team, match): the Team 1
# rows come first, followed by the Team 2 rows, under a fresh 0..n-1 index.
team_level_df = pd.concat(
    [
        df[[team_col, order_col]].rename(
            columns={team_col: "Team", order_col: "Batting_Order"}
        )
        for team_col, order_col in (
            ("Team 1", "Team1_Batting"),
            ("Team 2", "Team2_Batting"),
        )
    ],
    ignore_index=True,
)


In [None]:
# First ten rows of the long-format table.
team_level_df.iloc[:10]


In [None]:
# Count, per team, how many matches fall under each batting-order label.
# Rows whose Batting_Order is missing are not counted (groupby drops null keys).
team_batting_counts = (
    team_level_df
    .groupby(by=["Team", "Batting_Order"])
    .size()
    .unstack(level="Batting_Order", fill_value=0)  # one column per label
    .reset_index()
)


In [None]:
# Display the per-team batting-order count table.
team_batting_counts


In [None]:
# Total classified matches per team; a missing label column counts as zero.
first_counts = team_batting_counts.get("Batted First", 0)
second_counts = team_batting_counts.get("Batted Second", 0)
team_batting_counts["Total"] = first_counts + second_counts

# Ten teams with the most classified matches.
team_batting_counts.sort_values(by="Total", ascending=False).head(10)


In [None]:
# Classify each result as a win by "runs" or by "wickets".
# The unit is matched in singular or plural form so that a one-run or
# one-wicket margin is not left unclassified, then normalised to the plural
# label that the Winner_Batting mapping expects. Margins that mention neither
# unit (or are missing) end up as NaN.
df["Win_Type"] = (
    df["Margin"]
    .str.lower()
    .str.extract(r"(run|wicket)s?", expand=False)  # expand=False -> Series
    .map({"run": "runs", "wicket": "wickets"})
)


In [None]:
# Translate the win type into the innings the winner batted in:
# defending a total wins by runs, chasing one wins by wickets.
# Any other Win_Type value (including NaN) maps to NaN.
df["Winner_Batting"] = df["Win_Type"].map(
    dict(runs="Batted First", wickets="Batted Second")
)


In [None]:
# Number of wins for each batting order, most frequent first.
batting_order_wins = (
    df["Winner_Batting"]
    .value_counts()
    .rename_axis("Batting_Order")
    .reset_index(name="Wins")
)

batting_order_wins


In [None]:
import matplotlib.pyplot as plt

# Donut chart of match wins split by the winner's batting order.
labels = batting_order_wins["Batting_Order"]
sizes = batting_order_wins["Wins"]

fig, ax = plt.subplots(figsize=(5, 5))

# A wedge width below 1 hollows the pie out into a ring.
wedges, texts, autotexts = ax.pie(
    sizes,
    labels=labels,
    autopct="%1.1f%%",
    startangle=90,
    wedgeprops={"width": 0.4},
)

# Caption placed in the centre of the ring.
ax.text(0, 0, "WC23\nWins", ha="center", va="center", fontsize=11, weight="bold")

ax.set_title("Match Wins by Batting Order – WC23", fontsize=12, weight="bold")

fig.tight_layout()
plt.show()


In [None]:
# Share of classified wins for each batting order, as a percentage
# rounded to two decimals.
total_matches = batting_order_wins["Wins"].sum()

batting_order_wins["Win %"] = (
    batting_order_wins["Wins"].div(total_matches).mul(100).round(2)
)

batting_order_wins


In [None]:
# Number of matches played at each ground, most-used ground first.
# The index and value names are set explicitly instead of renaming whatever
# reset_index() produces: value_counts() names its output differently across
# pandas versions (2.x yields "Ground"/"count"), and renaming "Ground" to
# "Matches" there would mislabel the ground-name column.
df["Ground"].value_counts().rename_axis("Ground").reset_index(name="Matches")


In [None]:
# Wins at each ground, split by whether the winner batted first or second.
# Matches without a Winner_Batting value are not counted.
ground_batting_wins = (
    df.groupby(by=["Ground", "Winner_Batting"])
    .size()
    .unstack(level="Winner_Batting", fill_value=0)  # one column per outcome
    .reset_index()
)

ground_batting_wins


In [None]:
import matplotlib.pyplot as plt

# Stacked bars: wins batting first vs. batting second at each ground.
# The two outcome columns are selected by label rather than by position, so
# each stack always carries the right legend entry, extra columns on
# ground_batting_wins are ignored, and an outcome with no wins at any ground
# is drawn as zero instead of raising an IndexError.
order_cols = ["Batted First", "Batted Second"]
outcome_counts = ground_batting_wins.reindex(columns=order_cols, fill_value=0)

bat_first = outcome_counts[order_cols[0]]
bat_second = outcome_counts[order_cols[1]]

grounds = ground_batting_wins["Ground"]

plt.figure(figsize=(8, 4))

# Second series is stacked on top of the first via `bottom`.
plt.bar(grounds, bat_first, label=order_cols[0], color="#4c72b0")
plt.bar(grounds, bat_second, bottom=bat_first, label=order_cols[1], color="#dd8452")

plt.title(
    "Ground-wise Match Wins by Batting Order – WC23",
    fontsize=11,
    weight="bold"
)
plt.xlabel("Ground", fontsize=9)
plt.ylabel("Number of Wins", fontsize=9)

# Rotate the ground names so long labels do not overlap.
plt.xticks(rotation=45, ha="right", fontsize=8)
plt.legend(fontsize=8)

plt.tight_layout()
plt.show()


In [None]:
# Chasing wins minus defending wins per ground: positive values mean sides
# batting second won more often there. A missing outcome column counts as zero.
chasing_wins = ground_batting_wins.get("Batted Second", 0)
defending_wins = ground_batting_wins.get("Batted First", 0)
ground_batting_wins["Chasing_Advantage"] = chasing_wins - defending_wins

ground_batting_wins.sort_values(by="Chasing_Advantage", ascending=False)


In [None]:
# Wins per (ground, winning team) pair.
df.groupby(by=["Ground", "Winner"]).size().rename("Wins").reset_index()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Wins per (ground, winning team) pair.
ground_winner = (
    df.groupby(by=["Ground", "Winner"])
    .size()
    .rename("Wins")
    .reset_index()
)

# Keep the team with the most wins at each ground: order rows by ground, then
# by wins descending, and retain only the first row seen for every ground.
top_ground_winner = (
    ground_winner
    .sort_values(by=["Ground", "Wins"], ascending=[True, False])
    .drop_duplicates(subset="Ground", keep="first")
)


In [None]:
# Horizontal bars: the leading team's win count at each ground.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(8, 4))

# dodge=False keeps one full-height bar per ground even though hue is set.
sns.barplot(
    data=top_ground_winner,
    x="Wins",
    y="Ground",
    hue="Winner",
    dodge=False,
    palette="tab10",
    ax=ax,
)

ax.set_title("Most Successful Team at Each Ground – WC23", fontsize=11, weight="bold")
ax.set_xlabel("Number of Wins", fontsize=9)
ax.set_ylabel("Ground", fontsize=9)

# Print the win count at the end of every bar.
for bars in ax.containers:
    ax.bar_label(bars, padding=3, fontsize=8)

sns.despine(left=True, bottom=True)
fig.tight_layout()
plt.show()
