In [None]:
import pandas as pd
# Load the raw bowling records from the CSV next to the notebook,
# decoding the file as latin1.
bowling = pd.read_csv(
    filepath_or_buffer="BOWLING.csv",
    encoding="latin1",
)


In [None]:
# Preview the first rows of the raw frame (DataFrame.head defaults to 5 rows).
bowling.head()

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) to spot missing
# values and mistyped columns before any aggregation.
bowling.info()


In [None]:
# Normalise the headers to snake_case: lower-case every label first, then
# turn spaces into underscores, so later cells can use names like
# "bowling_team" and "wide_balls".
bowling.columns = (
    bowling.columns
    .str.lower()
    .str.replace(" ", "_")
)


In [None]:
# Keep only the rows of bowlers who appear in at least 5 rows of the data;
# the per-row group size is computed with a groupby/transform so the mask
# lines up with the original index.
bowling = bowling.loc[
    lambda frame: frame.groupby("bowling")["bowling"].transform("count") >= 5
]


In [None]:
# Collapse the per-row records into one summary row per bowler.
# - `matches` is the number of non-null `match_id` rows for the bowler
#   (presumably one row per bowler per match -- TODO confirm).
# - NOTE(review): if `overs` is stored in scorecard notation (e.g. 9.3 meaning
#   9 overs and 3 balls) a plain sum is only approximate -- confirm the format.
bowling_player = (
    bowling
    .groupby("bowling", as_index=False)
    .agg(
        matches=pd.NamedAgg(column="match_id", aggfunc="count"),
        overs=pd.NamedAgg(column="overs", aggfunc="sum"),
        runs=pd.NamedAgg(column="runs", aggfunc="sum"),
        wickets=pd.NamedAgg(column="wickets", aggfunc="sum"),
        maidens=pd.NamedAgg(column="maiden", aggfunc="sum"),
        wides=pd.NamedAgg(column="wide_balls", aggfunc="sum"),
        no_balls=pd.NamedAgg(column="no_balls", aggfunc="sum"),
    )
)
bowling_player.head()


In [None]:
# Five highest wicket totals, most wickets first.
(
    bowling_player
    .sort_values(by="wickets", ascending=False)
    .loc[:, ["bowling", "matches", "wickets"]]
    .head(5)
)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal bar chart of the five highest wicket takers.

# Prepare data: top 5 by wickets, re-indexed 0..4 for plotting.
top_bowlers = (
    bowling_player
    .sort_values("wickets", ascending=False)
    .head(5)[["bowling", "matches", "wickets"]]
    .reset_index(drop=True)
)

sns.set_theme(style="whitegrid")
# Explicit figure/axes pair so every later call targets this chart.
fig, ax = plt.subplots(figsize=(6.5, 4))  # small & neat

sns.barplot(
    x="wickets",
    y="bowling",
    hue="bowling",      # future-proof (no warning)
    data=top_bowlers,
    palette="rocket",
    legend=False,
    ax=ax,
)

# Titles and labels.
# The en dash is written as an escape so the title cannot be garbled by a
# wrong file encoding (it previously rendered as mojibake).
ax.set_title(
    "Top 5 Wicket Takers \u2013 WC23",
    fontsize=11,
    weight="bold"
)
ax.set_xlabel("Wickets Taken", fontsize=9)
ax.set_ylabel("Bowler", fontsize=9)

# Value labels: with `hue` set, each bar is its own container.
for container in ax.containers:
    ax.bar_label(container, padding=3, fontsize=8)

sns.despine(ax=ax, left=True, bottom=True)
fig.tight_layout()
plt.show()


In [None]:
# Economy rate = runs conceded per over bowled.
# NOTE(review): this treats `overs` as a plain decimal number; if the column
# uses scorecard notation (x.3 = x overs + 3 balls) the rate is slightly off
# -- confirm against the data.
bowling_player["economy"] = bowling_player["runs"].div(bowling_player["overs"])


In [None]:
# Five lowest economy rates (ascending sort: lower is better).
(
    bowling_player
    .sort_values(by="economy")
    .loc[:, ["bowling", "matches", "economy"]]
    .head(5)
)


In [None]:
# Total dot balls (`zeroes`) per bowler, attached to the summary table with
# an inner join on the bowler name.
dot_balls = bowling.groupby("bowling", as_index=False).agg(zeroes=("zeroes", "sum"))
bowling_player = pd.merge(bowling_player, dot_balls, on="bowling")


In [None]:
# Five bowlers with the most dot balls.
(
    bowling_player
    .sort_values(by="zeroes", ascending=False)
    .loc[:, ["bowling", "matches", "zeroes"]]
    .head(5)
)


In [None]:
# Recompute the per-bowler dot-ball totals and left-join them again.
# NOTE(review): when the cells run in order, `bowling_player` already has a
# `zeroes` column at this point, so pandas disambiguates the clash with its
# default suffixes (zeroes_x = existing, zeroes_y = newly merged). A later
# cell relies on those suffixed names, so this repeat merge is kept as-is.
dot_balls = bowling.groupby("bowling", as_index=False)["zeroes"].sum()
bowling_player = pd.merge(
    bowling_player,
    dot_balls,
    how="left",
    on="bowling",
)


In [None]:
# Inspect the column labels after the repeated `zeroes` merge; clashing
# column names are suffixed _x / _y by pandas' merge defaults.
bowling_player.columns


In [None]:
# Collapse the duplicated dot-ball columns (zeroes_x / zeroes_y, produced by
# merging `zeroes` twice) back into a single `zeroes` column, keeping the
# values from the most recent merge (zeroes_y).
#
# The guard makes the cell safe to re-run: once the suffixed columns have
# been removed there is nothing left to do, whereas the unguarded version
# raised KeyError on "zeroes_y" on a second execution.
if "zeroes_y" in bowling_player.columns:
    bowling_player["zeroes"] = bowling_player["zeroes_y"]
    bowling_player = bowling_player.drop(
        columns=["zeroes_x", "zeroes_y"],
        errors="ignore",  # tolerate zeroes_x already being absent
    )


In [None]:
# Re-check the column labels to confirm only a single `zeroes` column remains.
bowling_player.columns


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Metrics that feed the rating; each gets a min-max scaled twin column
# named "<metric>_n".
features = ["wickets", "economy", "zeroes", "maidens", "no_balls", "wides"]

scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(bowling_player[features])

bowling_player[[f"{name}_n" for name in features]] = scaled_values


In [None]:
# Weighted composite score built from the scaled "_n" columns.
# Weights: 0.30 + 0.20 + 0.20 + 0.15 + 0.075 + 0.075 = 1.0.
# Metrics where lower is better (economy, no-balls, wides) are flipped with
# `1 - x` so that a higher contribution always means a better bowler.
rating_terms = [
    0.30 * bowling_player["wickets_n"],
    0.20 * (1 - bowling_player["economy_n"]),
    0.20 * bowling_player["zeroes_n"],
    0.15 * bowling_player["maidens_n"],
    0.075 * (1 - bowling_player["no_balls_n"]),
    0.075 * (1 - bowling_player["wides_n"]),
]
# Terms are added in the listed order.
bowling_player["personal_rating"] = sum(rating_terms)


In [None]:
# Five best bowlers by composite rating, shown with the raw metrics that
# feed into it.
(
    bowling_player
    .sort_values(by="personal_rating", ascending=False)
    .loc[:, [
        "bowling",
        "matches",
        "wickets",
        "economy",
        "zeroes",
        "maidens",
        "no_balls",
        "wides",
        "personal_rating",
    ]]
    .head(5)
)


In [None]:
# Names of every bowler with at least one row where the bowling team is India.
india_bowlers = bowling.loc[bowling["bowling_team"] == "India", "bowling"].unique()

# Restrict the per-bowler summary to those names and keep the five highest
# personal ratings.
is_india_bowler = bowling_player["bowling"].isin(india_bowlers)

top_indian_bowlers = (
    bowling_player[is_india_bowler]
    .sort_values("personal_rating", ascending=False)
    .head(5)
    [[
        "bowling",
        "matches",
        "wickets",
        "economy",
        "zeroes",
        "maidens",
        "no_balls",
        "wides",
        "personal_rating",
    ]]
)

top_indian_bowlers


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal bar chart of the top Indian bowlers by personal rating.

# Prepare clean data: two columns with presentation-friendly names.
india_bowl_df = top_indian_bowlers[["bowling", "personal_rating"]].reset_index(drop=True)
india_bowl_df.columns = ["Bowler", "Personal_Rating"]

sns.set_theme(style="whitegrid")
# Explicit figure/axes pair so every later call targets this chart.
fig, ax = plt.subplots(figsize=(7, 4.5))   # small & neat

# Highlight top 3 bowlers (rows are already sorted best-first).
colors = ["#2ca02c" if i < 3 else "#a1d99b" for i in range(len(india_bowl_df))]

bars = ax.barh(
    india_bowl_df["Bowler"],
    india_bowl_df["Personal_Rating"],
    color=colors
)

# Best performer on top
ax.invert_yaxis()

# Titles & labels.
# The en dash is written as an escape so the title cannot be garbled by a
# wrong file encoding (it previously rendered as mojibake).
ax.set_title(
    "Top 5 Indian Bowlers by Personal Rating \u2013 WC23",
    fontsize=12,
    weight="bold"
)
ax.set_xlabel("Personal Rating", fontsize=9)
ax.set_ylabel("Bowler", fontsize=9)

# Value labels.
# personal_rating is a weighted sum (weights total 1.0) of 0-1 scaled
# features, so it lies in [0, 1]. The previous `value + 0.5` offset pushed
# the text far past the bars and the ".1f" format collapsed close ratings
# to the same label. bar_label anchors the text to each bar (padding is in
# points) and two decimals keep the ratings distinguishable.
ax.bar_label(bars, fmt="%.2f", padding=3, fontsize=8)
ax.margins(x=0.08)  # leave room so the longest bar's label is not clipped

sns.despine(ax=ax, left=True, bottom=True)
fig.tight_layout()
plt.show()
