In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the raw match data from the CSV next to the notebook and preview it.
csv_path = "ipl.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Normalise the column labels step by step: trim surrounding whitespace,
# lower-case, then replace spaces with underscores.
clean_columns = df.columns.str.strip()
clean_columns = clean_columns.str.lower()
df.columns = clean_columns.str.replace(" ", "_")
df.head()

DATA CLEANING

In [None]:
# Count rows that exactly repeat an earlier row across every column.
df.duplicated().sum()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Fill missing winners / players with explicit placeholders.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on df["col"]: that form mutates an intermediate Series,
# which pandas 2.x deprecates (FutureWarning) and which leaves df unchanged
# once Copy-on-Write is enabled.
df["winner"] = df["winner"].fillna("No Result")
df["player_of_match"] = df["player_of_match"].fillna("Unknown")

In [None]:
# Summarise column dtypes and non-null counts.
df.info()

In [None]:
# Remove the umpire3 column (presumably because it is mostly empty -- confirm
# against the df.info() output).
# Reassigning with errors="ignore" keeps the cell idempotent: re-running it
# after the column is already gone no longer raises KeyError.
df = df.drop(columns=["umpire3"], errors="ignore")

In [None]:
# Show the column dtypes and non-null counts again.
df.info()

In [None]:
def get_margin_type(row):
    """Classify how a match was decided from its margin columns.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys "win_by_runs" and "win_by_wickets".

    Returns
    -------
    str
        "runs" if ``win_by_runs`` is positive, "wickets" if ``win_by_wickets``
        is positive, otherwise "none" (neither margin recorded, e.g. a tie or
        a match without a result).
    """
    if row["win_by_runs"] > 0:
        return "runs"
    if row["win_by_wickets"] > 0:
        return "wickets"
    # Both margins are zero (or missing): do not mislabel this as a wickets win.
    return "none"

# Label every match with its margin type, then preview the relevant columns.
# The preview is the cell's last expression so it renders as a rich table; the
# former mid-cell `df.head()` was dropped because the value of an expression
# that is not last in a cell is discarded and shows nothing.
df["margin_type"] = df.apply(get_margin_type, axis=1)
df[["win_by_runs", "win_by_wickets", "margin_type"]].head(10)


In [None]:
# Flag matches where the toss winner also won the match.
# A vectorised comparison replaces the row-by-row loop: it is aligned on the
# index, so it does not depend on df having a default 0..n-1 index the way
# `df.loc[i, ...]` with `range(len(df))` did.
df["toss_win_match_win"] = df["toss_winner"] == df["winner"]

df[["toss_winner", "winner", "toss_win_match_win"]].head(10)

In [None]:
# Classify each match by how it was won:
#   "defend" - won by runs (win_by_runs > 0)
#   "chase"  - won by wickets (win_by_wickets > 0)
#   "other"  - neither margin is positive (e.g. tie or no result)
# Previously every match without a runs margin was labelled "chase", which
# counted ties and no-results as successful chases.
won_by_runs = df["win_by_runs"] > 0
won_by_wickets = df["win_by_wickets"] > 0

df["match_type"] = "other"
df.loc[won_by_wickets, "match_type"] = "chase"
# Applied last so a runs margin takes precedence, as in the original logic.
df.loc[won_by_runs, "match_type"] = "defend"

df[["win_by_runs", "win_by_wickets", "match_type"]].head(10)

DATA ANALYSIS


1. Matches per Season

In [None]:
# Bar chart of the number of matches recorded for each season.
fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(data=df, x="season", ax=ax)
ax.set_title("Matches per Season")
plt.show()

2. Most Successful Team

In [None]:
# Rank teams by number of wins.
# "No Result" is the placeholder written into `winner` for matches without a
# winner; it is not a team, so exclude it from the ranking.
decided = df[df["winner"] != "No Result"]
win_order = decided["winner"].value_counts().index

fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(y="winner", data=decided, order=win_order, ax=ax)
ax.set_title("Most Successful Teams")
plt.show()

3. Toss Decision Distribution

In [None]:
# How often each toss decision was taken.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=df, x="toss_decision", ax=ax)
ax.set_title("Toss Decisions")
plt.show()

4. Win Margin Distribution

In [None]:
# A margin of 0 in one column only means the match was not won that way, so
# keep just the positive margins; otherwise the zeros dominate each histogram
# and hide the real distribution.
runs_margins = df.loc[df["win_by_runs"] > 0, "win_by_runs"]
fig, ax = plt.subplots(figsize=(10, 4))
sns.histplot(runs_margins, bins=30, color="teal", kde=True, ax=ax)
ax.set_title("Distribution of Wins by Runs")
plt.show()

wicket_margins = df.loc[df["win_by_wickets"] > 0, "win_by_wickets"]
fig, ax = plt.subplots(figsize=(10, 4))
sns.histplot(wicket_margins, bins=10, color="salmon", kde=True, ax=ax)
ax.set_title("Distribution of Wins by Wickets")
plt.show()

5. Team with Most Wins While Chasing

In [None]:
# Wins achieved while chasing, per team.
# Requiring a positive wickets margin keeps ties and matches without a result
# (whose `winner` is the "No Result" placeholder) out of the chart, and the
# filtered frame is computed once instead of twice.
chase_wins = df[(df["match_type"] == "chase") & (df["win_by_wickets"] > 0)]
chase_order = chase_wins["winner"].value_counts().index

fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(y="winner", data=chase_wins, order=chase_order, ax=ax)
ax.set_title("Teams Winning by Chasing")
plt.show()

Observation: RCB has the most wins when chasing.


6. Top 5 Player of the Match Winners

In [None]:
# The five most frequent Player of the Match recipients, most awards first.
award_counts = df["player_of_match"].value_counts()
top_players = award_counts.index[:5]
top_player_rows = df[df["player_of_match"].isin(top_players)]

fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(
    data=top_player_rows,
    y="player_of_match",
    order=top_players,
    color="orange",
    ax=ax,
)
ax.set_title("Top 5 Player of the Match Winners")
plt.show()

Observation: CH Gayle and YK Pathan are frequent Player of the Match winners.


7. Toss Winner vs Match Winner Rate

In [None]:
# Count of matches where the toss winner did / did not also win the match.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=df, x="toss_win_match_win", ax=ax)
ax.set_title("Toss Winner Also Won Match")
plt.show()

Observation: Toss winners won 51% of matches.


8. Largest Win Margins

In [None]:
# Distribution of win margins with the largest margin highlighted.
# Only positive margins are plotted: a 0 in a margin column means the match
# was not won that way, and those zeros would otherwise swamp the histogram.
margin_plots = [
    # (column, title, bins, colour)
    ("win_by_runs", "Distribution of Wins by Runs", 30, "teal"),
    ("win_by_wickets", "Distribution of Wins by Wickets", 10, "salmon"),
]

for column, title, bins, color in margin_plots:
    margins = df.loc[df[column] > 0, column]

    fig, ax = plt.subplots(figsize=(10, 4))
    sns.histplot(margins, bins=bins, color=color, kde=True, ax=ax)

    # Mark the single largest margin so the section answers its own heading.
    if not margins.empty:
        largest = margins.max()
        ax.axvline(largest, color="black", linestyle="--",
                   label=f"Largest margin: {largest}")
        ax.legend()

    ax.set_title(title)
    plt.show()

Observation: Most wins are by small margins.


9. Team with Most Wins While Defending

In [None]:
# Wins achieved while defending a total, per team (most wins first).
defend_wins = df[df["match_type"] == "defend"]
defend_order = defend_wins["winner"].value_counts().index

fig, ax = plt.subplots(figsize=(8, 4))
sns.countplot(data=defend_wins, y="winner", order=defend_order, ax=ax)
ax.set_title("Teams Winning by Defending")
plt.show()

Observation: CSK dominates when defending totals.


10. Most Matches Played by a Team

In [None]:

# Every match contributes one appearance for team1 and one for team2, so
# stacking both columns and counting values gives matches played per team.
appearances = pd.concat([df["team1"], df["team2"]])
team_df = (
    appearances.value_counts()
    .rename_axis("team")
    .reset_index(name="match_count")
)

top_teams = team_df.head(5)

fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(data=top_teams, x="match_count", y="team", ax=ax)
ax.set_title("Top Teams by Matches Played")
ax.set_xlabel("Match Count")
ax.set_ylabel("Team")
plt.show()

Observation: MI and RCB played the most matches.