In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Global plotting defaults for the notebook: seaborn dark grid, 14pt font,
# a 9x5 default figure size, and a figure background given as an 8-digit hex
# colour whose last two digits (alpha) are 00.
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (9, 5)
matplotlib.rcParams['figure.facecolor'] = '#00000000'

import warnings
# NOTE(review): this silences every warning category for the whole session,
# which also hides library deprecation warnings raised by later cells.
warnings.filterwarnings("ignore")

Data Preparation and Cleaning

In [None]:
# Load the match-level data; the path is relative to the working directory.
IPL_matches_df = pd.read_csv('IPL_Matches_2008_2022.csv')

In [None]:
# Preview the first rows of the match data.
IPL_matches_df.head()

In [None]:
# Preview the last rows of the match data.
IPL_matches_df.tail()

In [None]:
# Let pandas display up to 999 rows before truncating the output.
pd.options.display.max_rows = 999

In [None]:
# (number of rows, number of columns) of the match data.
IPL_matches_df.shape

In [None]:
# Column names, non-null counts and dtypes.
IPL_matches_df.info()

In [None]:
# Descriptive statistics of the match data.
IPL_matches_df.describe()

In [None]:
# Number of distinct values in each column.
IPL_matches_df.nunique()

In [None]:
# True if at least one cell anywhere in the frame is missing.
IPL_matches_df.isnull().values.any()

In [None]:
# Missing-value count per column.
IPL_matches_df.isnull().sum()

In [None]:
# Total number of missing cells across all columns.
IPL_matches_df.isnull().sum().sum()

In [None]:
# Inspect the dtype of the Date column as loaded from the CSV.
IPL_matches_df.Date.dtype

In [None]:
# Parse the Date column into pandas datetimes, then show the first ten values.
parsed_dates = pd.to_datetime(IPL_matches_df['Date'])
IPL_matches_df['Date'] = parsed_dates
IPL_matches_df['Date'].head(10)

In [None]:
# Set City for the two UAE venues from the venue name.
uae_venue_city = {
    'Dubai International Cricket Stadium': 'Dubai',
    'Sharjah Cricket Stadium': 'Sharjah',
}
for venue_name, city_name in uae_venue_city.items():
    IPL_matches_df.loc[IPL_matches_df['Venue'] == venue_name, 'City'] = city_name

# Replace missing values with explicit placeholder labels.
# The result is assigned back to the column instead of calling
# `df[col].fillna(..., inplace=True)`: that form operates on an intermediate
# Series and does not update the frame when pandas Copy-on-Write is active.
placeholder_by_column = {
    'SuperOver': 'NoResult',
    'WinningTeam': 'NoResult',
    'Player_of_Match': 'NoResult',
    'Margin': 'NoMargin',
}
for column_name, placeholder in placeholder_by_column.items():
    IPL_matches_df[column_name] = IPL_matches_df[column_name].fillna(placeholder)

In [None]:
# Remove the 'method' column from the match data.
IPL_matches_df = IPL_matches_df.drop(columns=['method'])

In [None]:
# Per-column flag: does the column still contain any missing value?
IPL_matches_df.isnull().any()

In [None]:
# True if any row is an exact duplicate of an earlier row.
IPL_matches_df.duplicated().any()

In [None]:
# Map historical franchise names onto a single name per team.
team_renames = {
    'Kings XI Punjab': 'Punjab Kings',
    'Delhi Daredevils': 'Delhi Capitals',
    'Rising Pune Supergiants': 'Rising Pune Supergiant',
}

# TossWinner is normalised together with the other team columns: later cells
# compare TossWinner with WinningTeam by string equality and count toss wins
# per team, both of which go wrong if the two columns use different names.
team_columns = ['Team1', 'Team2', 'WinningTeam', 'TossWinner']

for column_name in team_columns:
    for old_name, new_name in team_renames.items():
        # regex=False: the names are literal text, not patterns.
        IPL_matches_df[column_name] = IPL_matches_df[column_name].str.replace(
            old_name, new_name, regex=False
        )

In [None]:
# Preview the match data again after the cleaning steps above.
IPL_matches_df.head()

In [None]:
# Load the ball-by-ball data and preview it.
# The path is a raw string so the backslashes stay literal: '\p' and '\I' are
# not valid escape sequences in an ordinary string literal.
# NOTE(review): this is an absolute, machine-specific path, whereas the match
# data is read from a relative path; consider loading both the same way.
IPL_ball_by_ball_df = pd.read_csv(r'E:\python_files\IPL_Ball_by_Ball_2008_2022.csv')
IPL_ball_by_ball_df.head()



In [None]:
# (number of rows, number of columns) of the ball-by-ball data.
IPL_ball_by_ball_df.shape

In [None]:
# Column names, non-null counts and dtypes of the ball-by-ball data.
IPL_ball_by_ball_df.info()

In [None]:
# Missing-value count per column before filling.
IPL_ball_by_ball_df.isnull().sum()


In [None]:
# Mark missing values in these columns with the placeholder 'N'.
# The filled Series is assigned back to the column rather than using
# `df[col].fillna(..., inplace=True)`, which acts on an intermediate Series
# and does not update the frame when pandas Copy-on-Write is active.
for column_name in ['extra_type', 'player_out', 'kind', 'fielders_involved']:
    IPL_ball_by_ball_df[column_name] = IPL_ball_by_ball_df[column_name].fillna('N')

In [None]:
# Missing-value count per column after filling.
IPL_ball_by_ball_df.isnull().sum()

In [None]:
# True if any cell in the ball-by-ball data is still missing.
IPL_ball_by_ball_df.isnull().values.any()

In [None]:
# Show the distinct Team1 names as a single column.
# -1 lets NumPy infer the number of rows, so the cell keeps working whatever
# the number of distinct teams is (a fixed row count raises ValueError when
# it does not match).
IPL_matches_df['Team1'].unique().reshape(-1, 1)


In [None]:
# Matches played per team: stack Team1 and Team2 and count occurrences.
match_df = (
    pd.concat([IPL_matches_df['Team1'], IPL_matches_df['Team2']])
    .value_counts()
    .reset_index()
)
# Name the two columns by position. The labels produced by
# value_counts().reset_index() differ between pandas versions, so renaming by
# the old labels is unreliable; later cells read the 'Teams' and 'count' columns.
match_df.columns = ['Teams', 'count']
match_df


In [None]:
def get_color(name, number):
    """Return `number` colours of the seaborn palette `name` as a list of hex strings."""
    hex_palette = sns.color_palette(palette=name, n_colors=number).as_hex()
    return list(hex_palette)

In [None]:
# One 'viridis_r' colour per row of match_df, as hex strings.
pal_vi = get_color('viridis_r', len(match_df))

In [None]:
!pip install plotly --upgrade --quiet
import plotly.express as px




In [None]:
import plotly.express as px

# Bar chart of matches played per team, one palette colour per team.
# `text='count'` supplies the value that the '%{text:.3s}' template below
# formats; without a text value there is nothing for the template to render.
fig = px.bar(match_df, x='Teams', y='count', text='count',
             color='Teams', color_discrete_sequence=pal_vi)

# Draw each bar's value above the bar.
fig.update_traces(texttemplate='%{text:.3s}', textposition='outside')

fig.update_layout(plot_bgcolor='white', paper_bgcolor='white',
                  width=1100, height=500,
                  margin=dict(t=15, l=15, r=15, b=15))

fig.show()


Winning Ratio Analysis

In [None]:
# Work on a copy so match_df itself stays untouched.
win_df = match_df.copy()

# Wins per team, counted from the WinningTeam column.
win_counts = IPL_matches_df['WinningTeam'].value_counts()

# Put the win counts in the same team order as win_df; a team that never
# appears as a winner gets 0.
wins_in_team_order = win_counts.reindex(win_df['Teams']).fillna(0)
win_df['total_wins'] = wins_in_team_order.values

# Wins as a percentage of matches played.
win_df['winning_ratio'] = win_df['total_wins'] / win_df['count'] * 100

win_df


In [None]:
# Horizontal bar chart of the winning percentage per team.
plt.figure(figsize=(15,6.5))
sns.set_style('darkgrid')
wi = sns.barplot(data=win_df, x='winning_ratio', y='Teams', ci=False, palette='viridis_r')
# Only restyle the x tick labels. Replacing them with the per-team ratios
# would overwrite the numeric axis ticks with values that do not correspond
# to the tick positions.
wi.tick_params(axis='x', labelrotation=55, labelsize=10)
wi.set_xlabel('Winning ratio (%)')
wi.set_ylabel('Teams')
wi.set_title('Winning ratio per team')
plt.show()

In [None]:
# Players ranked by number of Player-of-the-Match awards.
player_of_macth_df = (
    IPL_matches_df['Player_of_Match']
    .value_counts()
    .reset_index()
    .set_axis(['Player_of_Match', 'Count'], axis=1)
)

# Keep the ten most-awarded players for plotting, and display them.
top_10_Player_of_match = player_of_macth_df.head(10)
top_10_Player_of_match

In [None]:
# Award count (x) against player name (y) for the top ten players.
potm_fig, potm_ax = plt.subplots(figsize=(9, 4))
potm_ax.plot(
    top_10_Player_of_match['Count'],
    top_10_Player_of_match['Player_of_Match'],
    'o-g',
)
potm_ax.set_xlabel('Count')
potm_ax.set_ylabel('Player of Match')
potm_ax.set_title('Top 10 Player of Match')

In [None]:
# Number of matches hosted per city, most frequent first.
citraw = (
    IPL_matches_df['City']
    .value_counts()
    .reset_index()
    .set_axis(['City', 'Hosted_Number'], axis=1)
)

# The ten cities that hosted the most matches.
top10_city_host = citraw.head(10)
top10_city_host

In [None]:
# Horizontal bars: matches hosted per city for the top ten cities.
city_fig, city_ax = plt.subplots(figsize=(16, 8))
palette_color = sns.color_palette("ch:s=.25,rot=-.25")
sns.barplot(data=top10_city_host, x='Hosted_Number', y='City',
            palette=palette_color, ax=city_ax)
city_ax.set_title('Top 10 IPL hosted cities');

In [None]:
# Matches per season, where Season is set to the calendar year of the match date.
match_years = pd.DatetimeIndex(IPL_matches_df["Date"]).year
IPL_matches_df["Season"] = match_years

matches_in_season = (
    IPL_matches_df
    .groupby(['Season'])['ID']
    .count()
    .reset_index(name='matches')
)
matches_in_season

In [None]:
# Bar chart of the number of matches in each season.
season_fig, season_ax = plt.subplots(figsize=(12, 5))
sns.barplot(data=matches_in_season, x='Season', y='matches', ax=season_ax)
season_ax.set_title('No of total matches per season')

In [None]:
# Toss wins per team, most frequent first, as a two-column frame.
most_toss_winner = (
    IPL_matches_df['TossWinner']
    .value_counts()
    .rename_axis('TossWinner')
    .reset_index(name='Count')
)

most_toss_winner

In [None]:
# Toss wins (x) against team name (y).
plt.figure(figsize=(16,12))
plt.plot(most_toss_winner['Count'], most_toss_winner['TossWinner'], marker='s', c='r', ls='--', lw=2, ms=10, mew=2, mec='navy')

# The labels follow the plotted data: counts are on x, team names on y.
plt.xlabel('Count')
plt.ylabel('Toss Winner')

plt.title('Most time toss winning teams');

In [None]:
# Count of each toss decision per season, one bar group per season.
toss_fig, toss_ax = plt.subplots(figsize=(16, 6))
color_palette = sns.color_palette("Spectral")
sns.countplot(
    data=IPL_matches_df,
    x='Season',
    hue='TossDecision',
    palette=color_palette,
    saturation=1,
    ax=toss_ax,
)
toss_ax.set_title('Toss Decision in each Season of IPL');

In [None]:
# Rows whose MatchNumber is 'Final', reduced to venue city, the two finalists
# and the winner, with a fresh 0-based index.
is_final = IPL_matches_df['MatchNumber'] == 'Final'
final_columns = ['City', 'Team1', 'Team2', 'WinningTeam']
final_df = IPL_matches_df.loc[is_final, final_columns].reset_index(drop=True)
final_df

In [None]:
import plotly.express as px

# Finals won per team.
win_counts = final_df['WinningTeam'].value_counts()

# Pie chart with one slice per team, coloured from the shared palette.
fig = px.pie(
    names=win_counts.index,
    values=win_counts.values,
    color=win_counts.index,
    color_discrete_sequence=pal_vi,
)

# Labels inside the slices showing percentage and team name; keep the slice
# order of the input instead of letting plotly sort it. Then size the figure.
fig.update_traces(textposition='inside', textinfo='percent+label', sort=False)
fig.update_layout(width=1000, height=550)

fig.show()


In [None]:
#Who is the best wicket taker bowler in IPL history?
# numeric_only=True limits the aggregation to numeric columns. Without it,
# recent pandas versions also "sum" the text columns by concatenating every
# string in each group, which is slow and produces meaningless values.
# NOTE(review): isWicketDelivery is summed as-is; confirm against the `kind`
# column whether it includes dismissals not credited to the bowler.
wicket_tacker_bowler = (
    IPL_ball_by_ball_df
    .groupby('bowler')
    .sum(numeric_only=True)
    .reset_index()
)
wicket_tacker_bowler = wicket_tacker_bowler.sort_values(by='isWicketDelivery', ascending=False).head(10)
wicket_tacker_bowler

In [None]:
# Wicket deliveries per bowler for the ten highest totals.
bowler_fig, bowler_ax = plt.subplots(figsize=(16, 6))
bowler_ax.plot(
    wicket_tacker_bowler['bowler'],
    wicket_tacker_bowler['isWicketDelivery'],
    marker='o', c='c', ls='-.', lw=2, ms=10, mew=2, mec='navy',
)
bowler_ax.set_xlabel('Bowler')
bowler_ax.set_ylabel('Wickets taken')
bowler_ax.set_title('Top 10 wickets tackers');

In [None]:
#What is Toss Decision Ratio in IPL?
# Number of matches per toss decision value (counts, not yet percentages).
toss_decision_ratio = IPL_matches_df.TossDecision.value_counts()
toss_decision_ratio

In [None]:
# Pie chart of the share of each toss decision, in percent.
labels = toss_decision_ratio.index.to_numpy()
sizes = (toss_decision_ratio / toss_decision_ratio.sum() * 100).to_numpy()
colors = ['cyan', 'yellow']
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
plt.title("Ratio of toss Decision in IPL", fontweight="bold", fontsize=15)
plt.show()

In [None]:
#Is win depend on winning toss?
# Count matches where the toss winner also won the match (True) versus not (False).
# NOTE(review): this is an exact string comparison, so TossWinner and
# WinningTeam must use the same spelling for every team name.
toss_vs_matchwin = (IPL_matches_df.TossWinner==IPL_matches_df.WinningTeam).value_counts()
toss_vs_matchwin

In [None]:
# Pie chart of the True/False shares from toss_vs_matchwin, in percent.
labels = toss_vs_matchwin.index.to_numpy()
sizes = (toss_vs_matchwin / toss_vs_matchwin.sum() * 100).to_numpy()
colors = ['red', 'pink']

plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
plt.title("Toss vs Match win", fontweight="bold", fontsize=15)
plt.show()

In [None]:
#Which batsmen have most runs in IPL history?
# numeric_only=True limits the aggregation to numeric columns. Without it,
# recent pandas versions also "sum" the text columns by concatenating every
# string in each group, which is slow and produces meaningless values.
highest_run_batsmans = (
    IPL_ball_by_ball_df
    .groupby('batter')
    .sum(numeric_only=True)
    .reset_index()
)
highest_run_batsmans = highest_run_batsmans.sort_values(by='batsman_run', ascending=False).head(10)
highest_run_batsmans

In [None]:
# Pie chart: each top-ten batter's share of the runs scored by those ten.
runs_fig, runs_ax = plt.subplots(figsize=(12, 5))
palette_color = sns.color_palette('flare')
runs_ax.pie(
    highest_run_batsmans['batsman_run'],
    labels=highest_run_batsmans['batter'],
    colors=palette_color,
    autopct='%.0f%%',
)
runs_ax.set_title('BATSMEN WITH MOST RUNS IN IPL HISTORY')
plt.show()