In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, file_name) for file_name in file_names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Locations of the two CSV files this notebook analyses.
ball_by_ball_csv = '/kaggle/input/ipl-2008-to-2021-all-match-dataset/IPL_Ball_by_Ball_2008_2022.csv'
matches_csv = '/kaggle/input/ipl-2008-to-2021-all-match-dataset/IPL_Matches_2008_2022.csv'

# Load each file into its own DataFrame; later cells join them on the shared 'ID' column.
ball_by_ball_data = pd.read_csv(ball_by_ball_csv)
match_data = pd.read_csv(matches_csv)

In [None]:
# Highest wicket-taker in any single season.

# Dismissal kinds that are counted as wickets here; rows with any other 'kind' are ignored.
out = ['caught', 'caught and bowled', 'bowled', 'stumped', 'lbw', 'hit wicket']
is_counted_wicket = ball_by_ball_data['isWicketDelivery'].eq(1) & ball_by_ball_data['kind'].isin(out)
wicket_deliveries = ball_by_ball_data.loc[is_counted_wicket]

# Inner join on 'ID' brings in 'Season' and keeps only matches flagged SuperOver == 'N'.
# NOTE(review): this drops every delivery of a match flagged with a super over -- confirm
# that excluding the whole match (not just the super-over deliveries) is intended.
non_super_over_matches = match_data.loc[match_data['SuperOver'] == 'N']
merged_data = wicket_deliveries.merge(non_super_over_matches, on='ID')

# One row per (Season, bowler) with the number of wicket deliveries, largest first.
season_wickets = (
    merged_data.groupby(['Season', 'bowler'])['isWicketDelivery']
    .count()
    .reset_index()
    .rename(columns={'bowler': 'Player', 'isWicketDelivery': 'Wickets'})
    .sort_values(by='Wickets', ascending=False)
)

# Best bowler of each season first, then the best of those across all seasons.
season_leader_labels = season_wickets.groupby('Season')['Wickets'].idxmax()
max_wickets_each_season = season_wickets.loc[season_leader_labels].reset_index()
overall_leader_label = max_wickets_each_season['Wickets'].idxmax()
highest_wicket_taker = max_wickets_each_season.loc[overall_leader_label]
print("Highest wicket-taker in any particular season: ", highest_wicket_taker['Player'], " - ", highest_wicket_taker['Wickets'], "Wickets")

In [None]:
# 2nd operation: all-time leaders for deliveries faced, sixes, catches and LBW wickets.
# No SuperOver filter is applied in this cell; every row of ball_by_ball_data is counted.

# --- Most deliveries faced ---------------------------------------------------------------
# Counts rows per batter (assumes one row per delivery -- TODO confirm).
# NOTE(review): every row is counted, so deliveries that should not count as "faced"
# (e.g. wides, if they are recorded as rows) are included -- confirm against the data.
player_deliveries = ball_by_ball_data.groupby('batter')['ballnumber'].count().reset_index()
player_deliveries.columns = ['Player', 'Deliveries Faced']
player_with_most_deliveries = player_deliveries.loc[player_deliveries['Deliveries Faced'].idxmax()]

print("Player who has faced the highest number of deliveries in the history of IPL:", player_with_most_deliveries['Player'])

# --- Most sixes --------------------------------------------------------------------------
# A six is a delivery with batsman_run == 6 that is not flagged as a non-boundary.
is_six = (ball_by_ball_data['batsman_run'] == 6) & (ball_by_ball_data['non_boundary'] == 0)
sixes_by_player = ball_by_ball_data[is_six].groupby('batter').size().reset_index(name='Sixes')
player_with_most_sixes = sixes_by_player.loc[sixes_by_player['Sixes'].idxmax()]

print("Player who has hit the highest number of sixes in the history of IPL:", player_with_most_sixes['batter'])

# --- Most catches ------------------------------------------------------------------------
# 'caught and bowled' is a separate dismissal kind from 'caught', so filtering on 'caught'
# alone misses every catch taken by the bowler. Count both kinds; for caught-and-bowled the
# catcher is by definition the bowler, otherwise it is the recorded fielder.
catch_deliveries = ball_by_ball_data[ball_by_ball_data['kind'].isin(['caught', 'caught and bowled'])]
catcher = catch_deliveries['fielders_involved'].where(
    catch_deliveries['kind'] == 'caught', catch_deliveries['bowler']
)
# The Series keeps the name 'fielders_involved', so the result has the same columns as before.
catches_by_player = catcher.groupby(catcher).size().reset_index(name='Catches')
player_with_most_catches = catches_by_player.loc[catches_by_player['Catches'].idxmax()]
print("Player who has taken the highest number of catches in the history of IPL:", player_with_most_catches['fielders_involved'])

# --- Most LBW wickets --------------------------------------------------------------------
lbw_wickets_data = ball_by_ball_data[ball_by_ball_data['kind'] == 'lbw']
lbw_wickets_by_bowler = lbw_wickets_data.groupby('bowler').size().reset_index(name='LBW Wickets')
player_with_most_lbw = lbw_wickets_by_bowler.loc[lbw_wickets_by_bowler['LBW Wickets'].idxmax()]
print("Player who has taken the highest number of LBW wickets in the history of IPL: ", player_with_most_lbw['bowler'])

In [None]:
# Highest run getter of each season, shown as a bar chart.

# Inner join on 'ID' adds 'Season' and keeps only matches flagged SuperOver == 'N'.
merged_data = pd.merge(ball_by_ball_data, match_data[match_data['SuperOver'] == 'N'], on='ID')
season_run_getters = merged_data.groupby(['Season', 'batter'])['batsman_run'].sum().reset_index()

# For every season keep the single (Season, batter) row with the largest run total.
highest_run_getters = season_run_getters.loc[season_run_getters.groupby('Season')['batsman_run'].idxmax()]
seasons = highest_run_getters['Season'].tolist()

# Draw all bars in one call on explicit integer positions, so the x-axis is laid out the
# same way whether 'Season' holds numbers or strings.
fig, ax = plt.subplots(figsize=(12, 6))
bar_positions = list(range(len(seasons)))
bars = ax.bar(bar_positions, highest_run_getters['batsman_run'].tolist(), alpha=0.7)

# Write the batter's name above each bar; without it the chart shows run totals but never
# says who scored them.
for bar, batter_name in zip(bars, highest_run_getters['batter']):
    ax.annotate(
        batter_name,
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
        xytext=(0, 3),
        textcoords='offset points',
        ha='center',
        va='bottom',
        rotation=90,
        fontsize=9,
    )

ax.set_xticks(bar_positions)
ax.set_xticklabels([str(season) for season in seasons], rotation=45, ha='right')
ax.margins(y=0.25)  # head-room so the vertical name labels are not clipped
ax.set_xlabel('Season')
ax.set_ylabel('Runs')
ax.set_title('Highest Run Getters in Each IPL Season')
fig.tight_layout()
plt.show()

In [None]:
# Top 5 batsmen by strike rate among those who faced at least MIN_BALLS_FACED balls.
MIN_BALLS_FACED = 50

# Inner join on 'ID' keeps only matches flagged SuperOver == 'N'.
merged_data = pd.merge(ball_by_ball_data, match_data[match_data['SuperOver'] == 'N'], on='ID')

# Work from merged_data (not the raw ball_by_ball_data) so the SuperOver filter built above
# is actually applied; previously the merge result was computed and then never used.
# transform('size') gives each row its batter's total row count, which avoids calling a
# Python lambda once per batter.
balls_per_batter = merged_data.groupby('batter')['batter'].transform('size')
batsmen_fifty_balls = merged_data[balls_per_batter >= MIN_BALLS_FACED]

# Runs scored and rows (balls) per batter, then strike rate = runs per 100 balls.
# NOTE(review): every row counts as a ball faced here -- confirm whether some deliveries
# (e.g. wides) should be excluded from the denominator.
batsman_stats = batsmen_fifty_balls.groupby('batter').agg({'batsman_run': 'sum', 'ballnumber': 'count'})
batsman_stats['Strike Rate'] = (batsman_stats['batsman_run'] / batsman_stats['ballnumber']) * 100

top_batsmen = batsman_stats.sort_values(by='Strike Rate', ascending=False).head(5)
print("Top 5 batsmen in the history of IPL based on strike rate (faced at least 50 balls):")
top_batsmen[['Strike Rate']]