# **Importing the Libraries**

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly
from plotly.offline import plot, iplot
import warnings
warnings.filterwarnings('ignore')

  shapely_geos_version, geos_capi_version_string


# **Importing the dataset**

In [2]:
# Read the IPL auction CSV into a DataFrame and display it.
dataset = pd.read_csv(
    "../input/ipl-player-auction-dataset-from-start-to-now/IPLPlayerAuctionData.csv"
)
dataset

Unnamed: 0,Player,Role,Amount,Team,Year,Player Origin
0,Aaron Finch,Batsman,40000000,Sunrisers Hyderabad,2014.0,Overseas
1,Aaron Finch,Batsman,32000000,Mumbai Indians,2015.0,Overseas
2,Aaron Finch,Batsman,10000000,Gujarat Lions,2016.0,Overseas
3,Aaron Finch,Batsman,62000000,Kings XI Punjab,2018.0,Overseas
4,Aaron Finch,Batsman,44000000,Royal Challengers Bangalore,2020.0,Overseas
...,...,...,...,...,...,...
965,Yuzvendra Singh Chahal,Bowler,1000000,Royal Challengers Bangalore,2014.0,Indian
966,Yuzvendra Singh Chahal,Bowler,60000000,Royal Challengers Bangalore,2018.0,Indian
967,Zaheer Khan,Bowler,26000000,Mumbai Indians,2014.0,Indian
968,Zaheer Khan,Bowler,40000000,Delhi Daredevils,2015.0,Indian


# **Data Wrangling**

In [3]:
# Preview the first five rows.
dataset.head(n=5)

Unnamed: 0,Player,Role,Amount,Team,Year,Player Origin
0,Aaron Finch,Batsman,40000000,Sunrisers Hyderabad,2014.0,Overseas
1,Aaron Finch,Batsman,32000000,Mumbai Indians,2015.0,Overseas
2,Aaron Finch,Batsman,10000000,Gujarat Lions,2016.0,Overseas
3,Aaron Finch,Batsman,62000000,Kings XI Punjab,2018.0,Overseas
4,Aaron Finch,Batsman,44000000,Royal Challengers Bangalore,2020.0,Overseas


In [4]:
# Column labels of the loaded frame.
dataset.columns

Index(['Player', 'Role', 'Amount', 'Team', 'Year', 'Player Origin'], dtype='object')

In [5]:
# Per-column dtype and non-null count; this is where the single missing Year shows up.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 970 entries, 0 to 969
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Player         970 non-null    object 
 1   Role           970 non-null    object 
 2   Amount         970 non-null    int64  
 3   Team           970 non-null    object 
 4   Year           969 non-null    float64
 5   Player Origin  970 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 45.6+ KB


In [6]:
# (rows, columns) of the frame.
dataset.shape

(970, 6)

In [7]:
# Summary statistics for every column; include="all" adds count/unique/top/freq
# for the non-numeric columns alongside the numeric mean/std/quantiles.
dataset.describe(include="all")

Unnamed: 0,Player,Role,Amount,Team,Year,Player Origin
count,970,970,970.0,970,969.0,970
unique,543,4,,15,,2
top,Jaydev Unadkat,Bowler,,Royal Challengers Bangalore,,Indian
freq,9,352,,115,,616
mean,,,21054510.0,,2017.910217,
std,,,28000910.0,,2.964527,
min,,,1000000.0,,2013.0,
25%,,,2000000.0,,2015.0,
50%,,,9500000.0,,2018.0,
75%,,,30000000.0,,2021.0,


In [8]:
# Number of missing values in each column.
dataset.isna().sum()

Player           0
Role             0
Amount           0
Team             0
Year             1
Player Origin    0
dtype: int64

**Removing the row where year is null**

In [9]:
# Drop only the rows whose auction year is missing. Restricting to
# subset=["Year"] keeps a row that might have a null in some other column,
# which a bare dropna() would silently discard as well.
# Rebinding (instead of inplace=True) keeps the step explicit; .copy() gives an
# independent frame so later column assignments act on it directly.
dataset = dataset.dropna(subset=["Year"]).copy()

Converting the `Year` column from float to int (it was read as float only because of the missing value removed above)

In [10]:
# Cast Year to an integer dtype in one vectorised step instead of calling
# int() on every element. Bracket assignment is used because attribute-style
# assignment only works for columns that already exist and is easy to misuse.
# Like the original, this raises if any Year is still NaN.
dataset["Year"] = dataset["Year"].astype("int64")

In [11]:
# Distinct player names: print how many there are (count in bold via ANSI
# escape codes), then display the names themselves.
player_names = dataset["Player"].unique()
print(f"Dataset contains data of \033[1m{player_names.size}\033[0m Players")
player_names

Dataset contains data of [1m543[0m Players


array(['Aaron Finch', 'Abdul Samad', 'Abhijeet Tomar', 'Abhimanyu Mithun',
       'Abhinav Sadarangani', 'Abhishek Nayar', 'Abhishek Sharma',
       'Abu Nechim Ahmed', 'Adam Milne', 'Adam Zampa', 'Aditya Garhwal',
       'Aditya Tare', 'Agnivesh Ayachi', 'Aiden Blizzard',
       'Aiden Markram', 'Ajantha Mendis', 'Ajinkya Rahane', 'Akash Deep',
       'Akash Singh', 'Akhil Arvind Herwadkar', 'Akila Dananjaya',
       'Akila Dhananjaya', 'Akshar Rajesh Patel', 'Akshay Karnewar',
       'Akshay Wakhare', 'Akshdeep Nath', 'Albie Morkel', 'Alex Carey',
       'Alex Hales', 'Alzarri Joseph', 'Aman Khan', 'Ambati Rayudu',
       'Amit Mishra', 'Amit Paunikar', 'Andre Russell', 'Andrew Tye',
       'Aneeshwar Gautam', 'Angelo Mathews', 'Aniket Choudhary',
       'Anirudha Ashok Joshi', 'Ankeet Bawane', 'Ankit Nagendra Sharma',
       'Ankit Sharma', 'Ankit Singh Rajpoot', 'Ankush Bains',
       'Anmolpreet Singh', 'Anrich Nortje', 'Ansh Patel', 'Anuj Rawat',
       'Anukul Roy', 'Anunay Sing

In [12]:
# Distinct player roles: print the count in bold, then display the values.
role_names = dataset["Role"].unique()
print(f"Dataset contains data of \033[1m{role_names.size}\033[0m Roles")
role_names

Dataset contains data of [1m4[0m Roles


array(['Batsman', 'All-Rounder', 'Bowler', 'Wicket Keeper'], dtype=object)

In [13]:
# Distinct team names: print the count in bold, then display the values.
team_names = dataset["Team"].unique()
print(f"Dataset contains data of \033[1m{team_names.size}\033[0m Teams")
team_names

Dataset contains data of [1m15[0m Teams


array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Kings XI Punjab', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Gujarat Titans', 'Pune Warriors India',
       'Rajasthan Royals', 'Delhi Daredevils', 'Chennai Super Kings',
       'Rising Pune Supergiant', 'Delhi Capitals', 'Lucknow Super Giants',
       'Punjab Kings'], dtype=object)

In [14]:
# Distinct auction years: print the count in bold, then display the values.
auction_years = dataset["Year"].unique()
print(f"Dataset contains data of \033[1m{auction_years.size}\033[0m years")
auction_years

Dataset contains data of [1m10[0m years


array([2014, 2015, 2016, 2018, 2020, 2022, 2013, 2021, 2017, 2019])

In [15]:
# Distinct values of the player-origin column.
pd.unique(dataset["Player Origin"])

array(['Overseas', 'Indian'], dtype=object)

# **Exploratory Data Analysis**

# Total Amount Spent By Each Team Each Year

In [16]:
# One line chart per team: total auction amount spent in each year,
# with every point labelled by its year.
teams = dataset.Team.unique()
for team in teams:
    is_team = dataset["Team"] == team
    data = dataset.loc[is_team].groupby("Year")["Amount"].sum()
    fig = px.line(
        data,
        x=data.index,
        y="Amount",
        title=f"{team}'s spent amount",
        text=data.index,
    ).update_traces(textposition="top right")  # update_traces returns the same figure
    fig.show()

# Line Chart of Different Teams for Comparison

In [17]:
# Total spend per (team, year) in long form, one row per pair, so plotly
# express can draw one coloured, symbol-marked line per team on shared axes.
# Series.reset_index() already returns a DataFrame, so no extra wrapping is needed.
data = dataset.groupby(['Team', 'Year'])['Amount'].sum().reset_index()
fig = px.line(data, x='Year', y='Amount', color='Team', symbol='Team')
# The lines are IPL teams, not countries.
fig.update_layout(title_text="Overall comparison of different teams")
fig.show()

# Overall Expense Each Year

In [18]:
# Total amount spent across all teams in each auction year,
# with every point labelled by its year.
data = dataset.groupby("Year")["Amount"].sum()
fig = px.line(
    data,
    x=data.index,
    y="Amount",
    title="Overall Expense Each year",
    text=data.index,
).update_traces(textposition="top right")  # update_traces returns the same figure
fig.show()

# Most Expensive Player Each Year

In [19]:
# Build one summary row per auction year holding the highest bid of that year.
# The groupby is created once and iterated (keys come out sorted by year),
# instead of rebuilding it for every year.
records = []
for year, year_rows in dataset.groupby("Year"):
    top_rows = year_rows[year_rows["Amount"] == year_rows["Amount"].max()]
    records.append({
        "Year": year,
        # Several players can tie for the top bid: list all of them, and list
        # their teams in the same order so the two columns stay aligned.
        "Players": ", ".join(top_rows["Player"]),
        "Amount": top_rows["Amount"].iloc[0],
        "Team": ", ".join(top_rows["Team"]),
    })
# Explicit columns keep the frame well-formed even if there are no records.
data = pd.DataFrame(records, columns=["Year", "Players", "Amount", "Team"])

# Line chart of the top bid per year, each point labelled with the player name(s).
fig = px.line(data, x="Year", y="Amount", title="Most Expensive Player Each year", text="Players")
fig.update_traces(textposition="top right")
fig.show()

# The same information as a table.
fig = go.Figure(
    data=[
        go.Table(
            header=dict(values=['Year', 'Player', 'Team', 'Amount']),
            cells=dict(values=[data.Year, data.Players, data.Team, data.Amount]),
        )
    ]
)
fig.show()

# Total Amount Spent on Players Based on Their Role

In [20]:
# One line chart per role: total auction amount spent on that role in each
# year, with every point labelled by its year.
roles = dataset.Role.unique()
for role in roles:
    has_role = dataset["Role"] == role
    data = dataset.loc[has_role].groupby("Year")["Amount"].sum()
    fig = px.line(
        data,
        x=data.index,
        y="Amount",
        title=f"Total Amount Spent on {role}'s",
        text=data.index,
    ).update_traces(textposition="top right")  # update_traces returns the same figure
    fig.show()

# Overall Comparison of Different Roles

In [21]:
# Total spend per (role, year) in long form, one row per pair, so plotly
# express can draw one coloured, symbol-marked line per role on shared axes.
# Series.reset_index() already returns a DataFrame, so no extra wrapping is needed.
data = dataset.groupby(['Role', 'Year'])['Amount'].sum().reset_index()
fig = px.line(data, x='Year', y='Amount', color='Role', symbol='Role')
# Spelling of the user-facing chart title corrected ("comparison").
fig.update_layout(title_text="Overall comparison of different Roles")
fig.show()

# Total Number of Players Sold Each Year Based on Role

In [22]:
# One histogram per auction year (in ascending order) counting the players
# sold in each role.
years = dataset.Year.unique()
for year in sorted(years):
    in_year = dataset["Year"] == year
    data = dataset.loc[in_year]
    fig = px.histogram(
        data,
        x="Role",
        title=f"Count Plot Based on role for {year}",
    )
    fig.show()