In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [3]:
dataSet = pd.read_csv('ipl_2025_auction_players.csv')

In [4]:
dataSet.head()

Unnamed: 0,Players,Team,Type,Base,Sold
0,Virat Kohli,RCB,BAT,-,21.0
1,Rajat Patidar,RCB,BAT,-,11.0
2,Yash Dayal,RCB,BOWL,-,5.0
3,Jasprit Bumrah,MI,BOWL,-,18.0
4,Suryakumar Yadav,MI,BAT,-,16.35


In [6]:
dataSet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 623 entries, 0 to 622
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Players  623 non-null    object
 1   Team     623 non-null    object
 2   Type     623 non-null    object
 3   Base     623 non-null    object
 4   Sold     623 non-null    object
dtypes: object(5)
memory usage: 24.5+ KB


In [7]:
dataSet.shape

(623, 5)

In [8]:
dataSet.columns

Index(['Players', 'Team', 'Type', 'Base', 'Sold'], dtype='object')

In [9]:
dataSet.isnull().sum()

Players    0
Team       0
Type       0
Base       0
Sold       0
dtype: int64

In [10]:
dataSet.describe()

Unnamed: 0,Players,Team,Type,Base,Sold
count,623,623,623,623.0,623
unique,620,11,4,9.0,72
top,Daryl Mitchell,-,BOWL,0.3,TBA
freq,2,395,216,322.0,293


In [11]:
dataSet['Sold'].unique()

array(['21.00', '11.00', '5.00', '18.00', '16.35', '16.30', '8.00',
       '23.00', '14.00', '6.00', '13.00', '12.00', '4.00', '16.50',
       '13.25', '10.00', '8.50', '5.50', '15.75', '26.75', '27.00',
       '10.75', '11.75', '8.75', '7.50', '12.25', '6.25', '9.00', '2.00',
       '3.40', 'Unsold', '9.75', '23.75', '4.20', '3.60', '11.25',
       '11.50', '4.80', '12.50', '9.50', '6.50', '3.20', '5.25', '4.40',
       '2.40', '0.50', '3.00', '0.30', '1.50', '1.70', '0.95', '1.20',
       '3.80', '0.65', '1.80', '2.20', '1.60', '2.60', '7.00', '5.75',
       '0.75', '1.00', '9.25', 'TBA', '0.80', '1.30', '0.40', '0.55',
       '2.80', '0.35', '1.25', '1.10'], dtype=object)

In [13]:
dataSet['Sold'] = dataSet['Sold'].replace(['Unsold', 'TBA'], '0')
dataSet['Sold'] = pd.to_numeric(dataSet['Sold'], errors='coerce')

In [14]:
dataSet.dtypes

Players     object
Team        object
Type        object
Base        object
Sold       float64
dtype: object

In [15]:
dataSet["Base"].unique()

array(['-', '2.00', '1.50', '0.75', '1.00', '0.30', '0.50', '0.40',
       '1.25'], dtype=object)

In [16]:
dataSet['Base'] = dataSet['Base'].replace('-','0')
dataSet['Base'] = pd.to_numeric(dataSet['Base'], errors= 'coerce')

In [17]:
dataSet.dtypes

Players     object
Team        object
Type        object
Base       float64
Sold       float64
dtype: object

In [18]:
dataSet['Team'].unique()

array(['RCB', 'MI', 'SRH', 'CSK', 'DC', 'KKR', 'RR', 'GT', 'LSG', 'PBKS',
       '-'], dtype=object)

# Number Of Playes Each Team Purchased

In [20]:
dataSet['Team'].value_counts()

Team
-       395
CSK      25
GT       25
PBKS     25
LSG      24
MI       23
DC       23
RCB      22
KKR      21
SRH      20
RR       20
Name: count, dtype: int64

In [21]:
dataSet[['Base','Sold']].describe()

Unnamed: 0,Base,Sold
count,623.0,623.0
mean,0.679133,1.897432
std,0.620929,4.415365
min,0.0,0.0
25%,0.3,0.0
50%,0.3,0.0
75%,0.75,0.8
max,2.0,27.0


# Distribution Of Sold Price

In [3]:
sold_prices = dataSet['Sold'].dropna()

fig1 = px.histogram(
    sold_prices,
    x='Sold',
    nbins=30,
    histnorm='probability density',
    title='<b>Distribution of Sold Prices</b><br><sup>Understanding the spread of property sales</sup>', # Bold title, added subtitle
    labels={'Sold': 'Sold Price (in Crores)'},
    color_discrete_sequence=px.colors.qualitative.Plotly, 
    hover_data={'Sold': ':.2f'}
)

fig1.update_traces(
    marker=dict(line=dict(width=1, color='black')),
    selector=dict(type='histogram') 
)

fig1.update_layout(
    xaxis_title='<b>Sold Price (in Crores)</b>', 
    yaxis_title='<b>Frequency (Probability Density)</b>',
    bargap=0.1, # Slightly reduced gap for a more continuous look
    plot_bgcolor='white', # White plot background
    paper_bgcolor='white', # White paper background
    font=dict(family="Arial, sans-serif", size=12, color="RebeccaPurple"), 
    title_x=0.5,
    margin=dict(l=40, r=40, t=80, b=40), # Adjust margins
    hovermode="x unified" # Unified hover for cleaner interaction
)

fig1.show(renderer='iframe_connected')

NameError: name 'dataSet' is not defined

# Top 10 Most Expensive Players

In [37]:
top_expensive_buys = dataSet.sort_values(by='Sold',ascending=False).head(10)

fig2 = px.bar(
    top_expensive_buys,
    x ='Sold',
    y = 'Players',
    title = 'Top 10 Most Expensive Buys Of Auction',
    labels={'Sold': 'Sold Price (in Crores)', 'Players': 'Players'},
    color = 'Sold', color_continuous_scale = 'rainbow',
    hover_data = {'Sold' :':.2f'}
)

fig2.update_traces(
    marker = dict(line=dict(width = 1, color = 'black')),
    selector= dict(type = 'bar')
)
fig2.update_layout(
    xaxis_title = '<b>Sold Price (in Crores)</b>',
    yaxis_title = '<b>Players</b>',
    bargap =0.1,
    plot_bgcolor = 'white',
    paper_bgcolor = 'white',
    font = dict(family="Arial, sans-serif", size=12, color="RebeccaPurple"),
    title_x = 0.5,
    margin = dict(l=40, r=40, t=80, b=40),
    hovermode = 'y unified'
)

fig2.show(renderer = 'iframe_connected')

# Top 10 Least Expensive Players

In [97]:
least_expensive_buys = dataSet[dataSet['Sold']>0].sort_values(by = 'Sold' ,ascending=True).head(10)
least_expensive_buys

Unnamed: 0,Players,Team,Type,Base,Sold,Sold_Status
107,Aryan Juyal,LSG,BAT,0.3,0.3,Sold
105,Nishant Sindhu,GT,AR,0.3,0.3,Sold
111,Luvnith Sisodia,KKR,BAT,0.3,0.3,Sold
110,Anuj Rawat,GT,BAT,0.3,0.3,Sold
128,Manav Suthar,GT,BOWL,0.3,0.3,Sold
127,Kumar Kartikeya,RR,BOWL,0.3,0.3,Sold
123,Shreyas Gopal,CSK,BOWL,0.3,0.3,Sold
124,Mayank Markande,KKR,BOWL,0.3,0.3,Sold
96,Atharva Taide,SRH,BAT,0.3,0.3,Sold
470,Ramakrishna Ghosh,CSK,AR,0.3,0.3,Sold


# Base Price V/S Sold Price

In [44]:
fig3 = px.box (
    dataSet,
    x = 'Base',
    y = 'Sold',
    title = 'Base Price V/S Sold Price',
    labels= {'Base' : 'Base Price (in Crores)',
             'Sold' : 'Sold Price (in Crores)'},
    color= 'Base',
    hover_data= {'Sold' :':.2f'}
)

fig3.update_traces(
    marker = dict(line =dict(color = 'black', width = 1)),
    selector = dict(type ='box')
)
fig3.update_layout(
    xaxis_title='<b>Base Price (in Crores)</b>', 
    yaxis_title='<b>Sold Price (in Crores)</b>',
    bargap = 0.2,
    paper_bgcolor = 'white',
    # plot_bgcolor = 'lightblue',
    font=dict(family="Arial, sans-serif", size=12, color="RebeccaPurple"), 
    title_x=0.5, 
    margin=dict(l=40, r=40, t=80, b=40),
    hovermode = 'x unified'
)
fig3.show(renderer='iframe_connected')

# Unsold Players

In [45]:
unsold_palyer = dataSet[dataSet['Sold'] == 0]
print(f'Count Of Unsold Players : {len(unsold_palyer)}')

Count Of Unsold Players : 396


In [46]:
unsold_count_type = unsold_palyer['Type'].value_counts()
unsold_count_type

Type
AR      138
BOWL    134
BAT     123
WK        1
Name: count, dtype: int64

In [1]:



fig4 = px.bar(
    x=unsold_count_type.index,
    y=unsold_count_type.values,
    labels={
        'x': 'Type', 
        'y': 'Number of Unsold Players',
    },
    color=unsold_count_type.index, 
    color_discrete_sequence=px.colors.qualitative.Dark24, 
    title='<b>Unsold Players by Type</b><br><sup>Distribution of players who did not find a team</sup>', # Enhanced title with subtitle
)

fig4.update_traces(
    marker=dict(line=dict(width=1, color='black')),
    selector=dict(type='bar'),
)

fig4.update_layout(
    xaxis_title="<b>Player's Type</b>",
    yaxis_title='<b>Count of Unsold Players</b>',
    bargap=0.2,
    plot_bgcolor='white',  
    paper_bgcolor='white',
    title_x=0.5,
    font=dict(family="Segoe UI, sans-serif", size=12, color="DarkSlateGray"), # Custom font style
    hovermode='x unified',
    margin=dict(l=40, r=40, t=80, b=40),
)

fig4.show(renderer='iframe_connected')

NameError: name 'px' is not defined

# Sold Price By Team

In [66]:
fig5 = px.box(
    dataSet,
    x = 'Team',
    y = 'Sold',
    title = '<b>Sold Price by Team</b>',
    labels={
        'x' : 'Team Name',
        'y' : 'Sold Player Count',
    }
)

fig5.update_traces(
    marker = dict(line= dict(color = 'black', width = 1)),
    selector= dict(type = 'bar'),
)

fig5.update_layout(
    xaxis_title='Team Names',
    yaxis_title='Sold Price',
    bargap = 0.2,
    plot_bgcolor = 'white',
    paper_bgcolor = 'white',
    title_x = 0.5,
    hovermode = 'x unified',
    xaxis_tickangle = -45,
)

fig5.show(renderer='iframe_connected')


# Average Sold Price by Player Type

In [68]:
avg_sold_type = dataSet.groupby('Type')['Sold'].mean().sort_values(ascending = False)
print(f"Average Sold Price by Player Type:\n {avg_sold_type}")

Average Sold Price by Player Type:
 Type
BAT     2.251047
BOWL    1.839352
AR      1.650465
WK      0.000000
Name: Sold, dtype: float64


In [74]:
fig6 = px.bar(
    x = avg_sold_type.index,
    y = avg_sold_type.values,
    title="<b>Average Sold Price by Player Type</b>", 
    labels={'x': 'Player Type',
            'y': 'Average Sold Price (in Crores)'
           }, 
    color=avg_sold_type.values, color_continuous_scale='Blues'
)

fig6.update_traces(
    marker = dict(line = dict(width = 0.1, color = 'black')),
    selector= dict(type ='bar'),
)
fig6.update_layout(
    xaxis_title="Player Type",
    yaxis_title="Average Sold Price (in Crores)",
    hovermode = 'x unified',
    bargap = 0.2,
     title_x=0.5,
    font=dict(family="Segoe UI, sans-serif", size=12, color="DarkSlateGray"),
    margin=dict(l=40, r=40, t=80, b=40),
)
fig6.show(renderer='iframe_connected')
    

# Total Amount Spending By Each Team

In [77]:
total_Spendable_amount = dataSet.groupby('Team')['Sold'].sum().sort_values(ascending = False)
print(f'Toatal Amount Spend By each Team {total_Spendable_amount}')

Toatal Amount Spend By each Team Team
CSK     119.95
GT      119.85
MI      119.80
SRH     119.80
RR      119.70
PBKS    119.65
LSG     119.60
RCB     119.25
DC      116.55
KKR     107.95
-         0.00
Name: Sold, dtype: float64


# Sold vs Unsold Players by Type

In [79]:
dataSet['Sold_Status'] = dataSet['Sold'].apply(
    lambda x : 'Sold' if x not in [0, np.nan] else 'Unsold' )

fig8 = px.histogram(
    dataSet,
    x = 'Type',
    color = 'Sold_Status',
    title='Count of Sold vs Unsold Players by Type', 
    labels={'Type': 'Player Type', 'Sold_Status': 'Sold Status'},
    barmode='group',
)

fig8.update_layout(
    xaxis_title="Player Type",
    yaxis_title="Count",
    hovermode = 'x unified',
    bargap = 0.2,
    title_x=0.5,
    font=dict(family="Segoe UI, sans-serif", size=12, color="DarkSlateGray"),
    margin=dict(l=40, r=40, t=80, b=40),
)
fig8.show(renderer='iframe_connected')

In [80]:
new_dataSet = dataSet[dataSet['Sold'] > 0]
new_dataSet

Unnamed: 0,Players,Team,Type,Base,Sold,Sold_Status
0,Virat Kohli,RCB,BAT,0.0,21.00,Sold
1,Rajat Patidar,RCB,BAT,0.0,11.00,Sold
2,Yash Dayal,RCB,BOWL,0.0,5.00,Sold
3,Jasprit Bumrah,MI,BOWL,0.0,18.00,Sold
4,Suryakumar Yadav,MI,BAT,0.0,16.35,Sold
...,...,...,...,...,...,...
573,Eshan Malinga,SRH,BOWL,0.3,1.20,Sold
601,Tripurana Vijay,DC,AR,0.3,0.30,Sold
609,Madhav Tiwari,DC,AR,0.3,0.40,Sold
616,Vignesh Puthur,MI,AR,0.3,0.30,Sold


In [84]:
fig8 = px.histogram(
    new_dataSet, x='Team',
    color='Type', 
    title='<b>Player Type Composition by Team</b>', 
    labels={'Team': 'Team', 'Type': 'Player Type'},
    barmode='stack'
)

fig8.update_traces(
    marker = dict(line = dict(color = 'yellow', width = 1)),
    selector= dict(type = 'bar'),
)

fig8.update_layout(
    xaxis_title='Team',
    yaxis_title='Count of Players',
    xaxis_tickangle=-45,
    hovermode = 'x unified',
    bargap = 0.2,
    title_x = 0.5,
)
fig8.show(renderer='iframe_connected')