## Some imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np

In [4]:
# Load the player ELO table. The path is relative to the notebook's working
# directory and uses a forward slash, which resolves on Windows, macOS and
# Linux alike (a backslash is only a path separator on Windows).
df = pd.read_csv("data/player_ELO.csv")
df.head()

Unnamed: 0,Player,ELO,Role,Form,LastPlayed,DaysSinceLastMatch
0,SL Malinga,1838,bowler,0.312202,2024-04-07,340
1,CV Varun,1804,bowler,0.252937,2025-03-12,1
2,MA Starc,1794,bowler,0.30521,2025-03-12,1
3,SB Jakati,1780,bowler,0.197289,2023-10-24,506
4,DE Bollinger,1776,bowler,0.312146,2022-12-13,821


In [7]:
# Parse the LastPlayed column with pd.to_datetime and overwrite it in place.
df['LastPlayed'] = pd.to_datetime(df['LastPlayed'])

## Basic analysis of the data

In [12]:
# (rows, columns) of the loaded frame
df.shape

(741, 6)

In [18]:
# Number of missing values in each column
df.isna().sum()

Player                0
ELO                   0
Role                  0
Form                  0
LastPlayed            0
DaysSinceLastMatch    0
dtype: int64

In [19]:
# Number of rows that are exact duplicates of an earlier row
df.duplicated().sum()

0

In [20]:
# Summary statistics (count, mean, quartiles, std, ...) for the describable columns
df.describe()

Unnamed: 0,ELO,Form,LastPlayed,DaysSinceLastMatch
count,741.0,741.0,741,741.0
mean,1554.946019,0.151929,2024-01-18 18:39:21.133603328,419.222672
min,1388.0,0.0,2022-03-14 00:00:00,1.0
25%,1497.0,0.088279,2023-02-20 00:00:00,42.0
50%,1537.0,0.153987,2024-03-29 00:00:00,349.0
75%,1605.0,0.217376,2025-01-30 00:00:00,752.0
max,1838.0,0.61,2025-03-12 00:00:00,1095.0
std,75.758452,0.087259,,369.938593


## Basic analysis functions

In [22]:
def analyze_elo_distribution():
    """Print a text report on the ``ELO`` column of the module-level ``df``.

    The report has three parts, all written to stdout:

    1. mean, median, min, max and standard deviation of ``ELO``;
    2. the five highest-rated rows (``Player``, ``ELO``, ``Role``);
    3. count/mean/median/min/max/std of ``ELO`` grouped by ``Role``.

    Returns:
        None. ``df`` is only read, never modified.
    """
    elo = df['ELO']

    # Overall summary: one line per statistic, emitted in a single print.
    summary = [
        "ELO Rating Analysis:",
        f"Mean ELO: {elo.mean():.2f}",
        f"Median ELO: {elo.median():.2f}",
        f"Min ELO: {elo.min()}",
        f"Max ELO: {elo.max()}",
        f"ELO Standard Deviation: {elo.std():.2f}",
    ]
    print("\n".join(summary))

    # Leaderboard: highest ELO first, default head() keeps five rows.
    print("\nTop 5 players by ELO:")
    leaderboard = df[['Player', 'ELO', 'Role']].sort_values('ELO', ascending=False)
    print(leaderboard.head())

    # Per-role breakdown of the same statistics.
    print("\nELO statistics by role:")
    role_stats = df.groupby('Role')['ELO'].agg(
        ['count', 'mean', 'median', 'min', 'max', 'std']
    )
    print(role_stats)

In [24]:
# Print the overall ELO summary, the top-5 list and the per-role statistics
analyze_elo_distribution()

ELO Rating Analysis:
Mean ELO: 1554.95
Median ELO: 1537.00
Min ELO: 1388
Max ELO: 1838
ELO Standard Deviation: 75.76

Top 5 players by ELO:
         Player   ELO    Role
0    SL Malinga  1838  bowler
1      CV Varun  1804  bowler
2      MA Starc  1794  bowler
3     SB Jakati  1780  bowler
4  DE Bollinger  1776  bowler

ELO statistics by role:
             count         mean  median   min   max        std
Role                                                          
all-rounder     85  1553.329412  1537.0  1426  1738  65.185338
batsman        293  1548.337884  1529.0  1388  1756  72.924655
bowler         354  1562.169492  1555.0  1388  1838  80.324340
unknown          9  1501.222222  1490.0  1486  1525  16.991011


## Form vs ELO?

In [27]:
def analyze_form_vs_elo():
    """Report how the ``Form`` column relates to ``ELO`` in the module-level ``df``.

    Prints the correlation between ``ELO`` and ``Form`` (``Series.corr``
    default method), then splits players into three equal-sized ``Form``
    buckets and prints the mean ``ELO`` of each bucket, highest first.

    Side effects:
        Adds (or overwrites) a categorical ``FormCategory`` column on ``df``
        with the labels ``'Low'``, ``'Medium'`` and ``'High'``.

    Returns:
        None.
    """
    correlation = df['ELO'].corr(df['Form'])
    print(f"\nCorrelation between Form and ELO: {correlation:.4f}")

    # Group players into form categories (terciles of Form)
    df['FormCategory'] = pd.qcut(df['Form'], 3, labels=['Low', 'Medium', 'High'])
    print("\nAverage ELO by Form Category:")
    # FormCategory is categorical: state `observed` explicitly so the result
    # does not depend on pandas' changing default and no FutureWarning is
    # emitted. observed=False keeps the behaviour this code has always had.
    mean_elo_by_form = df.groupby('FormCategory', observed=False)['ELO'].mean()
    print(mean_elo_by_form.sort_values(ascending=False))

In [28]:
# Print the Form/ELO correlation and the mean ELO per form bucket
# (this call also adds the FormCategory column to df)
analyze_form_vs_elo()


Correlation between Form and ELO: 0.4768

Average ELO by Form Category:
FormCategory
High      1598.097166
Medium    1556.882591
Low       1509.858300
Name: ELO, dtype: float64


  print(df.groupby('FormCategory')['ELO'].mean().sort_values(ascending=False))


## Active and inactive players

In [31]:
def active_vs_inactive(threshold_days=100):
    """Compare active and inactive players in the module-level ``df``.

    A player is labelled ``'Active'`` when ``DaysSinceLastMatch`` is at most
    ``threshold_days`` and ``'Inactive'`` otherwise. The function prints the
    size of each group and the mean ``ELO`` and mean ``Form`` per group.

    Args:
        threshold_days: Largest number of days since the last match for a
            player to still count as active. Defaults to 100, the cut-off
            this analysis has always applied.

    Side effects:
        Adds (or overwrites) a ``Status`` column on ``df``.

    Returns:
        None.
    """
    # Vectorized comparison instead of a per-row lambda; rows that fail the
    # comparison (including missing values) are labelled 'Inactive'.
    is_active = df['DaysSinceLastMatch'] <= threshold_days
    df['Status'] = np.where(is_active, 'Active', 'Inactive')

    print("\nActive vs Inactive Players:")
    print(f"Active players: {int(is_active.sum())}")
    print(f"Inactive players: {int((~is_active).sum())}")

    print("\nAverage ELO by player status:")
    print(df.groupby('Status')['ELO'].mean())

    print("\nAverage Form by player status:")
    print(df.groupby('Status')['Form'].mean())

In [32]:
# Print active/inactive counts and the mean ELO and Form per status
# (this call also adds the Status column to df)
active_vs_inactive()


Active vs Inactive Players:
Active players: 250
Inactive players: 491

Average ELO by player status:
Status
Active      1583.536000
Inactive    1540.389002
Name: ELO, dtype: float64

Average Form by player status:
Status
Active      0.171625
Inactive    0.141900
Name: Form, dtype: float64


In [45]:
# Export the players whose last match was at most 100 days ago.
df2 = df[df['DaysSinceLastMatch'] <= 100]
# Let pandas write the file itself: it encodes as UTF-8 and manages line
# endings. Pushing the to_csv() string through a text-mode open() instead
# uses the locale encoding and, on Windows, can translate the line
# terminators a second time (blank line after every row).
df2.to_csv('Recent_ELO.csv')

## Most valuable players based on role, form insights, and current activity

In [None]:
# Players whose last match was at most 30 days ago.
active_players = df.loc[df['DaysSinceLastMatch'] <= 30]
if len(active_players) > 0:
    # Highest ELO first; keep the first three rows.
    top_active = active_players.sort_values('ELO', ascending=False).head(3)
    print("\nTop 3 active players by ELO:")
    for rank, row in enumerate(top_active.itertuples(index=False), start=1):
        print(f"{rank}. {row.Player} (ELO: {row.ELO}, Role: {row.Role}, Form: {row.Form:.3f})")


Top 3 active players by ELO:
1. CV Varun (ELO: 1804, Role: bowler, Form: 0.253)
2. MA Starc (ELO: 1794, Role: bowler, Form: 0.305)
3. Shubman Gill (ELO: 1756, Role: batsman, Form: 0.226)


In [44]:
# "Potential risers": ELO strictly below the median, Form strictly above it.
elo_median = df['ELO'].median()
form_median = df['Form'].median()
low_elo_high_form = df[(df['Form'] > form_median) & (df['ELO'] < elo_median)]

print("\nPlayers with lower ELO but above-median form (potential risers):")
if low_elo_high_form.empty:
    print("None identified")
else:
    for row in low_elo_high_form.itertuples(index=False):
        print(f"- {row.Player} (ELO: {row.ELO}, Form: {row.Form:.3f})")


Players with lower ELO but above-median form (potential risers):
- PJ Sangwan (ELO: 1534, Form: 0.204)
- HV Patel (ELO: 1534, Form: 0.274)
- AP Dole (ELO: 1532, Form: 0.307)
- Arshdeep Singh (ELO: 1532, Form: 0.234)
- J Botha (ELO: 1532, Form: 0.172)
- SMSM Senanayake (ELO: 1532, Form: 0.280)
- KA Maharaj (ELO: 1532, Form: 0.246)
- WD Parnell (ELO: 1532, Form: 0.250)
- Harmeet Singh (ELO: 1532, Form: 0.221)
- D Jansen (ELO: 1531, Form: 0.250)
- SB Wagh (ELO: 1531, Form: 0.157)
- T Thushara (ELO: 1530, Form: 0.349)
- Mohammad Nabi (ELO: 1530, Form: 0.185)
- KC Sangakkara (ELO: 1529, Form: 0.165)
- V Pratap Singh (ELO: 1528, Form: 0.233)
- AC Voges (ELO: 1528, Form: 0.154)
- KP Pietersen (ELO: 1528, Form: 0.210)
- YA Abdulla (ELO: 1528, Form: 0.244)
- DA Warner (ELO: 1528, Form: 0.273)
- PP Chawla (ELO: 1527, Form: 0.218)
- Umar Gul (ELO: 1526, Form: 0.405)
- G Coetzee (ELO: 1526, Form: 0.211)
- Ankit Soni (ELO: 1525, Form: 0.168)
- DJ Muthuswami (ELO: 1525, Form: 0.158)
- BE Hendricks 