Consistency Score All-Rounder Score = (Batting Consistency Score + Bowling Consistency Score) / 2 + W1 * (Catches Taken / Mat)

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def analyze_and_visualize(df):
    # Define features for analysis.
    features = ['Catches Taken', 'Bat_Mat', 'Bat_Inns', 'NO', 'Bat_Runs', 'HS', 'Bat_Ave', 'BF', 'Bat_SR', 
                "100's", "50's", "Zeros", '4s', '6s', 'Bowl_Mat', 'Bowl_Inns', 'Overs', 'Mdns', 'Bowl_Runs', 
                'Wkts', 'Bowl_Ave', 'Econ', 'Bowl_SR', '4W', '5W']
    
    # Check if all features exist in the DataFrame
    missing_features = [col for col in features if col not in df.columns]
    if missing_features:
        raise ValueError(f"The following required columns are missing in the DataFrame: {missing_features}")
    
    # Select features from the DataFrame
    df_analysis = df[features]
    
    # Replace infinities and NaN values with 0
    df_analysis = df_analysis.replace([np.inf, -np.inf], 0)
    df_analysis = df_analysis.fillna(0)
    
    # Correlation Analysis
    correlation_matrix = df_analysis.corr()
    
    # Visualize the correlation matrix using a heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
    plt.title('Correlation Matrix of Features')
    plt.show()
    
    return df_analysis

In [None]:
def calculate_allrounder_formula_components(df):
    """
    Calculates the components of the All-Rounder Consistency Score formula.
    
    Consistency Score All-Rounder Score = (Batting Consistency Score + Bowling Consistency Score) / 2 
                                         + W1 * (Catches Taken / Mat)
    """
    df_components = df.copy()
    # Ensure numeric types for calculations (handle potential NaN from coercion)
    df_components['Bat_Mat'] = pd.to_numeric(df_components['Bat_Mat'], errors='coerce')
    df_components['Bat_Inns'] = pd.to_numeric(df_components['Bat_Inns'], errors='coerce')
    df_components['NO'] = pd.to_numeric(df_components['NO'], errors='coerce')
    df_components['Bat_Runs'] = pd.to_numeric(df_components['Bat_Runs'], errors='coerce')
    df_components['BF'] = pd.to_numeric(df_components['BF'], errors='coerce')
    df_components['Bat_Ave'] = pd.to_numeric(df_components['Bat_Ave'], errors='coerce')
    df_components["100's"] = pd.to_numeric(df_components["100's"], errors='coerce')
    df_components["50's"] = pd.to_numeric(df_components["50's"], errors='coerce')
    df_components["Zeros"] = pd.to_numeric(df_components["Zeros"], errors='coerce')
    df_components['Bowl_Inns'] = pd.to_numeric(df_components['Inns'], errors='coerce')
    df_components['Bowl_Mat'] = pd.to_numeric(df_components['Mat'], errors='coerce')
    df_components['Overs'] = pd.to_numeric(df_components['Overs'], errors='coerce')
    df_components['Wkts'] = pd.to_numeric(df_components['Wkts'], errors='coerce')
    df_components['Bowl_Ave'] = pd.to_numeric(df_components['Bowl_Ave'], errors='coerce')
    df_components['Econ'] = pd.to_numeric(df_components['Econ'], errors='coerce')
    df_components['Bowl_SR'] = pd.to_numeric(df_components['Bowl_SR'], errors='coerce')
    df_components['4W'] = pd.to_numeric(df_components['4W'], errors='coerce')
    df_components['5W'] = pd.to_numeric(df_components['5W'], errors='coerce')
    
    

    # Perform calculations, ensure float division and handle potential NaN values
    df_components['Inns_per_Mat_Bat'] = df_components['Bat_Inns'].astype(float) / df_components['Bat_Mat'].astype(float)
    df_components['NO_per_Inns'] = df_components['NO'].astype(float) / df_components['Bat_Inns'].astype(float)
    df_components['Runs_per_BF'] = df_components['Bat_Runs'].astype(float) / df_components['BF'].astype(float)
    df_components['Ave_Score_Bat'] = df_components['Bat_Ave'].astype(float)  # Direct Ave
    df_components['50s100s_per_Inns'] = (df_components["50's"].astype(float) + 2 * df_components["100's"].astype(float)) / df_components['Bat_Inns'].astype(float)
    df_components['Zeros_per_Inns'] = df_components["Zeros"].astype(float) / df_components['Bat_Inns'].astype(float)
    df_components['Inns_per_Mat_bowl'] = df_components['Bowl_Inns'].astype(float) / df_components['Bowl_Mat'].astype(float) # Ensure float division
    df_components['Overs_per_Mat'] = df_components['Overs'].astype(float) / df_components['Bowl_Mat'].astype(float)
    df_components['Wkts_per_Overs'] = df_components['Wkts'].astype(float) / df_components['Overs'].astype(float)
    df_components['Ave_Score_Bat'] = df_components['Bowl_Ave'].astype(float)  # Direct Ave
    df_components['Econ_Rate'] = df_components['Econ'].astype(float)  # Direct Econ
    df_components['SR_per_100'] = df_components['Bowl_SR'].astype(float) / 100
    df_components['4W5W_per_Mat'] = (df_components['4W'].astype(float) + df_components['5W'].astype(float)) / df_components['Bowl_Mat'].astype(float)
    # Fielding component: Catches Taken per Match
    df_components['Catches_per_Mat'] = df_components['Catches Taken'].astype(float) / df_components['Bowl_Mat'].astype(float)

    return df_components


In [6]:
data = pd.read_excel("../../all season cleaned data/allrounderset_ipl.xlsx")
df = data.copy()
print("Data types of your columns before calculation:")

Data types of your columns before calculation:


In [8]:
numeric_cols = ['Bat_Mat', 'Bat_Inns', 'NO', 'Bat_Runs', 'BF', 'Bat_Ave', "100's", "50's", "Zeros", 'Bowl_Inns', 'Bowl_Mat', 'Overs', 'Wkts', 'Bowl_Ave', 'Econ', 'Bowl_SR', '4W', '5W', 'Catches Taken']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce') # Use df here

# Verify data types AFTER explicit conversion
print("\nData types of your columns AFTER explicit numeric conversion:")
print(df.dtypes) # Use df here

# --- Correctly Remove Rows with NaN Values ---
cols_to_check_nan = ['Bat_Inns', 'Bowl_Inns', 'Bat_Mat', 'Overs', 'Wkts', 'Bowl_Ave', 'Econ', 'Bowl_SR', '4W', '5W', 'Catches Taken']
print("\nNumber of NaN values BEFORE removing:")
print(df[cols_to_check_nan].isna().sum()) # Check NaN counts in relevant columns before removal

df_cleaned = df.dropna(subset=cols_to_check_nan, how='any') # Use how='any' to drop row if ANY of these cols is NaN
print("\nNumber of rows BEFORE removing NaNs:", len(df))
print("Number of rows AFTER removing NaNs:", len(df_cleaned)) # Print length of df_cleaned


# Calculate bowler formula components using the CLEANED data (df_cleaned)
calculations_with_data = calculate_allrounder_formula_components(df_cleaned) # Pass df_cleaned

# Select the component columns for correlation analysis
component_columns = [
    'Inns_per_Mat_Bat',
    'NO_per_Inns',
    'Runs_per_BF',
    'Ave_Score_Bat',
    '50s100s_per_Inns',
    'Zeros_per_Inns',
    'Inns_per_Mat_bowl',
    'Overs_per_Mat',
    'Wkts_per_Overs',
    'Econ_Rate',
    'SR_per_100',
    '4W5W_per_Mat',
    'Catches_per_Mat'
]

# Calculate the correlation matrix for the components
correlation_matrix = calculations_with_data[component_columns].corr() # Use calculations_with_data

# Display the correlation matrix
print("\nCorrelation Matrix of Bowler Consistency Formula Components (after NaN removal):")
print(correlation_matrix)


Data types of your columns AFTER explicit numeric conversion:
Player            object
Span              object
Catches Taken      int64
Bat_Mat            int64
Bat_Inns         float64
NO               float64
Bat_Runs         float64
HS                object
Bat_Ave          float64
BF               float64
Bat_SR            object
100's            float64
50's             float64
Zeros            float64
4s                object
6s                object
Bowl_Mat           int64
Bowl_Inns        float64
Overs            float64
Mdns              object
Bowl_Runs         object
Wkts             float64
Bowl_Ave         float64
Econ             float64
Bowl_SR          float64
4W               float64
5W               float64
dtype: object

Number of NaN values BEFORE removing:
Bat_Inns          1
Bowl_Inns         6
Bat_Mat           0
Overs             6
Wkts              6
Bowl_Ave         11
Econ              6
Bowl_SR          11
4W                6
5W                6
Catches T

KeyError: 'Inns'