In [10]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import chi2, mutual_info_classif, f_classif
from sklearn.preprocessing import LabelEncoder
from scipy.stats import pearsonr

# Load the data
file_path = '/content/atp.csv'
df = pd.read_csv(file_path)

# Encode the target variable
df['Sex_encoded'] = LabelEncoder().fit_transform(df['Sex'])

# Define features and target
X = df[['Cumulative Weeks', 'Weeks', 'Age (years)']]
y = df['Sex_encoded']

# Make feature values non-negative for chi2 calculation
X_non_negative = np.abs(X)

# Chi-square test
chi2_vals, _ = chi2(X_non_negative, y)

# Information gain
info_gain = mutual_info_classif(X_non_negative, y)

# Entropy calculation
entropy_vals = -np.sum(X_non_negative * np.log2(X_non_negative + 1e-9), axis=0) / len(X_non_negative)

# Pearson's correlation
pearson_corr = [pearsonr(X[col], y)[0] for col in X.columns]

# ANOVA F-test
anova_vals, _ = f_classif(X, y)

# Combine results into a DataFrame
results = pd.DataFrame({
    'Feature': ['Cumulative Weeks', 'Weeks', 'Age (years)'],
    'Chi2': chi2_vals,
    'Information Gain': info_gain,
    'Entropy': entropy_vals,
    'Pearson Correlation': pearson_corr,
    'ANOVA F-value': anova_vals
})

print(results)


                           Feature      Chi2  Information Gain     Entropy  \
Cumulative Weeks  Cumulative Weeks  4.224327          0.068484 -720.044461   
Weeks                        Weeks  4.382376          0.013115 -133.389507   
Age (years)            Age (years)  3.796029          0.079069 -111.819547   

                  Pearson Correlation  ANOVA F-value  
Cumulative Weeks             0.014125       0.039112  
Weeks                        0.019301       0.073040  
Age (years)                  0.190254       7.360952  


In [11]:
print(df.columns)


Index(['Player', 'DOB', 'Cumulative Weeks', 'Date', 'Sex', 'Weeks',
       'Age (days)', 'Age (years)', '(Age)/365.25', 'Sex_encoded'],
      dtype='object')
