In [8]:
import pandas as pd
import numpy as np
from math import log2

# Data from the given table
data = ({
    'Athlete': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
    'Training Hours': ['High', 'Medium', 'Low', 'High', 'Medium', 'Low', 'High', 'Medium', 'Low', 'High'],
    'Rest Hours': ['Low', 'High', 'Medium', 'Medium', 'High', 'Low', 'Low', 'Low', 'High', 'High'],
    'Gym Workouts': ['Low', 'Medium', 'High', 'Medium', 'Low', 'High', 'Medium', 'Low', 'Medium', 'High'],
    'Performance': ['Lose', 'Win', 'Lose', 'Win', 'Win', 'Lose', 'Win', 'Lose', 'Win', 'Win']
})

# Create a DataFrame
df = pd.DataFrame(data)
print(df)

  Athlete Training Hours Rest Hours Gym Workouts Performance
0       A           High        Low          Low        Lose
1       B         Medium       High       Medium         Win
2       C            Low     Medium         High        Lose
3       D           High     Medium       Medium         Win
4       E         Medium       High          Low         Win
5       F            Low        Low         High        Lose
6       G           High        Low       Medium         Win
7       H         Medium        Low          Low        Lose
8       I            Low       High       Medium         Win
9       J           High       High         High         Win


In [9]:


# Function to calculate entropy
def entropy(column):
    probabilities = column.value_counts(normalize=True)
    return -sum(probabilities * np.log2(probabilities))

# Function to calculate information gain
def information_gain(df, attribute, target):
    # Calculate the entropy of the whole target column
    total_entropy = entropy(df[target])

    # Calculate the weighted entropy of each value of the attribute
    values = df[attribute].unique()
    weighted_entropy = 0
    for value in values:
        subset = df[df[attribute] == value]
        weighted_entropy += (len(subset) / len(df)) * entropy(subset[target])

    # Information gain is the difference in entropy
    print(weighted_entropy)
    return total_entropy - weighted_entropy

# Function to calculate intrinsic value
def intrinsic_value(df, attribute):
    # Calculate the frequency of each unique value in the attribute
    probabilities = df[attribute].value_counts(normalize=True)
    return -sum(probabilities * np.log2(probabilities))

# Function to calculate gain ratio
def gain_ratio(df, attribute, target):
    info_gain = information_gain(df, attribute, target)
    intr_value = intrinsic_value(df, attribute)

    if intr_value == 0:  # To avoid division by zero
        return 0

    return info_gain / intr_value

# Calculate Gain Ratio for each attribute
attributes = ['Training Hours', 'Rest Hours', 'Gym Workouts']
target = 'Performance'

for attribute in attributes:
    ratio = gain_ratio(df, attribute, target)
    print(f'Gain Ratio for {attribute}: {ratio:.4f}')

# max gain ratio among feature
best_feature = max(attributes, key=lambda x: gain_ratio(df, x, target))
print(f"\nThe feature with the highest Gain Ratio is: {best_feature}")

0.8754887502163469
Gain Ratio for Training Hours: 0.0608
0.5245112497836532
Gain Ratio for Rest Hours: 0.2933
0.5509775004326937
Gain Ratio for Gym Workouts: 0.2673
0.8754887502163469
0.5245112497836532
0.5509775004326937

The feature with the highest Gain Ratio is: Rest Hours
