# In [None]:
import pandas as pd
from collections import Counter
import math

def load_enjoysport(path='/content/enjoysports.csv'):
    """Load the EnjoySport dataset from a CSV file into a DataFrame.

    The file is read without treating any row as a header, and the seven
    fixed column names below are assigned to it. If the first row of the file
    is itself a header row (its cells equal the column names, ignoring case
    and surrounding whitespace), that row is dropped so it is not mistaken for
    a training example.

    Args:
        path: Location of the CSV file. Defaults to the path the original
            script used.

    Returns:
        A pandas DataFrame with columns 'Sky', 'AirTemp', 'Humidity', 'Wind',
        'Water', 'Forecast' and 'EnjoySport'.

    Raises:
        ValueError: If the file does not have exactly seven columns (raised
            by pandas when the column names are assigned).
    """
    columns = ['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport']
    data = pd.read_csv(path, header=None)
    data.columns = columns

    # A header line read with header=None ends up as the first data row and
    # would later show up as a bogus branch in the tree; strip it if present.
    if len(data) > 0:
        first_row = [str(cell).strip().lower() for cell in data.iloc[0]]
        if first_row == [name.lower() for name in columns]:
            data = data.iloc[1:].reset_index(drop=True)
    return data

def entropy_value(data):
    """Return the base-2 entropy of the 'EnjoySport' column of `data`.

    Each distinct label contributes -p * log2(p), where p is the label's
    count divided by the number of rows in `data`. When `data` has no rows
    the result is 0.
    """
    n_rows = len(data)
    label_counts = Counter(data['EnjoySport'])

    result = 0
    for occurrences in label_counts.values():
        share = occurrences / n_rows
        result = result - share * math.log2(share)
    return result

def i_g_value(data, attribute):
    """Return the information gain obtained by splitting `data` on `attribute`.

    The gain is the entropy of the whole of `data` minus the sum, over every
    distinct value of `attribute`, of the matching subset's entropy weighted
    by the fraction of rows that fall in that subset.
    """
    column = data[attribute]
    levels = column.unique()
    baseline = entropy_value(data)
    n_rows = len(data)

    remainder = 0
    for level in levels:
        part = data[column == level]
        part_entropy = entropy_value(part)
        remainder += (len(part) / n_rows) * part_entropy

    return baseline - remainder

def decision_tree(data):
    """Build a decision tree for the 'EnjoySport' target by recursive splitting.

    At each level the attribute with the highest information gain (as computed
    by `i_g_value`) is chosen, the rows are partitioned by that attribute's
    values, and a subtree is built for every partition. The chosen attribute
    is removed from each partition before recursing, so every attribute is
    used at most once on any path and the recursion always terminates.

    Args:
        data: DataFrame containing an 'EnjoySport' column plus zero or more
            attribute columns.

    Returns:
        - None if `data` has no rows.
        - The label itself if every row has the same 'EnjoySport' value.
        - The most frequent label if no attribute columns remain.
        - Otherwise a nested dict of the form
          {attribute: {attribute_value: subtree_or_label, ...}}.
    """
    # Check for "no rows" first so the remaining cases can assume data exists.
    if len(data) == 0:
        return None

    labels = data['EnjoySport']
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    attributes = [column for column in data.columns if column != 'EnjoySport']
    if not attributes:
        # Nothing left to split on: fall back to a majority vote.
        return Counter(labels).most_common(1)[0][0]

    best_attribute = max(attributes, key=lambda attr: i_g_value(data, attr))
    branches = {}
    for value in data[best_attribute].unique():
        # Drop the attribute just used; otherwise rows that agree on every
        # attribute but disagree on the label would be re-split forever.
        subset = data[data[best_attribute] == value].drop(columns=[best_attribute])
        branches[value] = decision_tree(subset)
    return {best_attribute: branches}

# Load the dataset, build the decision tree from it, and print the resulting
# tree (a nested dict, or a bare label / None for degenerate inputs).
data =load_enjoysport()
d_t =decision_tree(data)
print(d_t)

# Output: {'Sky': {'sky': 'enjoysport', 'sunny': 'yes', 'rainy': 'no'}}
