**Name:** Keval R Shah  
**SAP:** 60009220061  
**Batch:** D2-1  
**Lab:** ML Lab 2


In [1]:
!ls

sample_data  tennis.xlsx


In [3]:
import numpy as np
import pandas as pd

In [4]:
# Load the play-tennis dataset from the Excel workbook in the working directory.
# NOTE(review): the preview of this frame shows 7 columns (including
# `Unnamed: 0` and `day`), while the later `isna()` / `shape` outputs report
# only 5 columns. Presumably a column-dropping cell was deleted; confirm the
# notebook still reproduces its outputs after Restart Kernel -> Run All.
df = pd.read_excel('tennis.xlsx')

In [5]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,day,outlook,temp,humidity,wind,tennis
0,d1,sunny,hot,high,weak,no
1,d2,sunny,hot,high,strong,no
2,d3,overcast,hot,high,weak,yes
3,d4,rain,mild,high,weak,yes
4,d5,rain,cool,normal,weak,yes


In [81]:
# Count missing values per column.
df.isna().sum()

outlook     0
temp        0
humidity    0
wind        0
tennis      0
dtype: int64

In [82]:
# (rows, columns) of the frame.
df.shape

(14, 5)

In [6]:
def totalEntropy(df, target):
    """Return the Shannon entropy (in bits) of the ``target`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the ``target`` column.
    target : str
        Name of the column whose class distribution is measured.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the observed classes. An empty column and a
        column holding a single class both yield ``0.0``.

    Notes
    -----
    Missing values in ``target`` are ignored: probabilities are computed over
    the observed (non-null) labels only.
    """
    # Count every class once instead of recomputing value_counts() per class.
    # value_counts() drops NaN, so a missing label can no longer trigger a
    # failed lookup the way iterating over unique() did.
    class_counts = df[target].value_counts()
    observed = class_counts.sum()

    entropy = 0.0
    for count in class_counts:
        if count == 0:
            # Unobserved categories (e.g. categorical dtype) contribute nothing.
            continue
        P = count / observed
        entropy -= P * np.log2(P)

    return entropy

In [62]:
# Entropy of the `tennis` column over the whole dataset.
totalEntropy(df,'tennis')

0.9402859586706311

In [7]:
def informationGain(df, attribute, target):
    """Return the information gain obtained by splitting ``df`` on ``attribute``.

    The gain is the entropy of ``target`` before the split minus the
    size-weighted average entropy of ``target`` within each distinct value of
    ``attribute``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both ``attribute`` and ``target`` columns.
    attribute : str
        Column to split on.
    target : str
        Column whose entropy is measured.

    Returns
    -------
    float
        Entropy before the split minus the weighted entropy after it.
    """
    entropy_before = totalEntropy(df, target)
    row_count = len(df)

    def weighted_branch_entropy(branch_value):
        # Entropy of one branch, scaled by the share of rows that fall into it.
        branch = df[df[attribute] == branch_value]
        return (len(branch) / row_count) * totalEntropy(branch, target)

    entropy_after = sum(
        weighted_branch_entropy(branch_value)
        for branch_value in df[attribute].unique()
    )
    return entropy_before - entropy_after

In [8]:
def AttrEntropy(df, attr, target=None):
    """Return the entropy of the target that remains after splitting on ``attr``.

    This is the size-weighted average, over every distinct value of ``attr``,
    of the target's entropy (in bits) inside that group.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``attr`` and the target column.
    attr : str
        Column to split on.
    target : str, optional
        Target column. Defaults to the last column of ``df``.

    Returns
    -------
    float
        Weighted conditional entropy; ``0.0`` for an empty frame.
    """
    if target is None:
        target = df.keys()[-1]

    total_rows = len(df)
    if total_rows == 0:
        return 0.0

    weighted_entropy = 0.0
    for variable in df[attr].unique():
        # One mask aligned with df, instead of chaining a second boolean mask
        # onto an already-filtered Series.
        in_group = df[attr] == variable
        group_size = in_group.sum()
        if group_size == 0:
            # NaN never compares equal to itself, so a missing attribute value
            # selects no rows; skip it rather than divide by zero.
            continue

        group_entropy = 0.0
        for count in df.loc[in_group, target].value_counts():
            if count == 0:
                continue
            P = count / group_size
            group_entropy -= P * np.log2(P)

        weighted_entropy += (group_size / total_rows) * group_entropy

    return weighted_entropy


In [64]:
# Weighted entropy of the last column after splitting on `outlook`.
AttrEntropy(df, attr="outlook")

0.6935361388961918

In [66]:
# Weighted entropy of the last column after splitting on `temp`.
AttrEntropy(df, attr="temp")

0.9110633930116763

In [68]:
# Weighted entropy of the last column after splitting on `wind`.
AttrEntropy(df, attr="wind")

0.8921589282623617

In [69]:
# Weighted entropy of the last column after splitting on `humidity`.
AttrEntropy(df, attr="humidity")

0.7884504573082896

In [70]:
class DecisionTreeNode:
    """A node of a decision tree: either a leaf or a split on one attribute.

    Parameters
    ----------
    attribute : optional
        Name of the attribute tested at this node (``None`` for a leaf).
    value : optional
        Attribute value that routes an instance to ``left``.
    result : optional
        Predicted label; a node whose ``result`` is not ``None`` is a leaf.
    left, right : DecisionTreeNode, optional
        Binary branches: ``left`` is taken when the instance's attribute equals
        ``value``, ``right`` otherwise.
    children : dict, optional
        Mapping ``attribute value -> child node`` for multi-way splits, so an
        attribute with more than two values keeps one branch per value.
        Defaults to a fresh empty dict.
    default : optional
        Fallback label for attribute values that have no entry in ``children``.
    """

    def __init__(self, attribute=None, value=None, result=None, left=None, right=None,
                 children=None, default=None):
        self.attribute = attribute
        self.value = value
        self.result = result
        self.left = left
        self.right = right
        # A new dict per node; a shared mutable default would leak branches
        # between nodes.
        self.children = {} if children is None else children
        self.default = default

In [71]:
def _should_stop_growing(df, attributes, target, depth, max_depth):
    """Return True when the current node must become a leaf."""
    return (
        depth >= max_depth               # depth budget exhausted
        or len(attributes) == 0          # nothing left to split on
        or df[target].nunique() <= 1     # node is already pure
    )


def buildDecisionTree(df, attributes, target, depth=0, max_depth=float('inf')):
    """Recursively build a decision tree, splitting on the highest information gain.

    Parameters
    ----------
    df : pandas.DataFrame
        Training rows containing the ``attributes`` columns and ``target``.
    attributes : list
        Candidate attribute names for splitting at this node.
    target : str
        Name of the label column.
    depth : int, optional
        Depth of the node being built (0 for the root).
    max_depth : number, optional
        Depth at which growth stops and a majority-class leaf is returned.

    Returns
    -------
    DecisionTreeNode
        A leaf (``result`` set) or an internal node. Internal nodes carry
        ``children`` (one branch per attribute value) and ``default`` (the
        majority label at the node). ``left`` / ``value`` / ``right`` are
        still filled in as before, for code that only knows the binary layout.
    """
    if len(df) == 0:
        # No rows to learn from: return a node that predicts nothing.
        return DecisionTreeNode(result=None)

    modes = df[target].mode()
    majority_label = modes.iloc[0] if len(modes) else None

    # The stopping rule lives in a local helper so the cell does not depend on
    # a function defined elsewhere in the kernel.
    if _should_stop_growing(df, attributes, target, depth, max_depth):
        return DecisionTreeNode(result=majority_label)

    best_attribute = max(attributes, key=lambda attr: informationGain(df, attr, target))
    node = DecisionTreeNode(attribute=best_attribute, value=None, result=None, left=None, right=None)
    # Assigned as plain attributes so this works whether or not the node class
    # declares them.
    node.children = {}
    node.default = majority_label

    # Splitting again on an attribute that is constant within a branch gains
    # nothing, so remove it from the candidates passed down.
    remaining_attributes = [attr for attr in attributes if attr != best_attribute]

    for value in df[best_attribute].unique():
        subset = df[df[best_attribute] == value]
        if len(subset) == 0:
            # Reached for NaN values, which never compare equal to themselves.
            child_node = DecisionTreeNode(result=majority_label)
        else:
            child_node = buildDecisionTree(subset, remaining_attributes, target,
                                           depth=depth + 1, max_depth=max_depth)

        # Keep every branch; a single `right` slot loses all but the last one.
        node.children[value] = child_node

        if node.left is None:
            node.left = child_node
            node.value = value
        else:
            node.right = child_node

    return node


In [72]:
def predict(node, instance):
    """Walk the tree from ``node`` and return the predicted label for ``instance``.

    Parameters
    ----------
    node : DecisionTreeNode
        Root of the (sub)tree to evaluate.
    instance : mapping
        Object with a ``.get(key, default)`` method mapping attribute names to
        values (for example a ``dict``).

    Returns
    -------
    The ``result`` of the leaf that is reached. ``None`` is returned when the
    instance lacks the attribute tested at a node or when the walk reaches a
    missing branch. For a node with a ``children`` mapping, an attribute value
    with no matching branch yields the node's ``default`` (``None`` if absent).
    """
    while node is not None:
        if node.result is not None:
            return node.result

        attribute_value = instance.get(node.attribute, None)
        if attribute_value is None:
            return None

        # Prefer the per-value mapping when the node has one: a binary
        # left/right pair cannot tell apart more than two attribute values.
        children = getattr(node, 'children', None)
        if children:
            if attribute_value not in children:
                return getattr(node, 'default', None)
            node = children[attribute_value]
        elif attribute_value == node.value:
            node = node.left
        else:
            node = node.right

    # Walked into a branch that does not exist.
    return None


In [74]:
# Feature columns used for splitting, and the label column to predict.
X = df[['outlook', 'temp', 'humidity', 'wind']]
y = df['tennis']

In [75]:
# Build the tree on features and label recombined into one frame, with the
# feature column names as split candidates and growth capped at max_depth=3.
root_node = buildDecisionTree(pd.concat([X, y], axis=1), list(X.columns), 'tennis', max_depth=3)

In [76]:
# Single instance to classify, keyed by feature column name.
case1 = {'outlook': 'sunny', 'temp': 'hot', 'humidity': 'high', 'wind': 'weak'}

predicted_result = predict(root_node, case1)
print("Predicted Result:", predicted_result)

Predicted Result: no


In [78]:
# Second instance to classify. It rebinds `case1` and `predicted_result`, so
# the previous cell's values are overwritten once this cell runs.
case1 = {'outlook': 'rain', 'temp': 'mild', 'humidity': 'high', 'wind': 'weak'}

predicted_result = predict(root_node, case1)
print("Predicted Result:", predicted_result)

Predicted Result: yes
