# Lab 09

#### Q1

In [1]:
import pandas as pd
import numpy as np

In [2]:
class DecisionTreeNode:
    def __init__(self, attribute=None, value=None, left=None, right=None, label=None):
        self.attribute = attribute
        self.value = value
        self.left = left
        self.right = right
        self.label = label  

In [3]:
class DecisionTree:
    def __init__(self):
        self.root = None
    def fit(self, data):
        self.root = self.build_tree(data)
    def entropy(self, labels):
        value_counts = labels.value_counts(normalize=True)
        return -np.sum(value_counts * np.log2(value_counts + 1e-9))
    def information_gain(self, data, split_attribute, target):
        totalentropy = self.entropy(data[target])
        value_counts = data[split_attribute].value_counts(normalize=True)
        weightedentropy = sum(value_counts[v] * self.entropy(data[data[split_attribute] == v][target]) for v in value_counts.index)
        return totalentropy - weightedentropy
    def best_split(self, data, target):
        best_gain = -1
        best_attribute = None
        for attribute in data.columns[:-1]:
            gain = self.information_gain(data, attribute, target)
            if gain > best_gain:
                best_gain = gain
                best_attribute = attribute
        return best_attribute
    def build_tree(self, data):
        target = data.columns[-1]
        labels = data[target]
        if len(labels.unique()) == 1:
            return DecisionTreeNode(label=labels.iloc[0])
        if len(data.columns) == 1:
            return DecisionTreeNode(label=labels.mode()[0])
        best_attribute = self.best_split(data, target)
        node = DecisionTreeNode(attribute=best_attribute)
        for value in data[best_attribute].unique():
            subset = data[data[best_attribute] == value]
            child_node = self.build_tree(subset.drop(columns=[best_attribute]))
            if node.left is None:
                node.left = child_node
                node.value = value
            else:
                node.right = child_node
        return node
    def classify(self, node, sample):
        if node.label is not None:
            return node.label
        attribute_value = sample[node.attribute]
        if attribute_value == node.value:
            return self.classify(node.left, sample) if node.left else node.label
        else:
            return self.classify(node.right, sample) if node.right else node.label
    def predict(self, sample):
        return self.classify(self.root, sample)

In [4]:
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 
                'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
    'Temp': [85, 80, 83, 70, 68, 65, 64, 72, 69, 75, 75, 72, 81, 71],
    'Humidity': [85, 90, 78, 96, 80, 70, 65, 95, 70, 80, 70, 90, 75, 80],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 
             'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 
             'Yes', 'Yes', 'Yes', 'No']
}

In [5]:
df = pd.DataFrame(data)
df

Unnamed: 0,Outlook,Temp,Humidity,Wind,Play
0,Sunny,85,85,Weak,No
1,Sunny,80,90,Strong,No
2,Overcast,83,78,Weak,Yes
3,Rain,70,96,Weak,Yes
4,Rain,68,80,Weak,Yes
5,Rain,65,70,Strong,No
6,Overcast,64,65,Strong,Yes
7,Sunny,72,95,Weak,No
8,Sunny,69,70,Weak,Yes
9,Rain,75,80,Weak,Yes


In [6]:
tree = DecisionTree()
tree.fit(df)

In [7]:
new_sample = {
    'Outlook': 'Sunny',
    'Temp': 75,
    'Humidity': 70,
    'Wind': 'Weak'
}

In [8]:
new_sample_df = pd.DataFrame([new_sample])

In [9]:
new_sample_df

Unnamed: 0,Outlook,Temp,Humidity,Wind
0,Sunny,75,70,Weak


In [10]:
prediction = tree.predict(new_sample_df.iloc[0])

In [11]:
prediction

'No'

#### Q2

In [12]:
class DecisionTreeNode:
    def __init__(self, attribute=None, value=None, left=None, right=None, label=None):
        self.attribute = attribute
        self.value = value
        self.left = left
        self.right = right
        self.label = label  

In [13]:
class DecisionTree:
    def __init__(self):
        self.root = None
    def fit(self, data):
        self.root = self.build_tree(data)
    def gini_impurity(self, labels):
        total_count = len(labels)
        if total_count == 0:
            return 0
        proportions = labels.value_counts(normalize=True)
        return 1 - sum(proportions ** 2)
    def information_gain(self, data, split_attribute, target):
        total_impurity = self.gini_impurity(data[target])
        weighted_impurity = 0
        for value in data[split_attribute].unique():
            subset = data[data[split_attribute] == value]
            weighted_impurity += (len(subset) / len(data)) * self.gini_impurity(subset[target])
        return total_impurity - weighted_impurity
    def best_split(self, data, target):
        best_gain = -1
        best_attribute = None
        for attribute in data.columns[:-1]:
            gain = self.information_gain(data, attribute, target)
            if gain > best_gain:
                best_gain = gain
                best_attribute = attribute
        return best_attribute
    def build_tree(self, data):
        target = data.columns[-1]
        labels = data[target]
        if len(labels.unique()) == 1:
            return DecisionTreeNode(label=labels.iloc[0])
        if len(data.columns) == 1:
            return DecisionTreeNode(label=labels.mode()[0])
        best_attribute = self.best_split(data, target)
        node = DecisionTreeNode(attribute=best_attribute)
        for value in data[best_attribute].unique():
            subset = data[data[best_attribute] == value]
            child_node = self.build_tree(subset.drop(columns=[best_attribute]))
            if node.left is None:
                node.left = child_node
                node.value = value
            else:
                node.right = child_node
        return node
    def classify(self, node, sample):
        if node.label is not None:
            return node.label
        attribute_value = sample[node.attribute]
        if attribute_value == node.value:
            return self.classify(node.left, sample) if node.left else node.label
        else:
            return self.classify(node.right, sample) if node.right else node.label
    def predict(self, sample):
        return self.classify(self.root, sample)

In [14]:
tree = DecisionTree()
tree.fit(df)

In [15]:
prediction = tree.predict(new_sample_df.iloc[0])

In [16]:
prediction

'No'