In [1]:
from collections import Counter
import numpy as np


# Train data

In [2]:
# Training features: one sample per text line, feature values separated by
# whitespace, so every sample becomes one row of four categorical strings.
# NOTE(review): the columns look like outlook, temperature, humidity and
# wind, but the notebook never names them — confirm.
X_train = np.array([
    sample.split()
    for sample in (
        'Sunny     Hot   High    Weak',
        'Sunny     Hot   High    Strong',
        'Overcast  Hot   High    Weak',
        'Rain      Mild  High    Weak',
        'Rain      Cool  Normal  Weak',
        'Rain      Cool  Normal  Strong',
        'Overcast  Cool  Normal  Strong',
        'Sunny     Mild  High    Weak',
        'Sunny     Cool  Normal  Weak',
        'Rain      Mild  Normal  Weak',
        'Sunny     Mild  Normal  Strong',
        'Overcast  Mild  High    Strong',
        'Overcast  Hot   Normal  Weak',
        'Rain      Mild  High    Strong',
    )
])

In [4]:
# Training labels, one per row of X_train, in the same order.
y_train = np.array(
    'No No Yes Yes Yes No Yes No Yes Yes Yes Yes Yes No'.split()
)


# Test data

In [6]:
# Samples to classify: one per text line, using the same four-column layout
# as the training features.
X_test = np.array([
    sample.split()
    for sample in (
        'Sunny     Hot   Normal  Weak',
        'Overcast  Cool  Normal  Strong',
        'Rain      Mild  High    Weak',
        'Sunny     Mild  High    Strong',
    )
])

# Decision tree class

In [7]:
class DecisionTree:
    """Binary decision-tree classifier that splits on feature equality.

    Every internal node stores a ``feature_index`` and a ``value``. A sample
    whose ``sample[feature_index] == value`` follows the ``'left'`` branch,
    every other sample follows the ``'right'`` branch. Splits are chosen
    greedily to minimise the size-weighted entropy of the two children.

    Leaves are dicts with a ``'prediction'`` (the majority label of the
    training samples that reached the leaf) and ``'probabilities'`` (the
    relative frequency of every label seen in that leaf).
    """

    def __init__(self):
        # Fitted tree as nested dicts; stays empty until fit() is called.
        self.tree = {}

    def fit(self, X, y):
        """Grow the tree from training data and store it in ``self.tree``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like holding one label per row of ``X``.

        Raises:
            ValueError: if ``X`` is not 2-D, if ``X`` and ``y`` differ in
                length, or if the training set is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features), got {} dimension(s).".format(X.ndim)
            )
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same length, got {} and {}.".format(len(X), len(y))
            )
        self.tree = self._build_tree(X, y)

    def _calculate_entropy(self, y):
        """Return the Shannon entropy (in bits) of the labels in ``y``.

        An empty ``y`` has entropy 0.
        """
        total = len(y)
        if total == 0:
            return 0.0
        entropy = 0.0
        for count in Counter(y).values():
            probability = count / total
            entropy -= probability * np.log2(probability)
        return entropy

    def _split_data(self, X, y, feature_index, value):
        """Partition the samples on ``X[:, feature_index] == value``.

        Returns:
            ``(left_X, left_y, right_X, right_y)`` where the "left" arrays
            hold the matching samples and the "right" arrays hold the rest.
            Row order is preserved within each side.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        matches = X[:, feature_index] == value
        return X[matches], y[matches], X[~matches], y[~matches]

    def _find_best_split(self, X, y):
        """Find the equality test with the lowest weighted child entropy.

        Only splits that put at least one sample on each side are considered.

        Returns:
            ``(feature_index, value)`` of the best split, or ``(-1, None)``
            when no feature can separate the samples.
        """
        best_entropy = float('inf')
        best_feature_index = -1
        best_value = None
        total = len(y)

        for feature_index in range(X.shape[1]):
            # dict.fromkeys de-duplicates while keeping first-appearance
            # order, so ties are broken the same way on every run (iterating
            # a set of strings depends on per-process hash randomisation).
            candidates = dict.fromkeys(X[:, feature_index])
            if len(candidates) < 2:
                # A constant column cannot separate anything.
                continue
            for value in candidates:
                _, left_y, _, right_y = self._split_data(X, y, feature_index, value)
                if len(left_y) == 0 or len(right_y) == 0:
                    # A one-sided split makes no progress and would recurse forever.
                    continue
                total_entropy = (
                    (len(left_y) / total) * self._calculate_entropy(left_y)
                    + (len(right_y) / total) * self._calculate_entropy(right_y)
                )
                if total_entropy < best_entropy:
                    best_entropy = total_entropy
                    best_feature_index = feature_index
                    best_value = value

        return best_feature_index, best_value

    def _make_leaf(self, y):
        """Build a leaf node from the (non-empty) labels that reached it."""
        counts = Counter(y)
        total = len(y)
        # most_common keeps first-seen order among equally frequent labels.
        prediction = counts.most_common(1)[0][0]
        return {
            'prediction': prediction,
            'probabilities': {label: count / total for label, count in counts.items()},
        }

    def _build_tree(self, X, y):
        """Recursively build the subtree for the samples ``X`` / labels ``y``.

        Recursion stops at a leaf when all labels agree, or when no split can
        separate the remaining samples (identical feature rows with different
        labels); such a leaf predicts the majority label.

        Raises:
            ValueError: if ``y`` is empty.
        """
        if len(y) == 0:
            raise ValueError("Cannot build a decision tree from an empty training set.")

        if len(set(y)) == 1:
            return self._make_leaf(y)

        best_feature_index, best_value = self._find_best_split(X, y)
        if best_feature_index == -1:
            # Labels differ but the features cannot tell the samples apart.
            return self._make_leaf(y)

        left_X, left_y, right_X, right_y = self._split_data(X, y, best_feature_index, best_value)

        # Both sides are non-empty and strictly smaller, so recursion terminates.
        return {
            'feature_index': best_feature_index,
            'value': best_value,
            'left': self._build_tree(left_X, left_y),
            'right': self._build_tree(right_X, right_y)
        }

    def _predict_single(self, sample, tree):
        """Walk ``tree`` for one sample.

        Returns:
            ``(prediction, probabilities)`` taken from the leaf that is reached.
        """
        node = tree
        while 'prediction' not in node:
            if sample[node['feature_index']] == node['value']:
                node = node['left']
            else:
                node = node['right']
        return node['prediction'], node['probabilities']

    def predict(self, X):
        """Classify every row of ``X`` with the fitted tree.

        Call ``fit`` first: an unfitted tree has no nodes to traverse.

        Returns:
            ``(predictions, probabilities)``: two parallel lists with one
            predicted label and one ``{label: probability}`` dict per sample.
        """
        predictions = []
        probabilities = []
        for sample in X:
            prediction, probs = self._predict_single(sample, self.tree)
            predictions.append(prediction)
            probabilities.append(probs)
        return predictions, probabilities



In [8]:
# Create the classifier and grow its tree from the training features and labels.
tree = DecisionTree()
tree.fit(X_train, y_train)

In [9]:
# predict() returns two parallel lists: one predicted label and one
# {label: probability} dict for each row of X_test.
predictions, probabilities = tree.predict(X_test)

# Predictions and probabilities

In [10]:
# Print the predicted labels, then the per-sample probability dicts.
print("Predictions:", predictions)
print("Probabilities:", probabilities)


Predictions: ['Yes', 'Yes', 'Yes', 'No']
Probabilities: [{'Yes': 1.0}, {'Yes': 1.0}, {'Yes': 1.0}, {'No': 1.0}]
