In [8]:
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split

## Data Preparation

In [9]:
# Toy dataset: each row of X is one sample described by two binary features,
# and y holds the class label for the row at the same position.
X = np.array(
    [
        [1, 1],
        [1, 0],
        [0, 1],
        [0, 0],
        [1, 1],
        [0, 0],
    ]
)
y = np.array([0, 1, 1, 0, 0, 0])

# Hold out about a third of the samples for evaluation. The fixed random_state
# keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

## Decision Tree
In a Decision Tree, the topmost node is known as the root node. The tree works by partitioning the dataset into subsets based on the feature that provides the best separation between the target variable classes. The partitioning process is done recursively, producing a tree with decision nodes and leaf nodes. A decision node has two or more branches, and a leaf node represents a classification or decision. The decision tree makes decisions by asking multiple questions and following the path down the tree that corresponds to the answers.

In [10]:
class DecisionTree:
    """Multiway decision tree classifier that splits on information gain.

    Every distinct value of the chosen feature gets its own branch, so the
    model is meant for categorical / discrete features.

    The fitted tree is stored in ``self.tree`` as nested dictionaries. Each key
    has the form ``"<feature index> = <feature value>"`` and maps either to a
    sub-dictionary (another decision node) or to a class label (a leaf).
    """

    def __init__(self):
        # Populated by fit(); stays None until the tree has been trained.
        self.tree = None

    def entropy(self, y):
        """Return the Shannon entropy, in bits, of the label array ``y``."""
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        # np.unique only reports labels that actually occur, so every
        # probability is strictly positive and log2 never sees a zero.
        return -np.sum(probabilities * np.log2(probabilities))

    def information_gain(self, X, y, feature_index):
        """Return the entropy reduction obtained by splitting on one feature.

        Args:
            X: 2-D feature array (rows are samples).
            y: 1-D label array aligned with the rows of ``X``.
            feature_index: Column of ``X`` to evaluate.

        Returns:
            Entropy of ``y`` minus the size-weighted entropy of the label
            subsets produced by grouping on each distinct feature value.
        """
        original_entropy = self.entropy(y)
        column = X[:, feature_index]
        weighted_entropy = 0
        for value in np.unique(column):
            subset = y[column == value]
            weighted_entropy += (len(subset) / len(y)) * self.entropy(subset)
        return original_entropy - weighted_entropy

    def build_tree(self, X, y, features):
        """Recursively build the nested-dict tree for the given samples.

        Args:
            X: 2-D feature array.
            y: 1-D label array aligned with the rows of ``X``.
            features: Column indices that may still be used for splitting.

        Returns:
            A class label when the node is a leaf, otherwise a dict mapping
            ``"<feature> = <value>"`` keys to subtrees.
        """
        unique_classes, class_counts = np.unique(y, return_counts=True)
        if len(unique_classes) == 1:
            # Pure node: nothing left to separate.
            return unique_classes[0]
        if len(features) == 0:
            # No feature left to split on but the labels are still mixed:
            # predict the most frequent class. np.unique sorts the labels, so
            # ties resolve deterministically to the smallest label.
            return unique_classes[np.argmax(class_counts)]

        gains = [self.information_gain(X, y, feature) for feature in features]
        best_feature = features[np.argmax(gains)]
        # Each feature is used at most once along any root-to-leaf path.
        remaining = [f for f in features if f != best_feature]

        tree = {}
        column = X[:, best_feature]
        for value in np.unique(column):
            mask = column == value
            tree[f"{best_feature} = {value}"] = self.build_tree(X[mask], y[mask], remaining)
        return tree

    def fit(self, X, y):
        """Train the tree on ``X`` / ``y`` using every column of ``X``."""
        features = list(range(X.shape[1]))
        self.tree = self.build_tree(X, y, features)

    @staticmethod
    def _value_matches(sample_value, encoded_value):
        """Return True when ``sample_value`` equals the value encoded in a node key.

        The key stores the training value as text, so it is compared as an
        int, then as a float, and finally as a plain string.
        """
        for parse in (int, float):
            try:
                if sample_value == parse(encoded_value):
                    return True
            except ValueError:
                # Not parseable as this numeric type; try the next interpretation.
                continue
        return str(sample_value) == encoded_value

    def _leaf_labels(self, tree):
        """Return the labels of all leaves below ``tree`` (one entry per leaf)."""
        if not isinstance(tree, dict):
            return [tree]
        labels = []
        for subtree in tree.values():
            labels.extend(self._leaf_labels(subtree))
        return labels

    def predict_single(self, tree, sample):
        """Predict the label of one sample by walking ``tree``.

        Args:
            tree: A (sub)tree as produced by ``build_tree``.
            sample: Indexable sequence of feature values.

        Returns:
            The label of the leaf reached. If the sample carries a feature
            value that no branch of the current node covers, the most common
            label among the leaves below that node is returned instead of
            ``None``. ``None`` is only returned for an empty tree.
        """
        if not isinstance(tree, dict):
            return tree
        for node, subtree in tree.items():
            # maxsplit=1 keeps values that themselves contain " = " intact.
            feature, value = node.split(" = ", 1)
            if self._value_matches(sample[int(feature)], value):
                return self.predict_single(subtree, sample)

        # Feature value never seen at this node during training: fall back to
        # a vote over the leaves of this subtree (each leaf counts once).
        leaf_labels = self._leaf_labels(tree)
        if not leaf_labels:
            return None
        return Counter(leaf_labels).most_common(1)[0][0]

    def predict(self, X):
        """Return a list with one predicted label per row of ``X``."""
        return [self.predict_single(self.tree, sample) for sample in X]

## Random Forest
A Random Forest is an ensemble learning method that combines multiple decision trees to create a more robust and accurate model. It's primarily used for classification tasks but can also be applied to regression problems. The idea is to build multiple decision trees during training and, for unseen data, output the mode of the classes predicted by the individual trees (classification) or the mean of their predictions (regression).

In [11]:
class RandomForest:
    """Bagged ensemble of ``DecisionTree`` classifiers with majority voting.

    Args:
        n_trees: Number of trees to train.
        sample_size: Number of rows drawn (with replacement) for each tree.
        feature_size: Number of distinct feature columns each tree may split on.
        random_state: Optional seed. When ``None`` (the default) the draws come
            from NumPy's global random state, so ``np.random.seed`` still
            controls them; otherwise each ``fit`` call uses a private generator
            seeded with this value.
    """

    def __init__(self, n_trees, sample_size, feature_size, random_state=None):
        self.n_trees = n_trees
        self.sample_size = sample_size
        self.feature_size = feature_size
        self.random_state = random_state
        self.trees = []

    def fit(self, X, y):
        """Train ``n_trees`` trees, each on a bootstrap sample and a feature subset.

        Any trees from a previous ``fit`` call are discarded first.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # np.random (module) and RandomState both expose .choice, so the same
        # code path serves the global and the privately seeded generator.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)

        # Start from an empty forest so refitting does not keep stale trees.
        self.trees = []
        for _ in range(self.n_trees):
            indices = rng.choice(len(X), size=self.sample_size, replace=True)
            X_sample, y_sample = X[indices], y[indices]
            features = rng.choice(X.shape[1], size=self.feature_size, replace=False)

            tree = DecisionTree()
            # Build the tree directly with the sampled column indices. The
            # indices still refer to the original columns of X, so the tree can
            # later be queried with full-width samples.
            tree.tree = tree.build_tree(X_sample, y_sample, np.atleast_1d(features).tolist())
            self.trees.append(tree)

    def predict(self, X):
        """Return, for each row of ``X``, the label predicted by most trees."""
        # Shape (n_trees, n_samples): one row of predictions per tree.
        predictions = np.array([tree.predict(X) for tree in self.trees])
        return [Counter(predictions[:, i]).most_common(1)[0][0] for i in range(len(X))]

## Model Evaluation

In [12]:
# Decision Tree
dt = DecisionTree()
dt.fit(X_train, y_train)
dt_predictions = dt.predict(X_test)
print("Decision Tree Predictions:", dt_predictions)
# predict() returns a plain list; convert explicitly so the comparison with
# y_test is an element-wise array comparison.
print("Decision Tree Accuracy:", np.mean(np.asarray(dt_predictions) == y_test))

# Random Forest
# The forest draws its bootstrap samples from NumPy's global random state, so
# seed it here to get the same result on every Restart & Run All.
np.random.seed(42)
rf = RandomForest(n_trees=3, sample_size=3, feature_size=1)  # Adjusted hyperparameters
rf.fit(X_train, y_train)
rf_predictions = rf.predict(X_test)
print("Random Forest Predictions:", rf_predictions)
print("Random Forest Accuracy:", np.mean(np.asarray(rf_predictions) == y_test))

Decision Tree Predictions: [0, 0]
Decision Tree Accuracy: 0.5
Random Forest Predictions: [0, 0]
Random Forest Accuracy: 0.5
