# In [None]:
# With Library
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset and separate the feature matrix from the labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree scikit-learn forest and fit it on the training portion
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf_classifier.fit(X_train, y_train)

# Predict the held-out labels and report the fraction that match
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


# In [None]:
# Without Library
import numpy as np
from collections import Counter

# Define functions for decision tree and random forest

def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Args:
        y: Sized iterable of hashable class labels.

    Returns:
        The entropy of the empirical label distribution (base-2 logarithm).
        An empty ``y`` yields ``0``.
    """
    n_labels = len(y)
    result = 0
    # Accumulate -p * log2(p) over each distinct label's relative frequency.
    for count in Counter(y).values():
        p = count / n_labels
        result -= p * np.log2(p)
    return result

def information_gain(X, y, feature_index, threshold):
    """Return the entropy reduction from splitting on one feature threshold.

    Samples with ``X[:, feature_index] < threshold`` form the left partition
    and all remaining samples form the right partition.

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: 1-D NumPy array of labels aligned with the rows of ``X``.  Must be
            non-empty, because the partition weights divide by ``len(y)``.
        feature_index: Column of ``X`` to split on.
        threshold: Split value compared against the chosen column.

    Returns:
        Entropy of ``y`` minus the size-weighted entropies of the two
        partitions.
    """
    goes_left = X[:, feature_index] < threshold
    goes_right = ~goes_left

    # Count mask members in NumPy rather than with the builtin ``sum``, which
    # would walk the boolean array one element at a time in Python.
    n_total = len(y)
    n_left = np.count_nonzero(goes_left)
    n_right = np.count_nonzero(goes_right)

    left_entropy = entropy(y[goes_left])
    right_entropy = entropy(y[goes_right])
    total_entropy = entropy(y)
    return (
        total_entropy
        - (left_entropy * n_left / n_total)
        - (right_entropy * n_right / n_total)
    )

class DecisionTree:
    """Classification tree that greedily splits on the highest information gain.

    Internal nodes are stored as ``(feature_index, threshold, left, right)``
    tuples and leaves as bare class labels.  A sample is routed to ``left``
    when ``sample[feature_index] < threshold`` and to ``right`` otherwise.

    Args:
        max_depth: Maximum number of splits between the root and a leaf.
            ``None`` (the default) places no limit on depth.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth

    def fit(self, X, y):
        """Grow the tree from training data.

        Args:
            X: 2-D array-like of shape (samples, features).
            y: 1-D array-like of class labels, one per row of ``X``.

        Raises:
            ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` have different
                lengths, or if there are no samples.
        """
        # Accept lists and other array-likes, not only NumPy arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (samples, features)")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        if len(y) == 0:
            raise ValueError("Cannot fit a DecisionTree on an empty dataset")

        self.n_classes_ = len(set(y))
        self.n_features_ = X.shape[1]
        self.tree_ = self._grow_tree(X, y)

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y`` (first seen wins ties)."""
        return Counter(y).most_common(1)[0][0]

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``X`` / ``y``.

        Returns a class label for a leaf, or a
        ``(feature_index, threshold, left_subtree, right_subtree)`` tuple.
        """
        n_features = X.shape[1]
        n_labels = len(set(y))

        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or n_labels == 1:
            return self._majority_label(y)

        best_feature = None
        best_threshold = None
        best_gain = -1

        # Exhaustive search: every distinct value of every feature is tried as
        # a threshold.  The smallest value of a feature always leaves the left
        # side empty; such a candidate is only kept when no later candidate
        # scores strictly higher, and is then turned into a leaf below.
        for feature_index in range(n_features):
            for threshold in sorted(set(X[:, feature_index])):
                gain = information_gain(X, y, feature_index, threshold)
                if gain > best_gain:
                    best_feature = feature_index
                    best_threshold = threshold
                    best_gain = gain

        # No candidate at all (e.g. zero feature columns): nothing to split on.
        if best_feature is None:
            return self._majority_label(y)

        left_mask = X[:, best_feature] < best_threshold

        # A split that sends every sample to one side cannot make progress.
        if not left_mask.any() or left_mask.all():
            return self._majority_label(y)

        right_mask = ~left_mask
        left_tree = self._grow_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self._grow_tree(X[right_mask], y[right_mask], depth + 1)

        return (best_feature, best_threshold, left_tree, right_tree)

    def predict(self, X):
        """Return a list with the predicted label for each row of ``X``."""
        return [self._predict(inputs) for inputs in X]

    def _predict(self, inputs):
        """Walk the fitted tree for one sample and return the leaf label."""
        node = self.tree_
        # Internal nodes are tuples; anything else is a leaf label.
        while isinstance(node, tuple):
            feature, threshold, left_subtree, right_subtree = node
            if inputs[feature] < threshold:
                node = left_subtree
            else:
                node = right_subtree
        return node

class RandomForest:
    """Bagged ensemble of ``DecisionTree`` classifiers with majority voting.

    Each tree is trained on a bootstrap sample (drawn with replacement, same
    size as the training set) and predictions are the most common vote.

    Args:
        n_estimators: Number of trees to train.
        max_depth: Depth limit passed to every tree (``None`` = unlimited).
        random_state: Optional seed for the bootstrap sampling.  ``None``
            (the default) draws from NumPy's global ``np.random`` state.
    """

    def __init__(self, n_estimators=100, max_depth=None, random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []

    def fit(self, X, y):
        """Train ``n_estimators`` trees on bootstrap samples of ``X`` / ``y``.

        Any trees from a previous call are discarded, so refitting never
        mixes models trained on different data.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if self.random_state is None:
            rng = np.random  # module-level functions share the global state
        else:
            rng = np.random.RandomState(self.random_state)

        n_samples = len(X)
        trees = []
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            indices = rng.choice(n_samples, n_samples, replace=True)
            tree.fit(X[indices], y[indices])
            trees.append(tree)
        self.trees = trees

    def predict(self, X):
        """Return a list with the majority-vote label for each row of ``X``.

        Votes are tallied on the labels themselves rather than in an integer
        array, so non-integer labels are not coerced.  Ties go to the label
        voted for by the earliest tree.
        """
        votes_per_tree = [tree.predict(X) for tree in self.trees]
        return [
            Counter(votes[row] for votes in votes_per_tree).most_common(1)[0][0]
            for row in range(len(X))
        ]

# Example usage with Iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset and separate the feature matrix from the labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the hand-written forest (100 trees, no depth limit) and fit it
rf_classifier_custom = RandomForest(
    n_estimators=100,
    max_depth=None,
)
rf_classifier_custom.fit(X_train, y_train)

# Predict the held-out labels and report the fraction that match
y_pred_custom = rf_classifier_custom.predict(X_test)
accuracy_custom = accuracy_score(y_test, y_pred_custom)
print("Accuracy (Custom Random Forest):", accuracy_custom)
