**Imports**

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from collections import Counter
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

**Random Forest Classifier Implementation from Scratch in Python (built on scikit-learn decision trees)**

In [None]:
class Random_Forest_Classifier:
    """Bagged ensemble of scikit-learn decision trees with majority voting.

    Each tree is trained on a bootstrap sample (drawn with replacement, same
    size as the training set) and the ensemble predicts the label that the
    most trees voted for.

    Parameters
    ----------
    n_estimators : int
        Number of trees to train.
    max_depth, min_samples_split, min_samples_leaf
        Forwarded unchanged to every ``DecisionTreeClassifier``.
    random_state : int or None
        Seed for the bootstrap sampling and for the per-tree seeds. ``None``
        gives a non-reproducible run.
    max_features
        Forwarded to every ``DecisionTreeClassifier``. The default ``None``
        lets each split consider all features (plain bagging); pass e.g.
        ``"sqrt"`` to get per-split feature subsampling.
    """

    def __init__(self, n_estimators=100, max_depth=None, min_samples_split=2,
                 min_samples_leaf=1, random_state=None, max_features=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.random_state = random_state
        self.max_features = max_features
        # Fitted trees; empty until fit() has been called.
        self.estimators = []

    def fit(self, X, y):
        """Train ``n_estimators`` trees, each on its own bootstrap sample.

        Any previously fitted trees are discarded, so calling ``fit`` twice
        does not grow the ensemble beyond ``n_estimators``.

        Parameters
        ----------
        X : array-like, one row per sample
        y : array-like, one label per row of ``X``

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` has no rows or ``X`` and ``y`` differ in length.
        """
        # Positional indexing below needs arrays, not DataFrames/Series.
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        # A private generator keeps the run reproducible without reseeding
        # NumPy's global RNG as a side effect.
        rng = np.random.RandomState(self.random_state)
        max_seed = np.iinfo(np.int32).max

        estimators = []
        for _ in range(self.n_estimators):
            indices = rng.randint(0, n_samples, size=n_samples)
            tree = DecisionTreeClassifier(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                max_features=self.max_features,
                # Trees no longer see a seeded global RNG, so seed each one
                # explicitly from the private generator.
                random_state=rng.randint(max_seed),
            )
            tree.fit(X[indices], y[indices])
            estimators.append(tree)

        # Replace (not extend) so a failed or repeated fit never leaves a
        # mixture of old and new trees behind.
        self.estimators = estimators
        return self

    def predict(self, X):
        """Return the majority-vote label for every row of ``X``.

        Ties go to the label that appears first in estimator order (the
        behaviour of ``Counter.most_common``).

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if not self.estimators:
            raise RuntimeError(
                "This Random_Forest_Classifier is not fitted yet; call fit() first."
            )
        X = np.asarray(X)
        # Shape (n_estimators, n_samples); transposing yields one row of
        # votes per sample.
        per_tree = np.array([tree.predict(X) for tree in self.estimators])
        return np.array(
            [Counter(votes).most_common(1)[0][0] for votes in per_tree.T]
        )

In [None]:
# Load the training data and split it into features and the target column.
penguins = pd.read_csv('penguins_train.csv')
X = penguins.drop('Species', axis=1)
y = penguins['Species']

# Fill missing numeric values with the column mean. 'number' selects every
# numeric dtype regardless of platform integer width.
numeric_column_means = X.select_dtypes(include='number').mean()
X = X.fillna(numeric_column_means)

# One-hot encode the string-valued columns. A float dtype keeps the whole
# feature matrix numeric so `.values` does not fall back to an object array.
cat_columns = X.select_dtypes(include=['object'])
X_encoded = pd.get_dummies(X, columns=cat_columns.columns, dtype=float)

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=31)

# Train the ensemble and predict on the held-out rows.
n_estimators = 100
rf_classifier = Random_Forest_Classifier(n_estimators=n_estimators, random_state=42)
rf_classifier.fit(X_train.values, y_train.values)
yhat_test = rf_classifier.predict(X_test.values)

# Report the evaluation metrics.
accuracy = accuracy_score(y_test, yhat_test)
print("Predictions on the test dataset:")
print(yhat_test)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, yhat_test))
print("Confusion Matrix:")
print(confusion_matrix(y_test, yhat_test))