In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from random_forest import RandomForest

### Constants and hyperparameters

In [2]:
# Seed used to make the run reproducible
seed = 1234

# Fraction (0-1, not a percentage) of the samples held out for testing
train_test_split_pct = 0.2

# Random-forest hyperparameters
n_trees = 30     # number of trees in the forest
max_depth = 150  # maximum depth allowed for each tree

### Data loading

In [3]:
# Load the breast-cancer dataset that ships with scikit-learn
data = datasets.load_breast_cancer()

# Feature matrix and label vector
X = data.data  # type: ignore
y = data.target  # type: ignore

# Hold out a `train_test_split_pct` fraction of the samples for testing;
# the fixed `random_state` keeps the split identical across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=train_test_split_pct,
    random_state=seed,
)

### Building the model

In [4]:
# Instantiate the random forest with the configured hyperparameters
model = RandomForest(
    n_trees=n_trees,
    max_depth=max_depth,
)


### Training the model

In [5]:
# Seed NumPy's global RNG so that re-running this cell repeats the same
# random draws. NOTE(review): this only makes training reproducible if
# RandomForest samples through np.random — confirm against random_forest.py.
np.random.seed(seed)

# Fit the random forest on the training split
model.fit(X_train, y_train)

Training tree 30/30

### Evaluating the model

In [6]:
# Predicting the labels of the held-out test set with the fitted model
y_pred = model.predict(X_test)

In [7]:
# Element-wise comparison of true and predicted test labels
is_correct = y_test == y_pred

# Accuracy = number of matching labels divided by the test-set size
accuracy = np.sum(is_correct) / len(y_test)

# Report the result
print("Accuracy:", accuracy)

Accuracy: 0.9210526315789473
