## Libraries

In [1]:
import numpy as np
import torch
import math
from cvxopt import matrix, solvers
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier


## Data classes

In [8]:
class X_values:
    """Feature container that keeps a float32 tensor copy of a 2-D array.

    Attributes:
        feature_dim: size of the second axis of ``dataset`` (``shape[1]``).
        data_size: size of the first axis of ``dataset`` (``shape[0]``).
        tensor_form: ``dataset`` converted with ``torch.tensor(..., dtype=torch.float32)``.
    """

    def __init__(self, dataset):
        """Record the two leading dimensions of ``dataset`` and build its tensor.

        Args:
            dataset: array-like exposing ``.shape`` with at least two axes
                (the notebook passes NumPy arrays).
        """
        n_samples, n_features = dataset.shape[0], dataset.shape[1]
        self.feature_dim = n_features
        self.data_size = n_samples
        self.tensor_form = torch.tensor(dataset, dtype=torch.float32)

        # Creation is announced on stdout so the notebook shows what was built.
        print(f"X_values created successfully with dimensions {self.tensor_form.shape}, feature_dim: {self.feature_dim}, data_size: {self.data_size}")

class Y_values:
    """Target container that keeps a float32 tensor copy of the labels.

    Attributes:
        data_size: size of the first axis of ``targets`` (``shape[0]``).
        tensor_form: ``targets`` converted with ``torch.tensor(..., dtype=torch.float32)``.
    """

    def __init__(self, targets):
        """Record how many targets there are and build their tensor.

        Args:
            targets: array-like exposing ``.shape`` (the notebook passes NumPy arrays).
        """
        n_targets = targets.shape[0]
        self.data_size = n_targets
        self.tensor_form = torch.tensor(targets, dtype=torch.float32)

        # Creation is announced on stdout so the notebook shows what was built.
        print(f"Y_values created successfully with dimensions {self.tensor_form.shape}")


class Dataset:
    """Pairs a feature tensor with its target tensor.

    Construct it from wrapper objects (``X_values`` / ``Y_values`` instances),
    not from raw arrays: the constructor reads ``tensor_form``, ``feature_dim``
    and ``data_size`` from its arguments.

    Attributes:
        x_tensor: ``x_values.tensor_form``.
        y_tensor: ``y_values.tensor_form``.
        feature_dim: copied from ``x_values.feature_dim``.
        data_size: copied from ``x_values.data_size``.
    """

    def __init__(self, x_values, y_values):
        """Store both tensors after checking they have the same number of rows.

        Raises:
            ValueError: if the first dimensions of the two tensors differ.
        """
        features, targets = x_values.tensor_form, y_values.tensor_form
        self.x_tensor = features
        self.y_tensor = targets

        # Every sample needs exactly one target, so the leading axes must agree.
        if features.shape[0] != targets.shape[0]:
            raise ValueError("Mismatch between X and y dimensions")

        self.feature_dim, self.data_size = x_values.feature_dim, x_values.data_size

        print(f"Dataset created with X: {self.x_tensor.shape}, Y: {self.y_tensor.shape}")
        print()


## Related functions

In [3]:
def split_dataset(dataset, train_rate, val_rate, test_rate, seed=None):
    """Randomly partition ``dataset`` into train, validation and test Datasets.

    Rows are shuffled with ``torch.randperm``; the first ``train_rate`` share
    goes to train, the next ``val_rate`` share to validation, and every
    remaining row to test (so test absorbs any rounding remainder).

    Args:
        dataset: object exposing ``x_tensor`` and ``y_tensor`` with matching
            first dimensions.
        train_rate: fraction of rows for the training split.
        val_rate: fraction of rows for the validation split.
        test_rate: fraction of rows for the test split; only used to validate
            that the three rates sum to 1.
        seed: optional integer. When given, the shuffle uses a dedicated
            ``torch.Generator`` seeded with it, so the split is reproducible
            and the global torch RNG is left untouched. When ``None`` the
            global torch RNG is used.

    Returns:
        Tuple ``(train_dataset, val_dataset, test_dataset)`` of ``Dataset``
        instances.

    Raises:
        ValueError: if the rates do not sum to 1 (tolerance 1e-6) or any rate
            is negative.
    """
    if abs(train_rate + val_rate + test_rate - 1.0) > 1e-6:
        raise ValueError("Split rates must sum to 1")
    # Rates such as (1.2, -0.1, -0.1) pass the sum check but yield nonsense slices.
    if min(train_rate, val_rate, test_rate) < 0:
        raise ValueError("Split rates must be non-negative")

    total_size = dataset.x_tensor.shape[0]

    # int() truncates, and products like 100 * 0.29 evaluate to 28.999999999999996.
    # The small epsilon keeps floor semantics while absorbing that float error.
    float_slack = 1e-9
    train_size = int(total_size * train_rate + float_slack)
    val_size = int(total_size * val_rate + float_slack)

    if seed is None:
        indices = torch.randperm(total_size)
    else:
        generator = torch.Generator().manual_seed(seed)
        indices = torch.randperm(total_size, generator=generator)

    val_end = train_size + val_size
    index_groups = (indices[:train_size], indices[train_size:val_end], indices[val_end:])

    def _subset(selected):
        # Go back through the wrapper classes so each split is a regular Dataset.
        x_part = dataset.x_tensor[selected].numpy()
        y_part = dataset.y_tensor[selected].numpy()
        return Dataset(X_values(x_part), Y_values(y_part))

    train_dataset, val_dataset, test_dataset = (_subset(group) for group in index_groups)

    return train_dataset, val_dataset, test_dataset

## Random Forest (by library)

In [4]:

class RandomForestModel:
    """Adapter between the notebook's tensor-backed datasets and a
    scikit-learn ``RandomForestClassifier`` stored in ``self.model``.
    """

    def __init__(self, n_estimators=100, max_depth=None, random_state=None):
        """Create the underlying classifier.

        Args:
            n_estimators: forwarded to ``RandomForestClassifier``.
            max_depth: forwarded to ``RandomForestClassifier``.
            random_state: forwarded to ``RandomForestClassifier``.
        """
        forest_options = {
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "random_state": random_state,
        }
        self.model = RandomForestClassifier(**forest_options)
        print(f"RandomForestModel initialized with {n_estimators} trees and max depth {max_depth}")

    def train(self, train_dataset):
        """Fit the classifier on ``train_dataset.x_tensor`` / ``train_dataset.y_tensor``."""
        # The classifier is fed NumPy arrays, so both tensors are converted first.
        features = train_dataset.x_tensor.numpy()
        labels = train_dataset.y_tensor.numpy()

        self.model.fit(features, labels)
        print("RandomForestModel training completed!")

    def evaluate(self, dataset, metric='accuracy'):
        """Predict on ``dataset`` and score the predictions.

        Args:
            dataset: object exposing ``x_tensor`` and ``y_tensor``.
            metric: ``'accuracy'`` or ``'classification_report'``.

        Returns:
            The value produced by ``accuracy_score`` or ``classification_report``
            for the chosen metric; it is also printed.

        Raises:
            ValueError: if ``metric`` is neither supported name (raised after
                predictions have been computed).
        """
        features = dataset.x_tensor.numpy()
        expected = dataset.y_tensor.numpy()
        predicted = self.model.predict(features)

        if metric == 'accuracy':
            score = accuracy_score(expected, predicted)
            print(f"Accuracy: {score:.4f}")
            return score

        if metric == 'classification_report':
            report = classification_report(expected, predicted)
            print("Classification Report:")
            print(report)
            return report

        raise ValueError(f"Unsupported metric: {metric}")

    def predict(self, x_values):
        """Return the classifier's predictions for ``x_values.tensor_form``
        as a float32 tensor.
        """
        inputs = x_values.tensor_form.numpy()
        raw_predictions = self.model.predict(inputs)
        return torch.tensor(raw_predictions, dtype=torch.float32)

## Training algorithm

## Testing algorithm

## Running the code

In [9]:
# Load dataset (Iris dataset in this example)
iris = load_iris()
X, y = iris.data, iris.target

# Standardize the features
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# validation/test rows influence the scaling statistics. Fit it on the training
# rows only if this pipeline is reused with a scale-sensitive model.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# split_dataset shuffles with torch.randperm, which draws from torch's global RNG.
# Seed it so Restart & Run All reproduces the same split (and the same scores).
SEED = 42
torch.manual_seed(SEED)

# Split data into train, validation, and test sets: 50% / 25% / 25%
# (the earlier note mentioned 0.7-0.15-0.15; the rates below are what is actually used)
train_rate, val_rate, test_rate = 0.5, 0.25, 0.25
x_values = X_values(X)
y_values = Y_values(y)
dataset = Dataset(x_values, y_values)
train_dataset, val_dataset, test_dataset = split_dataset(dataset, train_rate, val_rate, test_rate)

# Initialize Random Forest Model (same seed as the split for a fully reproducible run)
random_forest = RandomForestModel(n_estimators=100, max_depth=None, random_state=SEED)

# Train the model
random_forest.train(train_dataset)

# Evaluate on validation set
print("Validation Set Evaluation:")
random_forest.evaluate(val_dataset)

# Evaluate on test set
print("Test Set Evaluation:")
random_forest.evaluate(test_dataset)

X_values created successfully with dimensions torch.Size([150, 4]), feature_dim: 4, data_size: 150
Y_values created successfully with dimensions torch.Size([150])
Dataset created with X: torch.Size([150, 4]), Y: torch.Size([150])

X_values created successfully with dimensions torch.Size([75, 4]), feature_dim: 4, data_size: 75
Y_values created successfully with dimensions torch.Size([75])
Dataset created with X: torch.Size([75, 4]), Y: torch.Size([75])

X_values created successfully with dimensions torch.Size([37, 4]), feature_dim: 4, data_size: 37
Y_values created successfully with dimensions torch.Size([37])
Dataset created with X: torch.Size([37, 4]), Y: torch.Size([37])

X_values created successfully with dimensions torch.Size([38, 4]), feature_dim: 4, data_size: 38
Y_values created successfully with dimensions torch.Size([38])
Dataset created with X: torch.Size([38, 4]), Y: torch.Size([38])

RandomForestModel initialized with 100 trees and max depth None
RandomForestModel training 

0.9736842105263158