# Combine the EFDT and HAT Trees to Evaluate Results

## Load the Dataset

In [None]:
import numpy as np

# Path of the tab-delimited dataset file.
file_path = 'Skin_NonSkin 2.txt'

# Parse the whole file into a single numeric array.
data = np.loadtxt(file_path, delimiter='\t')

# Every column except the last is a feature; the last column is the target.
# The target is cast to int and shifted down by one
# (presumably the file stores 1-based class labels -- TODO confirm).
X, y = data[:, :-1], data[:, -1].astype(int) - 1

## Evaluate the Models 

In [None]:
# Create one fresh instance of each tree model.
EFDT_model, HAT_model = EFDT(), HAT()

# Running misclassification counters and the recorded [index, error_rate] pairs.
EFDT_errors_count = HAT_errors_count = 0
EFDT_error_rates, HAT_error_rates = [], []

# Test-then-train loop: each sample is scored by both models before
# either model is updated with it.
for idx, (sample, label) in enumerate(zip(X, y)):

    # Predict with both models on the sample
    EFDT_pred = EFDT_model.predict(sample)
    HAT_pred = HAT_model.predict(sample)

    # Check the accuracy of the models: count every wrong prediction
    if EFDT_pred != label:
        EFDT_errors_count += 1
    if HAT_pred != label:
        HAT_errors_count += 1

    # Record the cumulative error rate, but only once past index 10000
    if idx > 10000:
        seen = idx + 1  # number of samples scored so far
        EFDT_error_rate = EFDT_errors_count / seen
        HAT_error_rate = HAT_errors_count / seen
        EFDT_error_rates.append([idx, EFDT_error_rate])
        HAT_error_rates.append([idx, HAT_error_rate])

    # Progress message every 10000 iterations
    if idx % 10000 == 0:
        print(f'Instance: {idx}')

    # Finally, let both trees learn from the sample just scored
    EFDT_model._fit_single(sample, label)
    HAT_model._fit_single(sample, label)

## Display the Results

In [None]:
import matplotlib.pyplot as plt

# Each recorded entry is an [index, error_rate] pair; transpose each list
# into two parallel sequences (indices, error rates) for plotting.
EFDT_idxs, EFDT_errors = zip(*EFDT_error_rates)
HAT_idxs, HAT_errors = zip(*HAT_error_rates)

# Draw both error-rate curves on one figure. Each curve carries a label and
# a legend is shown so the two models can be told apart.
plt.figure(figsize=(10, 6))
plt.plot(EFDT_idxs, EFDT_errors, marker='o', linestyle='-', color='b', label='EFDT')
plt.plot(HAT_idxs, HAT_errors, marker='o', linestyle='-', color='g', label='HAT')
plt.title('Error Rate Over Time')
plt.xlabel('Index (or Time)')
plt.ylabel('Error Rate')
plt.legend()  # Identifies which line belongs to which model
plt.grid(True)  # Optional: Adds a grid for easier reading
plt.show()