# Combining All the Trees to Evaluate Results

## Load the Dataset

In [1]:
import numpy as np

# Location of the tab-separated dataset file
file_path = 'Skin_Data/Skin_NonSkin 2.txt'

# Parse the whole file into a numeric array (one row per line)
data = np.loadtxt(fname=file_path, delimiter='\t')

# Every column except the last is a feature
X = data[:, :-1]

# The last column is the target; cast it to int and shift it down by one
# (presumably mapping 1-based labels to 0-based class ids -- confirm
# against the data file)
y = data[:, -1].astype(int) - 1

## Evaluate the Models 

In [2]:
#Initialize the Model
from all_trees import EFDT, HAT, EFHAT

# One fresh instance of each tree variant under comparison
EFDT_model, HAT_model, EFHAT_model = EFDT(), HAT(), EFHAT()


# Running count of misclassified instances, one counter per model
EFDT_errors_count = HAT_errors_count = EFHAT_errors_count = 0

# Per-model history of [instance index, error rate] pairs
EFDT_error_rates, HAT_error_rates, EFHAT_error_rates = [], [], []

# Test-then-train evaluation: every instance is first used to score each
# model and only afterwards fed to the models as a training example.
for idx in range(len(y)):
    x_i, y_i = X[idx], y[idx]

    # Predict with all three models before they see the label.
    # Each model must be queried through its own instance, otherwise two
    # of the error curves would be identical.
    EFDT_pred = EFDT_model.predict(x_i)
    HAT_pred = HAT_model.predict(x_i)
    EFHAT_pred = EFHAT_model.predict(x_i)

    # Count a mistake for every model whose prediction misses the label
    if EFDT_pred != y_i:
        EFDT_errors_count += 1

    if HAT_pred != y_i:
        HAT_errors_count += 1

    if EFHAT_pred != y_i:
        EFHAT_errors_count += 1

    # Record the cumulative error rate (mistakes / instances seen so far).
    # Nothing is recorded for idx <= 10000 -- presumably a warm-up period
    # so the early, noisy rates do not dominate the plot.
    if idx > 10000:
        seen = idx + 1

        EFDT_error_rate = EFDT_errors_count / seen
        EFDT_error_rates.append([idx, EFDT_error_rate])

        HAT_error_rate = HAT_errors_count / seen
        HAT_error_rates.append([idx, HAT_error_rate])

        EFHAT_error_rate = EFHAT_errors_count / seen
        EFHAT_error_rates.append([idx, EFHAT_error_rate])

    # Progress report every 10000 instances
    if idx % 10000 == 0:
        print(f'Instance: {idx}')

    # Train each model on the instance it was just tested on.
    # NOTE(review): the recorded run of this cell stopped with
    # "AttributeError: 'HAT' object has no attribute '_fit_single'" --
    # confirm which single-instance training method HAT exposes in
    # all_trees and update the HAT call accordingly.
    EFDT_model._fit_single(x_i, y_i)
    HAT_model._fit_single(x_i, y_i)
    EFHAT_model._fit_single(x_i, y_i)

Instance: 0


AttributeError: 'HAT' object has no attribute '_fit_single'

## Display the Results

In [None]:
import matplotlib.pyplot as plt

# Each *_error_rates entry is an [idx, error_rate] pair; unzip every history
# into one sequence of instance indices and one sequence of error rates.
EFDT_idxs, EFDT_errors = zip(*EFDT_error_rates)
HAT_idxs, HAT_errors = zip(*HAT_error_rates)
EFHAT_idxs, EFHAT_errors = zip(*EFHAT_error_rates)

# Plot the three curves on shared axes. Each curve carries a label so the
# legend can tell the models apart; colour alone does not identify them.
plt.figure(figsize=(10, 6))
plt.plot(EFDT_idxs, EFDT_errors, marker='o', linestyle='-', color='b',
         label='EFDT')
plt.plot(HAT_idxs, HAT_errors, marker='o', linestyle='-', color='g',
         label='HAT')
plt.plot(EFHAT_idxs, EFHAT_errors, marker='o', linestyle='-', color='r',
         label='EFHAT')
plt.title('Error Rate vs Instance')
plt.xlabel('Instance')
plt.ylabel('Error Rate')
plt.legend()
plt.grid(True)
plt.show()