In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import accuracy_score

#### Using optimization

In [18]:
def moth_flame_optimization(X_train, y_train, X_test, y_test, n_iterations=100, n_population=20, alpha=0.5, beta=0.5, gamma=1):
    """
    This function implements the Moth Flame Optimization algorithm for tuning the hyperparameters of a Decision Tree Classifier.
    
    Parameters:
    X_train (numpy.ndarray): The training data features
    y_train (numpy.ndarray): The training data labels
    X_test (numpy.ndarray): The testing data features
    y_test (numpy.ndarray): The testing data labels
    n_iterations (int): The number of iterations to run the algorithm (default=50)
    n_population (int): The number of moths in the population (default=20)
    alpha (float): The light absorption coefficient (default=0.5)
    beta (float): The attraction coefficient (default=0.5)
    gamma (float): The random walk step size (default=1)
    
    Returns:
    dict: A dictionary containing the best hyperparameters found by the algorithm and the corresponding accuracy score
    """
    try:
        # Check if the input data is valid
        if not isinstance(X_train, np.ndarray) or not isinstance(y_train, np.ndarray) or not isinstance(X_test, np.ndarray) or not isinstance(y_test, np.ndarray):
            raise TypeError("Input data must be numpy arrays")
        if X_train.shape[0] != y_train.shape[0] or X_test.shape[0] != y_test.shape[0]:
            raise ValueError("Number of samples in input data and labels must be equal")
        
        # Define the hyperparameter search space
        max_depth_range = range(1, 21)
        min_samples_split_range = range(2, 21)
        min_samples_leaf_range = range(1, 21)
        max_features_range = ["sqrt", "log2", None]
        
        # Initialize the population of moths
        population = []
        for i in range(n_population):
            max_depth = np.random.choice(max_depth_range)
            min_samples_split = np.random.choice(min_samples_split_range)
            min_samples_leaf = np.random.choice(min_samples_leaf_range)
            max_features = np.random.choice(max_features_range)
            moth = {"max_depth": max_depth, "min_samples_split": min_samples_split, "min_samples_leaf": min_samples_leaf, "max_features": max_features}
            population.append(moth)
        
        # Evaluate the fitness of each moth in the population
        fitness = []
        for moth in population:
            clf = DecisionTreeClassifier(max_depth=int(moth["max_depth"]), min_samples_split=moth["min_samples_split"], min_samples_leaf=moth["min_samples_leaf"], max_features=moth["max_features"])
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)
            fitness.append(score)
        
        # Find the best moth in the population
        best_moth = population[np.argmax(fitness)]
        best_fitness = max(fitness)
        
        # Run the optimization algorithm
        for i in range(n_iterations):
            for j in range(n_population):
                # Calculate the distance between the current moth and the best moth
                distance = np.sqrt((population[j]["max_depth"] - best_moth["max_depth"])**2 + (population[j]["min_samples_split"] - best_moth["min_samples_split"])**2 + (population[j]["min_samples_leaf"] - best_moth["min_samples_leaf"])**2 + (population[j]["max_features"] != best_moth["max_features"]))
                
                # Calculate the intensity of the light
                intensity = 1 / (1 + alpha * distance)
                
                # Calculate the direction of the movement
                direction = np.random.uniform(low=-1, high=1, size=4)
                
                # Calculate the step size
                step_size = gamma * np.abs(np.random.normal(size=4))
                
                # Calculate the new position of the moth
                new_moth = {"max_depth": population[j]["max_depth"] + beta * intensity * direction[0] * step_size[0], "min_samples_split": population[j]["min_samples_split"] + beta * intensity * direction[1] * step_size[1], "min_samples_leaf": population[j]["min_samples_leaf"] + beta * intensity * direction[2] * step_size[2], "max_features": population[j]["max_features"]}
                
                # Check if the new position is valid
                if new_moth["max_depth"] < 1:
                    new_moth["max_depth"] = 1
                elif new_moth["max_depth"] > 20:
                    new_moth["max_depth"] = 20
                if new_moth["min_samples_split"] < 2:
                    new_moth["min_samples_split"] = 2
                elif new_moth["min_samples_split"] > 2:
                    new_moth["min_samples_split"] = 2
                if new_moth["min_samples_leaf"] < 1:
                    new_moth["min_samples_leaf"] = 1
                elif new_moth["min_samples_leaf"] > 20:
                    new_moth["min_samples_leaf"] = 20
                
                # Evaluate the fitness of the new moth
                clf = DecisionTreeClassifier(max_depth=int(new_moth["max_depth"]), min_samples_split=new_moth["min_samples_split"], min_samples_leaf=new_moth["min_samples_leaf"], max_features=new_moth["max_features"])
                clf.fit(X_train, y_train)
                score = clf.score(X_test, y_test)
                
                # Update the best moth if necessary
                if score > best_fitness:
                    best_moth = new_moth
                    best_fitness = score
                
                # Replace the current moth with the new moth if it has higher fitness
                if score > fitness[j]:
                    population[j] = new_moth
                    fitness[j] = score
        
        # Return the best hyperparameters and the corresponding accuracy score
        result = {"max_depth": int(best_moth["max_depth"]), "min_samples_split": int(best_moth["min_samples_split"]), "min_samples_leaf": int(best_moth["min_samples_leaf"]), "max_features": best_moth["max_features"], "accuracy": best_fitness}
        return result
    except Exception as e:
        # Log the error
        print(f"Error: {e}")
        return 0

In [19]:
df = pd.read_csv("diabetes_preprocessed.csv")

In [20]:
# Split the dataset into features and target variable
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

In [21]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [23]:
best_params_50 = moth_flame_optimization(X_train, y_train, X_test, y_test, n_iterations=50)
best_params_100 = moth_flame_optimization(X_train, y_train, X_test, y_test, n_iterations=100)

In [24]:
best_params_50

{'max_depth': 17,
 'min_samples_split': 3,
 'min_samples_leaf': 2,
 'max_features': 'log2',
 'accuracy': 0.9225}

In [25]:
best_params_100

{'max_depth': 20,
 'min_samples_split': 2,
 'min_samples_leaf': 3,
 'max_features': None,
 'accuracy': 0.9175}

#### Without Optimization

In [9]:
dt = DecisionTreeClassifier()

In [10]:
dt.fit(X_train, y_train)

In [11]:
ypred = dt.predict(X_test)

In [12]:
print(accuracy_score(y_test, ypred))

0.9775


In [13]:
dt.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}