In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import classification_report

df1 = pd.read_csv("diabetes.csv")
df1 = df1[(df1['Glucose'] != 0) & (df1['Insulin'] != 0)]
df1 = df1[(df1['BloodPressure'] != 0) & (df1['SkinThickness'] != 0) & (df1['BMI'] != 0) & (df1['DiabetesPedigreeFunction'] != 0)]
df1 = df1.reset_index(drop=True)
df2 = df1
X = df2.drop('Outcome', axis = 1)
y = df2.Outcome

In [16]:
def main(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    best_position, best_value = pso(num_particles, n_estimators_range, max_depth_range, min_samples_split_range, min_samples_leaf_range, max_iter,X,y)
    n_estimators, max_depth, min_samples_split, min_samples_leaf = best_position
    model_PSO = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )
    model_PSO.fit(X_train, y_train)
    predictions = model_PSO.predict(X_test)
    
    accuracy = accuracy_score(y_test, predictions)
    
    tn, fp, fn, tp = confusion_matrix(y_test, predictions).ravel()

    # Calculate Sensitivity (True Positive Rate)
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0

    # Calculate Specificity (True Negative Rate)
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    # Calculate Prevalence
    prevalence = (tp + fn) / (tp + fn + tn + fp) if (tp + fn + tn + fp) > 0 else 0
    
    accuracy = f"{accuracy:.3f}"
    specificity = f"{specificity:.3f}"
    sensitivity = f"{sensitivity:.3f}"
    prevalence = f"{prevalence:.3f}"
    f1 = f1_score(y_test, predictions)
    f1 = f"{f1:.3f}"
    print('PSO: ')
    print(f"Best Position (n_estimators, max_depth, min_samples_split, min_samples_leaf): {best_position}")
    print(f"Best Value (Accuracy): {best_value}")
    return model_PSO, accuracy, specificity, sensitivity, prevalence, f1

In [13]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Define the Particle class
class Particle:
    def __init__(self, n_estimators_range, max_depth_range, min_samples_split_range, min_samples_leaf_range):
        self.position = np.array([
            np.random.randint(n_estimators_range[0], n_estimators_range[1]),  # n_estimators
            np.random.randint(max_depth_range[0], max_depth_range[1]),         # max_depth
            np.random.randint(min_samples_split_range[0], min_samples_split_range[1]),  # min_samples_split
            np.random.randint(min_samples_leaf_range[0], min_samples_leaf_range[1])       # min_samples_leaf
        ])
        self.velocity = np.random.uniform(-1, 1, 4)
        self.best_position = np.copy(self.position)
        self.best_value = float('inf')  # Start with infinity

# Define the objective function
def evaluate_particle(particle, X_train, y_train, X_test, y_test):
    n_estimators = int(particle.position[0])
    max_depth = int(particle.position[1])
    min_samples_split = int(particle.position[2])
    min_samples_leaf = int(particle.position[3])
    
    model_PSO = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42
    )
    model_PSO.fit(X_train, y_train)
    predictions = model_PSO.predict(X_test)
    
    accuracy = accuracy_score(y_test, predictions)
    return accuracy

# PSO algorithm
def pso(num_particles, n_estimators_range, max_depth_range, min_samples_split_range, min_samples_leaf_range, max_iter,X,y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize particles
    particles = [Particle(n_estimators_range, max_depth_range, min_samples_split_range, min_samples_leaf_range) for _ in range(num_particles)]
    global_best_position = np.copy(particles[0].best_position)
    global_best_value = float('inf')

    # Hyperparameters
    w = 0.5  # inertia weight
    c1 = 1.5  # cognitive (personal) weight
    c2 = 1.5  # social (global) weight

    # Main loop
    for iter in range(max_iter):
        for particle in particles:
            # Evaluate particle
            current_value = evaluate_particle(particle, X_train, y_train, X_test, y_test)

            # Update personal best
            if current_value < particle.best_value:
                particle.best_value = current_value
                particle.best_position = np.copy(particle.position)

            # Update global best
            if current_value < global_best_value:
                global_best_value = current_value
                global_best_position = np.copy(particle.position)

            # Update velocity
            r1 = np.random.rand(4)  # Generate 4 random values
            r2 = np.random.rand(4)  # Generate another 4 random values
            particle.velocity = (w * particle.velocity +
                                 c1 * r1 * (particle.best_position - particle.position) +
                                 c2 * r2 * (global_best_position - particle.position))

            # Update position
            particle.position = np.round(particle.position + particle.velocity).astype(int)
            
            # Ensure position is within bounds
            particle.position[0] = np.clip(particle.position[0], n_estimators_range[0], n_estimators_range[1])
            particle.position[1] = np.clip(particle.position[1], max_depth_range[0], max_depth_range[1])
            particle.position[2] = np.clip(particle.position[2], min_samples_split_range[0], min_samples_split_range[1])
            particle.position[3] = np.clip(particle.position[3], min_samples_leaf_range[0], min_samples_leaf_range[1])

    return global_best_position, global_best_value

# Parameters
num_particles = 30
n_estimators_range = (10, 200)  # Range for n
max_depth_range = (8,20)
min_samples_split_range = (2,10)
min_samples_leaf_range = (2,10)
max_iter = 25
# Run PSO

In [17]:
if __name__ == "__main__":
    main(X, y)

PSO: 
Best Position (n_estimators, max_depth, min_samples_split, min_samples_leaf): [58 14  6 10]
Best Value (Accuracy): 0.6530612244897959
