In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [20]:
# Load the COVID-19 India status table from the CSV in the working directory.
data = pd.read_csv('Latest Covid-19 India Status.csv')
# Show only a preview: rendering the whole frame bloats the saved notebook.
data.head(10)

Unnamed: 0,State/UTs,Total Cases,Active,Discharged,Deaths,Active Ratio,Discharge Ratio,Death Ratio,Population
0,Andaman and Nicobar,10766,0,10637,129,0.0,98.8,1.2,100896618
1,Andhra Pradesh,2340676,0,2325943,14733,0.0,99.37,0.63,128500364
2,Arunachal Pradesh,67049,0,66753,296,0.0,99.56,0.44,658019
3,Assam,746159,5,738119,8035,0.0,98.92,1.08,290492
4,Bihar,855267,1,842952,12314,0.0,98.56,1.44,40100376
5,Chandigarh,100693,0,99508,1185,0.0,98.82,1.18,30501026
6,Chhattisgarh,1187695,0,1173505,14190,0.0,98.81,1.19,28900667
7,Dadra and Nagar Haveli and Daman and Diu,11592,0,11588,4,0.0,99.97,0.03,231502578
8,Delhi,2040910,14,2014230,26666,0.0,98.69,1.31,773997
9,Goa,263346,3,259329,4014,0.0,98.47,1.52,3772103


In [22]:
# Binarize the target: 'Death Ratio' above the threshold becomes 1 (high), otherwise 0 (low).
# NOTE: this overwrites the raw 'Death Ratio' column of `data`. Re-running the cell is
# harmless only while the threshold lies in [0, 1), because 0/1 then map onto themselves.
DEATH_RATIO_THRESHOLD = 0.5
data['Death Ratio'] = (data['Death Ratio'] > DEATH_RATIO_THRESHOLD).astype(int)  # 0: low, 1: high

# Features and target.
# 'State/UTs' is an identifier, not a predictor, and 'Death Ratio' is the label itself.
X = data.drop(columns=["Death Ratio", "State/UTs"])
# 'Unnamed: 0' looks like a saved row index; drop it only if it really is a column so it
# cannot be picked up as a feature (TODO confirm against the CSV header).
X = X.drop(columns=["Unnamed: 0"], errors="ignore")
y = data["Death Ratio"]

# NOTE(review): possible target leakage. In the displayed rows 'Death Ratio' appears to be
# Deaths / Total Cases in percent (e.g. 129 / 10766 ~= 1.2), so the remaining columns may
# determine the label almost exactly. Confirm before trusting the reported metrics.

# Train-test split (30% held out, fixed seed for a repeatable partition).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
data.head(10)


Unnamed: 0,State/UTs,Total Cases,Active,Discharged,Deaths,Active Ratio,Discharge Ratio,Death Ratio,Population
0,Andaman and Nicobar,10766,0,10637,129,0.0,98.8,1,100896618
1,Andhra Pradesh,2340676,0,2325943,14733,0.0,99.37,1,128500364
2,Arunachal Pradesh,67049,0,66753,296,0.0,99.56,0,658019
3,Assam,746159,5,738119,8035,0.0,98.92,1,290492
4,Bihar,855267,1,842952,12314,0.0,98.56,1,40100376
5,Chandigarh,100693,0,99508,1185,0.0,98.82,1,30501026
6,Chhattisgarh,1187695,0,1173505,14190,0.0,98.81,1,28900667
7,Dadra and Nagar Haveli and Daman and Diu,11592,0,11588,4,0.0,99.97,0,231502578
8,Delhi,2040910,14,2014230,26666,0.0,98.69,1,773997
9,Goa,263346,3,259329,4014,0.0,98.47,1,3772103


In [29]:
# --- Bat Algorithm hyper-parameters ---
# Population size and number of outer iterations of the search loop.
num_bats, max_iter = 10, 20
# Both start at 0.5. The search loop compares random draws against them to decide
# whether to accept a better solution (loudness) and whether to run the local
# search step (pulse_rate).
loudness = pulse_rate = 0.5
# Bounds of the random frequency drawn for every velocity update.
frequency_min = 0
frequency_max = 2

# Fitness Function with Additional Metrics
def fitness_function_with_metrics(solution):
    """Score a candidate feature mask with a random forest classifier.

    Parameters
    ----------
    solution : array-like of float
        One value per feature column. A column is selected when its value is
        strictly greater than 0.5.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, recall, f1)`` of the fitted classifier on
        ``X_test`` / ``y_test``. All four are 0.0 when no column is selected.

    Notes
    -----
    Reads the notebook-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
    The score is computed on the test split, so any search that maximises it
    is tuned to that split and the resulting metrics are optimistic.
    """
    # np.asarray lets callers pass plain lists as well as NumPy rows.
    selected_features = np.where(np.asarray(solution) > 0.5)[0]
    if len(selected_features) == 0:
        # Nothing to train on: report the worst possible score for every metric.
        return 0.0, 0.0, 0.0, 0.0

    # Train model on selected features (fixed seed keeps the forest repeatable).
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train.iloc[:, selected_features], y_train)
    preds = clf.predict(X_test.iloc[:, selected_features])

    # Calculate metrics. zero_division=0 keeps the same numeric result as the
    # library default but without a warning when a metric is undefined
    # (e.g. the model predicts no positive sample).
    accuracy = accuracy_score(y_test, preds)
    precision = precision_score(y_test, preds, zero_division=0)
    recall = recall_score(y_test, preds, zero_division=0)
    f1 = f1_score(y_test, preds, zero_division=0)

    return accuracy, precision, recall, f1

# Initialize bats
# Seed NumPy's global generator so the random start (and every later np.random
# draw in this kernel session) is repeatable from run to run.
np.random.seed(42)
positions = np.random.rand(num_bats, X.shape[1])  # Random positions in [0, 1), one row per bat
velocities = np.random.uniform(-1, 1, (num_bats, X.shape[1]))  # Random velocities in [-1, 1)
# Randomly select a bat as the starting best. Take a copy: indexing a row returns a view,
# and a view would change silently whenever that row of `positions` is updated in place.
best_global = positions[np.random.randint(0, num_bats)].copy()
best_global_accuracy, best_global_precision, best_global_recall, best_global_f1 = fitness_function_with_metrics(best_global)




In [31]:
# Bat Algorithm
# Detach the incoming best position from `positions`: if it is a view of one row,
# the in-place row updates below would rewrite it behind our back.
best_global = np.array(best_global, copy=True)

for t in range(max_iter):
    for i in range(num_bats):
        # Calculate frequency and update velocity and position
        frequency = frequency_min + (frequency_max - frequency_min) * np.random.rand()
        velocities[i] += (positions[i] - best_global) * frequency
        positions[i] = np.clip(positions[i] + velocities[i], 0, 1)  # Ensure positions stay in range

        # Local search: with probability (1 - pulse_rate) jump next to the current best
        if np.random.rand() > pulse_rate:
            positions[i] = np.clip(best_global + np.random.normal(0, 0.1, size=X.shape[1]), 0, 1)

        # Evaluate fitness
        accuracy, precision, recall, f1 = fitness_function_with_metrics(positions[i])
        # Accept a strictly better solution only with probability `loudness`
        if accuracy > best_global_accuracy and np.random.rand() < loudness:
            # Store a copy, not a view: row i is overwritten on the bat's next move,
            # which would otherwise desynchronise the best position from its metrics.
            best_global = positions[i].copy()
            best_global_accuracy, best_global_precision, best_global_recall, best_global_f1 = accuracy, precision, recall, f1

    # Update loudness and pulse rate (loudness decays to a floor of 0.1, pulse rate grows to at most 1.0)
    loudness = max(0.1, loudness * 0.95)
    pulse_rate = min(1.0, pulse_rate * 1.05)

    # Print progress
    print(f"Iteration {t + 1}:")
    print(f"   Best Accuracy = {best_global_accuracy:.4f}")
    print(f"   Best Precision = {best_global_precision:.4f}")
    print(f"   Best Recall = {best_global_recall:.4f}")
    print(f"   Best F1 Score = {best_global_f1:.4f}")
    selected_features = np.where(best_global > 0.5)[0]
    print("   Selected Features:", X.columns[selected_features].tolist())
    print()

# Final Results
# Recompute the mask here so the summary does not depend on a loop-scoped variable
# (which would be undefined if max_iter were 0).
selected_features = np.where(best_global > 0.5)[0]
print("Final Results:")
print(f"Best Accuracy: {best_global_accuracy:.4f}")
print(f"Best Precision: {best_global_precision:.4f}")
print(f"Best Recall: {best_global_recall:.4f}")
print(f"Best F1 Score: {best_global_f1:.4f}")
print("Selected Features:", X.columns[selected_features].tolist())

Iteration 1:
   Best Accuracy = 0.9091
   Best Precision = 0.9091
   Best Recall = 1.0000
   Best F1 Score = 0.9524
   Selected Features: ['Total Cases', 'Active', 'Deaths', 'Discharge Ratio']

Iteration 2:
   Best Accuracy = 0.9091
   Best Precision = 0.9091
   Best Recall = 1.0000
   Best F1 Score = 0.9524
   Selected Features: ['Total Cases', 'Active', 'Deaths', 'Discharge Ratio']

Iteration 3:
   Best Accuracy = 0.9091
   Best Precision = 0.9091
   Best Recall = 1.0000
   Best F1 Score = 0.9524
   Selected Features: ['Total Cases', 'Active', 'Deaths', 'Discharge Ratio']

Iteration 4:
   Best Accuracy = 0.9091
   Best Precision = 0.9091
   Best Recall = 1.0000
   Best F1 Score = 0.9524
   Selected Features: ['Total Cases', 'Active', 'Deaths', 'Discharge Ratio']

Iteration 5:
   Best Accuracy = 0.9091
   Best Precision = 0.9091
   Best Recall = 1.0000
   Best F1 Score = 0.9524
   Selected Features: ['Total Cases', 'Active', 'Deaths', 'Discharge Ratio']

Iteration 6:
   Best Accuracy 