ModuleNotFoundError: No module named 'sko.ACO'

In [2]:
pip install sko


Collecting sko
  Obtaining dependency information for sko from https://files.pythonhosted.org/packages/fe/cf/6cd183a6ffe53db8dabbbd98a8a8497f34d87924a3718deb3d47ae349ab5/sko-0.5.7-py3-none-any.whl.metadata
  Downloading sko-0.5.7-py3-none-any.whl.metadata (1.9 kB)
Downloading sko-0.5.7-py3-none-any.whl (23 kB)
Installing collected packages: sko
Successfully installed sko-0.5.7
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


SVM+PSO

In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.feature_selection import VarianceThreshold
from imblearn.over_sampling import SMOTE
from pyswarm import pso
import time

# Step 1: Load and preprocess the dataset
# Column names for the header-less CSV files: 41 attributes followed by "label".
features = [
    "duration", "protocol_type", "service", "flag",
    "src_bytes", "dst_bytes", "land", "wrong_fragment",
    "urgent", "hot", "num_failed_logins", "logged_in",
    "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count",
    "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
    "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count",
    "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate",
    "dst_host_rerror_rate", "dst_host_srv_rerror_rate",
    "label",
]

df_train, df_test = (
    pd.read_csv(csv_path, header=None, names=features)
    for csv_path in ("NSL_KDD_Train (1).csv", "NSL_KDD_Test (1).csv")
)

# Encode categorical features: the encoder is re-fitted on each training column
# and the same mapping is then applied to the matching test column.
categorical_features = ["protocol_type", "service", "flag"]
encoder = LabelEncoder()
for col in categorical_features:
    df_train[col] = encoder.fit_transform(df_train[col])
    df_test[col] = encoder.transform(df_test[col])

# Normalize numerical features: the scaler learns its ranges from the training
# rows only, and those ranges are reused for the test rows.
scaler = MinMaxScaler()
scaler.fit(df_train.drop(columns=["label"]))
X_train = scaler.transform(df_train.drop(columns=["label"]))
X_test = scaler.transform(df_test.drop(columns=["label"]))

# Binary target: 1 for "normal" records, 0 for everything else.
y_train = (df_train["label"] == "normal").astype("int64")
y_test = (df_test["label"] == "normal").astype("int64")

# Step 2: Apply Variance Threshold to remove low-variance features
selector = VarianceThreshold(threshold=0.01)
selector.fit(X_train)
X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

# Step 3: Oversample minority class using SMOTE
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Step 4: Define PSO fitness function
def fitness_function(features_selected):
    """Return the hold-out error of an RBF SVM trained on a candidate feature subset.

    Parameters
    ----------
    features_selected : array-like
        One value per feature column; a column is used when its value is
        greater than 0.5.

    Returns
    -------
    float
        ``1 - accuracy`` on a stratified 20% hold-out of the balanced training
        data, or ``1.0`` when no column is selected.

    Notes
    -----
    The score is computed on a split of the training data rather than on
    ``X_test``/``y_test``: using the test set to drive feature selection would
    leak it into the model and inflate the final reported metrics.
    NOTE(review): the hold-out is taken after SMOTE, so synthetic samples may
    be close to rows on the other side of the split — treat the score as a
    relative ranking signal only.
    """
    selected_indices = np.where(np.asarray(features_selected) > 0.5)[0]
    if len(selected_indices) == 0:  # Avoid empty feature subsets
        return 1.0  # Large error for invalid subsets

    # Fixed random_state: every candidate is scored on the same split, so
    # fitness values are comparable between particles and iterations.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train_balanced[:, selected_indices],
        y_train_balanced,
        test_size=0.2,
        stratify=y_train_balanced,
        random_state=42,
    )

    model = SVC(kernel='rbf', max_iter=500)
    model.fit(X_fit, y_fit)
    accuracy = model.score(X_val, y_val)
    return 1 - accuracy  # Minimize error

# Step 5: Run PSO for feature selection
num_features = X_train.shape[1]
lb = [0] * num_features
ub = [1] * num_features

# Seed NumPy's global RNG so the stochastic search gives the same result on
# every Restart & Run All.
# NOTE(review): assumes pyswarm draws its random numbers from np.random — confirm.
np.random.seed(42)

# perf_counter is a monotonic clock, so the measured duration is not affected
# by system clock adjustments.
start_time = time.perf_counter()
best_features, _ = pso(fitness_function, lb, ub, swarmsize=20, maxiter=10)
feature_selection_time = time.perf_counter() - start_time

# Step 6: Train SVM on selected features
# A feature is kept when its component of the best position exceeds 0.5
# (the same rule the fitness function applies).
selected_features_indices = np.where(best_features > 0.5)[0]
if len(selected_features_indices) == 0:
    raise ValueError(
        "PSO selected no features (no position component above 0.5); "
        "cannot train the final SVM."
    )
X_train_selected = X_train_balanced[:, selected_features_indices]
X_test_selected = X_test[:, selected_features_indices]

param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.001, 0.01, 0.1, 'scale'],
    'kernel': ['rbf']
}
grid_search = GridSearchCV(SVC(max_iter=1000), param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train_selected, y_train_balanced)

best_model = grid_search.best_estimator_

# Step 7: Evaluate the model
y_pred = best_model.predict(X_test_selected)
classification_report_final = classification_report(y_test, y_pred, output_dict=True)

# Metrics
# The "1" entry is the class encoded as 1, i.e. records labelled "normal".
accuracy = classification_report_final["accuracy"]
precision = classification_report_final["1"]["precision"]
recall = classification_report_final["1"]["recall"]
f1_score = classification_report_final["1"]["f1-score"]

# Print results
print(f"Number of Selected Features: {len(selected_features_indices)}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1_score:.4f}")
print(f"Feature Selection Time: {feature_selection_time:.2f} seconds")


ModuleNotFoundError: No module named 'sko.ACO'

SVM+ACO

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
import time

# Load the dataset
# Column names for the header-less CSV files: 41 attributes followed by "label".
features = [
    "duration", "protocol_type", "service", "flag",
    "src_bytes", "dst_bytes", "land", "wrong_fragment",
    "urgent", "hot", "num_failed_logins", "logged_in",
    "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count",
    "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
    "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count",
    "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate",
    "dst_host_rerror_rate", "dst_host_srv_rerror_rate",
    "label",
]

df_train, df_test = (
    pd.read_csv(csv_path, header=None, names=features)
    for csv_path in ("NSL_KDD_Train (1).csv", "NSL_KDD_Test (1).csv")
)

# Encode categorical features: the encoder is re-fitted on each training column
# and the same mapping is then applied to the matching test column.
categorical_features = ["protocol_type", "service", "flag"]
encoder = LabelEncoder()
for col in categorical_features:
    df_train[col] = encoder.fit_transform(df_train[col])
    df_test[col] = encoder.transform(df_test[col])

# Normalize numerical features: the scaler learns its ranges from the training
# rows only, and those ranges are reused for the test rows.
scaler = MinMaxScaler()
scaler.fit(df_train.drop(columns=["label"]))
X_train = scaler.transform(df_train.drop(columns=["label"]))
X_test = scaler.transform(df_test.drop(columns=["label"]))

# Binary target: 1 for "normal" records, 0 for everything else.
y_train = (df_train["label"] == "normal").astype("int64")
y_test = (df_test["label"] == "normal").astype("int64")

# Apply SMOTE to balance the classes
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Step 1: Define the fitness function for ACO (for feature selection)
def fitness_function(ant_solution):
    """Return the hold-out error of an RBF SVM trained on an ant's feature subset.

    Parameters
    ----------
    ant_solution : array-like
        One value per feature column (boolean mask or scores); a column is
        used when its value is greater than 0.5.

    Returns
    -------
    float
        ``1 - accuracy`` on a stratified 20% hold-out of the balanced training
        data, or ``1.0`` when no column is selected.

    Notes
    -----
    The score is computed on a split of the training data rather than on
    ``X_test``/``y_test``: using the test set to drive feature selection would
    leak it into the model and inflate the final reported metrics.
    NOTE(review): the hold-out is taken after SMOTE, so synthetic samples may
    be close to rows on the other side of the split — treat the score as a
    relative ranking signal only.
    """
    # Imported locally because this cell's import list does not include it.
    from sklearn.model_selection import train_test_split

    # Extract selected features based on ant's solution
    selected_indices = np.where(np.asarray(ant_solution) > 0.5)[0]
    if len(selected_indices) == 0:  # Avoid empty feature subsets
        return 1.0  # Large error for invalid subsets

    # Fixed random_state: every candidate is scored on the same split, so
    # fitness values are comparable between ants and iterations.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train_balanced[:, selected_indices],
        y_train_balanced,
        test_size=0.2,
        stratify=y_train_balanced,
        random_state=42,
    )

    model = SVC(kernel='rbf', max_iter=500)
    model.fit(X_fit, y_fit)
    accuracy = model.score(X_val, y_val)
    return 1 - accuracy  # Minimize error (maximize accuracy)

# Step 2: Ant Colony Optimization (ACO) Parameters
class ACO:
    """Binary ant-colony search over feature-subset masks.

    Every feature carries two pheromone trails, stored in ``self.pheromone``
    with shape ``(2, n_features)``: row 0 is the "exclude" trail and row 1 the
    "include" trail. In each iteration every ant samples a boolean mask using
    the per-feature inclusion probability
    ``include**alpha / (include**alpha + exclude**alpha)``, the masks are
    scored with ``fitness_function`` (lower is better), all trails evaporate,
    and the ``n_best`` best distinct ants of the iteration deposit pheromone on
    the choices they made.

    Parameters
    ----------
    n_features : int
        Length of the masks to search over.
    fitness_function : callable
        Maps a boolean mask of length ``n_features`` to a value to minimise.
        Deposits are ``1 / (1 + fitness)``, so values are expected to be
        non-negative (e.g. an error rate).
    n_ants : int
        Number of masks sampled per iteration.
    n_best : int
        Number of top-ranked ants that reinforce the trails each iteration
        (capped at ``n_ants``).
    n_iter : int
        Number of iterations.
    pheromone_decay : float
        Factor the trails are multiplied by each iteration (evaporation).
    pheromone_init : float
        Initial value of every trail; equal trails give a 0.5 inclusion
        probability for every feature.
    alpha : float
        Exponent applied to the trails when computing probabilities.
    beta : float
        Stored for interface compatibility; no heuristic term is used, so it
        currently has no effect.
    random_state : int or None
        Seed for a private ``RandomState``. With ``None`` (default) NumPy's
        global RNG is used, so ``np.random.seed`` still controls the run.
    """

    def __init__(self, n_features, fitness_function, n_ants=20, n_best=5, n_iter=10, pheromone_decay=0.95, pheromone_init=0.1, alpha=1, beta=2, random_state=None):
        self.n_features = n_features
        self.fitness_function = fitness_function
        self.n_ants = n_ants
        self.n_best = n_best
        self.n_iter = n_iter
        self.pheromone_decay = pheromone_decay
        self.pheromone_init = pheromone_init
        self.alpha = alpha
        self.beta = beta
        self.random_state = random_state

        # The np.random module and RandomState both expose random_sample().
        self._rng = np.random if random_state is None else np.random.RandomState(random_state)

        # Row 0: "exclude" trail, row 1: "include" trail (one column per feature).
        self.pheromone = np.full((2, self.n_features), float(self.pheromone_init))
        self.best_solution = None
        self.best_fitness = float("inf")

    def _inclusion_probabilities(self):
        """Return, per feature, the probability that an ant includes it."""
        exclude, include = self.pheromone ** self.alpha
        total = include + exclude
        # Fall back to an unbiased coin where both trails are zero.
        probabilities = np.full(self.n_features, 0.5)
        np.divide(include, total, out=probabilities, where=total > 0)
        return probabilities

    def run(self):
        """Run the search and return ``(best_solution, best_fitness)``.

        ``best_solution`` is a boolean mask of length ``n_features`` (``None``
        if no iteration was executed) and ``best_fitness`` the lowest fitness
        value seen.
        """
        n_reinforced = max(0, min(self.n_best, self.n_ants))
        columns = np.arange(self.n_features)

        for _ in range(self.n_iter):
            # Construct solutions for each ant, guided by the pheromone trails
            probabilities = self._inclusion_probabilities()
            solutions = self._rng.random_sample((self.n_ants, self.n_features)) < probabilities

            # Evaluate fitness of all ants' solutions
            fitness_values = np.array([self.fitness_function(solution) for solution in solutions])

            # Rank ants from best (lowest fitness) to worst
            ranking = np.argsort(fitness_values, kind="stable")

            # Update the best solution
            best_ant = ranking[0]
            if fitness_values[best_ant] < self.best_fitness:
                self.best_fitness = fitness_values[best_ant]
                # Copy so the stored mask does not alias this iteration's array.
                self.best_solution = solutions[best_ant].copy()

            # Update pheromone levels
            self.pheromone *= self.pheromone_decay  # Decay pheromone
            for ant in ranking[:n_reinforced]:
                # Lower error -> larger deposit on the choices this ant made.
                deposit = 1.0 / (1.0 + fitness_values[ant])
                self.pheromone[solutions[ant].astype(int), columns] += deposit

        return self.best_solution, self.best_fitness

# Step 3: Configure and run ACO for feature selection
# Seed NumPy's global RNG so the stochastic search gives the same result on
# every Restart & Run All.
np.random.seed(42)

# perf_counter is a monotonic clock, so the measured duration is not affected
# by system clock adjustments.
start_time = time.perf_counter()
aco = ACO(n_features=X_train.shape[1], fitness_function=fitness_function, n_ants=20, n_best=5, n_iter=10)
best_solution, _ = aco.run()

feature_selection_time = time.perf_counter() - start_time

# Step 4: Train SVM on selected features
if best_solution is None:
    raise ValueError("ACO returned no solution; cannot train the final SVM.")
selected_features_indices = np.where(best_solution > 0.5)[0]
if len(selected_features_indices) == 0:
    raise ValueError("ACO selected no features; cannot train the final SVM.")
X_train_selected = X_train_balanced[:, selected_features_indices]
X_test_selected = X_test[:, selected_features_indices]

# Train SVM
model = SVC(kernel='rbf', max_iter=1000)
model.fit(X_train_selected, y_train_balanced)
y_pred = model.predict(X_test_selected)

# Step 5: Generate classification report
classification_report_final = classification_report(y_test, y_pred, output_dict=True)

# Metrics
# The "1" entry is the class encoded as 1, i.e. records labelled "normal".
accuracy = classification_report_final["accuracy"]
precision = classification_report_final["1"]["precision"]
recall = classification_report_final["1"]["recall"]
f1_score = classification_report_final["1"]["f1-score"]

# Print results
print(f"Number of Selected Features: {len(selected_features_indices)}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1_score:.4f}")
print(f"Feature Selection Time: {feature_selection_time:.2f} seconds")
