<a href="https://colab.research.google.com/github/ELiTE0005/NNML-algorithms/blob/main/Candidate_Elimination.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Read the raw intrusion records from disk
file_path = "cybersecurity_intrusion_data.csv"
df = pd.read_csv(file_path)

# Numeric columns that have to be discretised, because the hypothesis
# representation used later only compares attribute values for equality
numeric_features = [
    "network_packet_size",
    "login_attempts",
    "session_duration",
    "ip_reputation_score",
    "failed_logins",
    "unusual_time_access",
]

for column in numeric_features:
    distinct_count = df[column].nunique()

    # One or two distinct values: keep them as-is, just stringified
    if distinct_count <= 2:
        df[column] = df[column].astype(str)
        continue

    if distinct_count > 4:
        # Many distinct values: four equal-width bins with descriptive names
        bin_count = 4
        bin_labels = ["low", "medium", "high", "very_high"]
    else:
        # Three or four distinct values: one bin per value, labelled "0", "1", ...
        bin_count = distinct_count
        bin_labels = [str(i) for i in range(distinct_count)]

    df[column] = pd.cut(df[column], bins=bin_count, labels=bin_labels, include_lowest=True)

# The session identifier is not a feature; remove it when the column exists
df = df.drop(columns=["session_id"], errors='ignore')

# Split into the target vector (y) and the attribute matrix (X)
y = df["attack_detected"].values
X = df.drop(columns=["attack_detected"]).values

def _covers(hypothesis, instance):
    """Return True if `hypothesis` classifies `instance` as positive.

    A hypothesis is a sequence of attribute constraints; each constraint is
    either the wildcard "?" (any value accepted) or one required value.
    """
    return all(h == "?" or h == v for h, v in zip(hypothesis, instance))


def _more_general_or_equal(h1, h2):
    """Return True if `h1` covers every instance that `h2` covers."""
    return all(a == "?" or a == b for a, b in zip(h1, h2))


def candidate_elimination(X, y):
    """Compute the S and G boundaries of the version space for (X, y).

    Args:
        X: Sequence of instances, each a sequence of attribute values.
        y: Sequence of labels aligned with X; 1 marks a positive example,
            0 a negative one. Any other label is skipped.

    Returns:
        A pair (S, G). S is the specific boundary as a list of constraints.
        G is the general boundary as a list of such lists. An empty G means
        no conjunctive hypothesis is consistent with the data (the version
        space has collapsed).

    Raises:
        ValueError: If y contains no positive example to seed S from.
    """
    first_positive = next((row for row, label in zip(X, y) if label == 1), None)
    if first_positive is None:
        raise ValueError(
            "Candidate Elimination needs at least one positive example (label == 1)."
        )

    # Seed S with the first positive example, so that negatives appearing
    # before it in the data can already specialize G towards S.
    S = list(first_positive)
    G = [["?"] * len(S)]

    for instance, label in zip(X, y):
        if label == 1:
            # Minimally generalize S so that it covers this positive example.
            S = [s if s == v else "?" for s, v in zip(S, instance)]
            # Drop general hypotheses that fail to cover a positive example.
            G = [g for g in G if _covers(g, instance)]

        elif label == 0:
            # S is never generalized by a negative example. If S covers this
            # instance, no specialization below is possible and G becomes
            # empty, which signals that the version space has collapsed.
            specialized = []
            for g in G:
                if not _covers(g, instance):
                    # Already excludes the negative example: keep unchanged.
                    specialized.append(g)
                    continue
                # Minimal specializations: constrain one wildcard to the value
                # S requires there, provided that value differs from the
                # negative instance's. The result excludes the instance while
                # remaining more general than S.
                for j, value in enumerate(instance):
                    if g[j] == "?" and S[j] != "?" and S[j] != value:
                        candidate = list(g)
                        candidate[j] = S[j]
                        specialized.append(candidate)

            # Remove duplicates, then keep only the maximally general members.
            unique = []
            for h in specialized:
                if h not in unique:
                    unique.append(h)
            G = [
                h for h in unique
                if not any(
                    other != h and _more_general_or_equal(other, h)
                    for other in unique
                )
            ]

    return S, G


# Run Candidate Elimination on the prepared data: S is the specific boundary,
# G the list of maximally general hypotheses.
S, G = candidate_elimination(X, y)

# Freeze both boundaries into tuples before reporting them
G_final = list(map(tuple, G))
S_final = tuple(S)

for heading, boundary in (
    ("Specific Hypothesis (S):", S_final),
    ("General Hypothesis Set (G):", G_final),
):
    print(heading, boundary)


Specific Hypothesis (S): ('?', '?', '?', '?', '?', '?', '?', '?', '?')
General Hypothesis Set (G): []
