In [None]:
import json
import re
from datetime import datetime

class CitationLogger:
    """Render a citation for a code repository as Markdown, BibTeX, or JSON.

    Parameters
    ----------
    author : str
        Author name exactly as it should be printed.
    title : str
        Title of the cited work.
    repo_url : str
        URL of the repository. Its last path segment becomes the link text
        of the Markdown citation.
    year : int, optional
        Publication year. Any falsy value (including ``None``) falls back to
        the current calendar year.

    Attributes
    ----------
    bib_key : str
        BibTeX entry key, computed once at construction time from the author,
        the year and the title.
    """

    def __init__(self, author, title, repo_url, year=None):
        self.author = author
        self.title = title
        self.repo_url = repo_url
        self.year = year or datetime.now().year
        self.bib_key = self._generate_bib_key()

    @staticmethod
    def _leading_token_letters(text):
        """Return the lowercased ASCII letters of the first whitespace-separated token.

        Returns an empty string when ``text`` is empty or whitespace-only, so
        that building a key never fails on a missing author or title.
        """
        tokens = text.split()
        if not tokens:
            return ''
        return re.sub(r'[^a-zA-Z]', '', tokens[0].lower())

    def _generate_bib_key(self):
        """Build the BibTeX key as ``<first author token><year><first title token>``."""
        first = self._leading_token_letters(self.author)
        title_token = self._leading_token_letters(self.title)
        return f"{first}{self.year}{title_token}"

    def _repo_name(self):
        """Return the last path segment of ``repo_url``, ignoring trailing slashes."""
        return self.repo_url.rstrip('/').split('/')[-1]

    def markdown(self):
        """Return the citation as a Markdown snippet whose link text is the repository name."""
        # The two spaces after the heading are spelled out explicitly so that
        # editors which strip trailing whitespace cannot remove them silently.
        return (
            "### Citation  \n"
            f"{self.author} ({self.year}). *{self.title}*. "
            f"GitHub Repository: [{self._repo_name()}]({self.repo_url})\n"
        )

    def bibtex(self):
        """Return the citation as a BibTeX ``@misc`` entry keyed by ``bib_key``."""
        return f"""@misc{{{self.bib_key},
  author       = {{{self.author}}},
  title        = {{{self.title}}},
  year         = {{{self.year}}},
  howpublished = {{\\url{{{self.repo_url}}}}},
  note         = {{GitHub Repository}}
}}"""

    def json(self):
        """Return the citation as a plain dict that ``json.dumps`` can serialize."""
        return {
            "author": self.author,
            "year": self.year,
            "title": self.title,
            "repository": "GitHub",
            "url": self.repo_url
        }

    def display(self):
        """Print the Markdown, BibTeX and JSON renderings one after another."""
        print("📄 Markdown Citation:\n")
        print(self.markdown())
        print("\n📚 BibTeX Citation:\n")
        print(self.bibtex())
        print("\n🧾 JSON Citation:\n")
        print(json.dumps(self.json(), indent=4))

# 🔧 Example usage: describe this repository once, then print every citation format
citation = CitationLogger(
    "Nouri Baher",
    "Hybrid Oversampling for Intrusion Detection: SMOTE + KGSMOTE",
    "https://github.com/Nouribaher/ids-hybrid-oversampling-smote-kgsmote",
    2025,
)

citation.display()

In [None]:
!pip install imbalanced-learn

In [None]:
import pandas as pd, numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Read the dataset from the Excel workbook
df = pd.read_excel('KDDTest_Normalized.xlsx')

# Encode the label column as integer codes and keep the original value next to it
le = LabelEncoder()
label_codes = le.fit_transform(df['label'])
df['label'] = label_codes
df['label_name'] = le.inverse_transform(label_codes)

# Target vector, then the feature matrix rescaled column-wise with MinMaxScaler
y = df['label']
X_raw = df.drop(columns=['label', 'label_name'])
scaler = MinMaxScaler().fit(X_raw)
X_scaled = scaler.transform(X_raw)


In [None]:
# Imbalanced dataset: number of rows per encoded class before any resampling
df['label'].value_counts()

In [None]:
from collections import Counter

# Tally the rows per encoded class and retain the classes with at least 3 of them
counts = Counter(y)
valid_classes = [cls for cls in counts if counts[cls] >= 3]

# One boolean row mask, applied to the scaled features and to the target alike
mask = df['label'].isin(valid_classes)
X_filtered, y_filtered = X_scaled[mask], y[mask]

In [None]:
from imblearn.over_sampling import SMOTE
# Balance the filtered dataset with SMOTE.
# k_neighbors=2 requires at least 3 samples per class, which is what the
# preceding filter (count >= 3) leaves in X_filtered / y_filtered.
# random_state is fixed so that the synthetic rows, and therefore the exported
# workbook, are identical on every Restart & Run All.
smote = SMOTE(k_neighbors=2, random_state=42)
X_smote, y_smote = smote.fit_resample(X_filtered, y_filtered)

# Reassemble a labelled frame: scaled features, encoded label, readable label
df_smote = pd.DataFrame(X_smote, columns=X_raw.columns)
df_smote['label'] = y_smote
df_smote['label_name'] = le.inverse_transform(y_smote)

df_smote.to_excel('SMOTE_Balanced.xlsx', index=False)

In [None]:
# Rows per encoded class after the SMOTE resampling above
df_smote['label'].value_counts()

In [None]:
from sklearn.neighbors import KernelDensity

# Choose the class to augment with KDE sampling: the last entry of valid_classes
minority_class_id = valid_classes[-1]
minority_df = df[df['label'] == minority_class_id]
X_minority = scaler.transform(minority_df.drop(columns=['label', 'label_name']))

# Fit a Gaussian KDE on the scaled rows of that class and draw synthetic points.
# random_state is fixed so that the exported workbook is reproducible.
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(X_minority)
synthetic = kde.sample(n_samples=500, random_state=42)

synthetic_df = pd.DataFrame(synthetic, columns=X_raw.columns)
synthetic_df['label'] = minority_class_id
synthetic_df['label_name'] = le.inverse_transform([minority_class_id])[0]

# The synthetic rows live in the scaler's output space, so they are combined
# with the *scaled* originals. Concatenating them with the unscaled `df` would
# put two different feature spaces into one table.
df_scaled = pd.DataFrame(X_scaled, columns=X_raw.columns, index=df.index)
df_scaled['label'] = df['label']
df_scaled['label_name'] = df['label_name']

df_kgsmote = pd.concat([df_scaled, synthetic_df], ignore_index=True)
df_kgsmote.to_excel('KGSMOTE_Balanced.xlsx', index=False)

In [None]:
def _drop_rare_classes(X, y, min_samples):
    """Keep only the rows whose class occurs at least ``min_samples`` times in ``y``."""
    from collections import Counter

    import numpy as np

    counts = Counter(y)
    valid_classes = [cls for cls, count in counts.items() if count >= min_samples]
    mask = np.isin(y, valid_classes)
    return X[mask], y[mask]


def _kde_oversample(X, y, minority_class_id, n_samples, bandwidth, random_state):
    """Append ``n_samples`` rows drawn from a Gaussian KDE of one class to ``(X, y)``."""
    import numpy as np

    X_arr = np.asarray(X)
    y_arr = np.asarray(y)
    minority_mask = np.isin(y_arr, [minority_class_id])
    if not minority_mask.any():
        # Fail with a readable message instead of letting the KDE choke on an empty array.
        raise ValueError(
            f"Class {minority_class_id!r} has no samples, so no KDE can be fitted on it."
        )

    # Imported only once it is really needed, after all input validation.
    from sklearn.neighbors import KernelDensity

    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(X_arr[minority_mask])
    synthetic = kde.sample(n_samples=n_samples, random_state=random_state)

    # Stack positionally: aligning by column label would misplace the synthetic
    # rows whenever X is a DataFrame with named columns.
    X_res = np.vstack([X_arr, synthetic])
    y_res = np.concatenate(
        [y_arr, np.full(n_samples, minority_class_id, dtype=y_arr.dtype)]
    )
    return X_res, y_res


def balance(X, y, method='smote', minority_class_id=None, n_samples=500, k_neighbors=2,
            bandwidth=0.2, random_state=None):
    """Oversample ``(X, y)`` with SMOTE, KDE sampling of one class, or both.

    Parameters
    ----------
    X : array-like of shape (n_rows, n_features)
        Feature matrix. Must support boolean-mask row indexing.
    y : array-like of shape (n_rows,)
        Class label of every row.
    method : {'smote', 'kgsmote', 'hybrid'}
        'smote'   - drop classes with fewer than ``k_neighbors + 1`` rows, then run SMOTE.
        'kgsmote' - append ``n_samples`` rows drawn from a Gaussian KDE fitted on
                    ``minority_class_id``; no class is dropped.
        'hybrid'  - run the 'smote' step, then the KDE step on its output.
    minority_class_id : label, optional
        Class to augment with KDE samples. Required for 'kgsmote' and 'hybrid'.
    n_samples : int
        Number of KDE samples to append.
    k_neighbors : int
        Neighbour count handed to SMOTE.
    bandwidth : float
        Bandwidth of the Gaussian KDE.
    random_state : int or None
        Seed handed to both SMOTE and the KDE sampler. ``None`` keeps the
        previous unseeded behaviour.

    Returns
    -------
    (X_res, y_res)
        Resampled features and labels. 'kgsmote' and 'hybrid' return NumPy
        arrays; 'smote' returns whatever ``SMOTE.fit_resample`` returns.

    Raises
    ------
    ValueError
        If ``method`` is unknown, if ``minority_class_id`` is missing for
        'kgsmote' / 'hybrid', or if that class has no rows to fit the KDE on
        (for 'hybrid': no rows left once rare classes have been dropped).
    """
    import numpy as np

    if method not in ('smote', 'kgsmote', 'hybrid'):
        raise ValueError("Method must be 'smote', 'kgsmote', or 'hybrid'.")

    if method == 'kgsmote':
        if minority_class_id is None:
            raise ValueError("You must specify minority_class_id for KGSMOTE.")
        return _kde_oversample(X, y, minority_class_id, n_samples, bandwidth, random_state)

    # Validate before the expensive SMOTE step rather than after it.
    if method == 'hybrid' and minority_class_id is None:
        raise ValueError("You must specify minority_class_id for hybrid.")

    # SMOTE needs k_neighbors + 1 rows per class, so rarer classes are removed first.
    X_safe, y_safe = _drop_rare_classes(X, y, min_samples=k_neighbors + 1)

    if method == 'hybrid' and not np.isin(np.asarray(y_safe), [minority_class_id]).any():
        raise ValueError(
            f"Class {minority_class_id!r} is missing or has fewer than "
            f"{k_neighbors + 1} samples, so nothing is left to refine after filtering."
        )

    # Imported only on the paths that use SMOTE, so 'kgsmote' works without imbalanced-learn.
    from imblearn.over_sampling import SMOTE

    smote = SMOTE(k_neighbors=k_neighbors, random_state=random_state)
    X_smote, y_smote = smote.fit_resample(X_safe, y_safe)
    if method == 'smote':
        return X_smote, y_smote

    # Hybrid: enrich one class of the SMOTE output with KDE samples.
    return _kde_oversample(X_smote, y_smote, minority_class_id, n_samples, bandwidth, random_state)

In [None]:
# Plain SMOTE through the reusable helper, on the scaled features
X_bal, y_bal = balance(X=X_scaled, y=y, method='smote')

In [None]:
# Labelled frame from the resampled arrays: features, encoded label, decoded label
df_smote = pd.DataFrame(X_bal, columns=X_raw.columns).assign(
    label=y_bal,
    label_name=le.inverse_transform(y_bal),
)

In [None]:
# Write the SMOTE-balanced frame to disk, leaving the row index out of the sheet
df_smote.to_excel(excel_writer='results-SMOTE_Balanced.xlsx', index=False)

In [None]:
# Rows per encoded class in the frame produced by balance(..., method='smote')
df_smote['label'].value_counts()

In [None]:
# KDE-only oversampling of encoded class 5, exported together with both label columns
X_kg, y_kg = balance(X_scaled, y, method='kgsmote', minority_class_id=5)
df_kgsmote = pd.DataFrame(X_kg, columns=X_raw.columns).assign(
    label=y_kg,
    label_name=le.inverse_transform(y_kg),
)
df_kgsmote.to_excel('results-KGSMOTE_Balanced.xlsx', index=False)

In [None]:
# SMOTE followed by KDE sampling of encoded class 5, exported with both label columns
X_hybrid, y_hybrid = balance(X_scaled, y, method='hybrid', minority_class_id=5)
df_hybrid = pd.DataFrame(X_hybrid, columns=X_raw.columns).assign(
    label=y_hybrid,
    label_name=le.inverse_transform(y_hybrid),
)
df_hybrid.to_excel('results-Hybrid_Balanced.xlsx', index=False)