In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Read the raw table from disk.
file_path = "dataset.csv"
data = pd.read_csv(file_path)

# The 'true_label' column is expected to carry the ground truth
# (1 = normal, -1 = anomaly); every remaining column is used as a feature.
features = data.drop('true_label', axis=1)
true_label = data['true_label']

# Fit the scaler on the full feature table, then transform that same table.
scaler = StandardScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

# Function to calculate metrics
def calculate_metrics(true, pred, model_name):
    """Print and return precision, recall and F1 for the anomaly class.

    Anomalies (label -1) are treated as the positive class.

    Args:
        true: Ground-truth labels (1 = normal, -1 = anomaly).
        pred: Predicted labels, same length and encoding as ``true``.
        model_name: Display name used in the printed header.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    # zero_division=0 keeps the 0.0 fallback scikit-learn already uses for an
    # undefined score, but without an UndefinedMetricWarning when a detector
    # flags no anomalies (or the labels contain none).
    score_kwargs = {"pos_label": -1, "zero_division": 0}
    precision = precision_score(true, pred, **score_kwargs)
    recall = recall_score(true, pred, **score_kwargs)
    f1 = f1_score(true, pred, **score_kwargs)
    print(f"{model_name} Metrics:")
    print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
    return precision, recall, f1

# LOF
# fit_predict labels the very samples it was fitted on; the np.where pass
# normalises the output to the -1 (anomaly) / 1 (normal) convention.
lof = LocalOutlierFactor()
lof_pred = np.where(lof.fit_predict(features_scaled) != -1, 1, -1)
lof_metrics = calculate_metrics(true_label, lof_pred, "LOF")

# One-Class SVM
# Fitted on the scaled features, then asked to label those same rows.
ocsvm = OneClassSVM()
ocsvm_pred = np.where(
    ocsvm.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
ocsvm_metrics = calculate_metrics(true_label, ocsvm_pred, "One-Class SVM")

# Elliptic Envelope
# Same fit-then-predict pattern as the One-Class SVM above.
elliptic = EllipticEnvelope()
elliptic_pred = np.where(
    elliptic.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
elliptic_metrics = calculate_metrics(true_label, elliptic_pred, "Elliptic Envelope")

# K-Means
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(features_scaled)
cluster_ids = kmeans.labels_
# Cluster ids are arbitrary, so the anomaly cluster is chosen by population:
# the cluster with the fewest members is treated as anomalous (a tie resolves
# to the lower id). minlength keeps an empty cluster visible in the counts.
cluster_sizes = np.bincount(cluster_ids, minlength=kmeans.n_clusters)
kmeans_pred = np.where(cluster_ids == cluster_sizes.argmin(), -1, 1)
kmeans_metrics = calculate_metrics(true_label, kmeans_pred, "K-Means")

# DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(features_scaled)
# Points DBSCAN leaves unclustered carry label -1 and are reported as
# anomalies; every clustered point is reported as normal (1).
dbscan_pred = np.where(dbscan.labels_ != -1, 1, -1)
dbscan_metrics = calculate_metrics(true_label, dbscan_pred, "DBSCAN")

# Autoencoder
# Symmetric dense autoencoder trained to reproduce its own (scaled) input.
input_dim = features_scaled.shape[1]
autoencoder = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(32, activation='relu'),
    # Linear output: the inputs come from StandardScaler, so they are centred
    # on zero and unbounded. A sigmoid would confine reconstructions to (0, 1)
    # and inflate the error of every negative or large feature value.
    Dense(input_dim, activation='linear')
])
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(features_scaled, features_scaled, epochs=10, batch_size=32, verbose=0)

# Per-sample mean squared reconstruction error is the anomaly score.
reconstruction = autoencoder.predict(features_scaled)
reconstruction_error = np.mean(np.square(features_scaled - reconstruction), axis=1)
# Samples whose error exceeds the 95th percentile are flagged as anomalies.
threshold = np.percentile(reconstruction_error, 95)
autoencoder_pred = np.where(reconstruction_error > threshold, -1, 1)
autoencoder_metrics = calculate_metrics(true_label, autoencoder_pred, "Autoencoder")

# Analyze confusion matrices to identify trends in errors
def analyze_errors(true, pred, model_name):
    """Print and return the false-positive and false-negative counts.

    The anomaly label (-1) is the positive class, so a false positive is a
    normal sample predicted as an anomaly and a false negative is an anomaly
    predicted as normal.

    Args:
        true: Ground-truth labels (1 = normal, -1 = anomaly).
        pred: Predicted labels in the same encoding.
        model_name: Display name used in the printed header.

    Returns:
        Tuple ``(fp, fn)``.
    """
    # Rows are true labels and columns are predictions, both ordered [-1, 1],
    # so the flattened matrix reads: TP, FN, FP, TN.
    matrix = confusion_matrix(true, pred, labels=[-1, 1])
    _, fn, fp, _ = matrix.ravel()
    print(f"{model_name} Errors:")
    print(f"False Positives (FP): {fp}, False Negatives (FN): {fn}")
    return fp, fn

# Collect FP/FN counts for every detector; the call order below fixes the
# order of the printed error report.
lof_errors = analyze_errors(true=true_label, pred=lof_pred, model_name="LOF")
ocsvm_errors = analyze_errors(true=true_label, pred=ocsvm_pred, model_name="One-Class SVM")
elliptic_errors = analyze_errors(true=true_label, pred=elliptic_pred, model_name="Elliptic Envelope")
kmeans_errors = analyze_errors(true=true_label, pred=kmeans_pred, model_name="K-Means")
dbscan_errors = analyze_errors(true=true_label, pred=dbscan_pred, model_name="DBSCAN")
autoencoder_errors = analyze_errors(true=true_label, pred=autoencoder_pred, model_name="Autoencoder")

# Side-by-side recap of the scores gathered above.
# NOTE: rows are printed in insertion order; nothing is ranked by F1 here.
print("\nModel Comparison (based on F1-Score):")
metrics = {
    "LOF": lof_metrics,
    "One-Class SVM": ocsvm_metrics,
    "Elliptic Envelope": elliptic_metrics,
    "K-Means": kmeans_metrics,
    "DBSCAN": dbscan_metrics,
    "Autoencoder": autoencoder_metrics,
}

for model, (precision, recall, f1) in metrics.items():
    print("{}: Precision={:.2f}, Recall={:.2f}, F1-Score={:.2f}".format(model, precision, recall, f1))

In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Read the raw table from disk.
file_path = "dataset.csv"
data = pd.read_csv(file_path)

# The 'true_label' column is expected to carry the ground truth
# (1 = normal, -1 = anomaly); every remaining column is used as a feature.
features = data.drop('true_label', axis=1)
true_label = data['true_label']

# Fit the scaler on the full feature table, then transform that same table.
scaler = StandardScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

# Function to calculate metrics
def calculate_metrics(true, pred, model_name):
    """Print and return precision, recall and F1 for the anomaly class.

    Anomalies (label -1) are treated as the positive class.

    Args:
        true: Ground-truth labels (1 = normal, -1 = anomaly).
        pred: Predicted labels, same length and encoding as ``true``.
        model_name: Display name used in the printed header.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    # zero_division=0 keeps the 0.0 fallback scikit-learn already uses for an
    # undefined score, but without an UndefinedMetricWarning when a detector
    # flags no anomalies (or the labels contain none).
    score_kwargs = {"pos_label": -1, "zero_division": 0}
    precision = precision_score(true, pred, **score_kwargs)
    recall = recall_score(true, pred, **score_kwargs)
    f1 = f1_score(true, pred, **score_kwargs)
    print(f"{model_name} Metrics:")
    print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
    return precision, recall, f1

# Function to analyze errors
def analyze_errors(true, pred, model_name):
    """Print FP/FN counts and return them with the offending row positions.

    The anomaly label (-1) is the positive class, so a false positive is a
    normal sample predicted as an anomaly and a false negative is an anomaly
    predicted as normal.

    Args:
        true: Ground-truth labels (1 = normal, -1 = anomaly).
        pred: Predicted labels in the same encoding.
        model_name: Display name used in the printed header.

    Returns:
        Tuple ``(fp, fn, false_positives, false_negatives)`` where the last
        two items are arrays of 0-based row positions.
    """
    # Work on plain arrays so the masks below are compared position by
    # position, exactly like the confusion matrix, even if both inputs are
    # pandas objects with differing indexes.
    true_arr = np.asarray(true)
    pred_arr = np.asarray(pred)

    # Rows are true labels, columns are predictions, both ordered [-1, 1].
    cm = confusion_matrix(true_arr, pred_arr, labels=[-1, 1])
    fp = cm[1][0]  # normal samples flagged as anomalies
    fn = cm[0][1]  # anomalies that were missed
    print(f"{model_name} Errors:")
    print(f"False Positives (FP): {fp}, False Negatives (FN): {fn}")

    # Row positions of the misclassified samples, for follow-up inspection.
    false_positives = np.where((true_arr == 1) & (pred_arr == -1))[0]
    false_negatives = np.where((true_arr == -1) & (pred_arr == 1))[0]
    return fp, fn, false_positives, false_negatives

# Function to refine model settings
def refine_model_settings(model_name, fp_indices, fn_indices):
    """Print a short review checklist for one model's misclassifications.

    Nothing is tuned here: the function only prints the first five entries of
    each index collection together with a generic tuning hint.

    Args:
        model_name: Display name used in the printed header.
        fp_indices: Sliceable collection of false-positive row positions.
        fn_indices: Sliceable collection of false-negative row positions.

    Returns:
        None.
    """
    fp_preview = fp_indices[:5]
    fn_preview = fn_indices[:5]
    # The trailing "\n" on the last entry yields the blank separator line.
    report = "\n".join([
        f"Refinement suggestions for {model_name}:",
        f"Review data at False Positive indices: {fp_preview} (showing top 5)",
        f"Review data at False Negative indices: {fn_preview} (showing top 5)",
        "Consider tuning hyperparameters or re-evaluating thresholds.\n",
    ])
    print(report)

# LOF
# fit_predict labels the very samples it was fitted on; the np.where pass
# normalises the output to the -1 (anomaly) / 1 (normal) convention.
lof = LocalOutlierFactor()
lof_pred = np.where(lof.fit_predict(features_scaled) != -1, 1, -1)
lof_metrics = calculate_metrics(true_label, lof_pred, "LOF")
lof_errors = analyze_errors(true_label, lof_pred, "LOF")
# Items 2 and 3 of the error tuple are the FP and FN row positions.
refine_model_settings("LOF", *lof_errors[2:])

# One-Class SVM
# Fitted on the scaled features, then asked to label those same rows.
ocsvm = OneClassSVM(nu=0.1, gamma='scale')
ocsvm_pred = np.where(
    ocsvm.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
ocsvm_metrics = calculate_metrics(true_label, ocsvm_pred, "One-Class SVM")
ocsvm_errors = analyze_errors(true_label, ocsvm_pred, "One-Class SVM")
refine_model_settings("One-Class SVM", *ocsvm_errors[2:])

# Elliptic Envelope
# Same fit-then-predict pattern, with a 10% contamination setting.
elliptic = EllipticEnvelope(contamination=0.1)
elliptic_pred = np.where(
    elliptic.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
elliptic_metrics = calculate_metrics(true_label, elliptic_pred, "Elliptic Envelope")
elliptic_errors = analyze_errors(true_label, elliptic_pred, "Elliptic Envelope")
refine_model_settings("Elliptic Envelope", *elliptic_errors[2:])

# K-Means
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(features_scaled)
cluster_ids = kmeans.labels_
# Cluster ids are arbitrary, so the anomaly cluster is chosen by population:
# the cluster with the fewest members is treated as anomalous (a tie resolves
# to the lower id). minlength keeps an empty cluster visible in the counts.
cluster_sizes = np.bincount(cluster_ids, minlength=kmeans.n_clusters)
kmeans_pred = np.where(cluster_ids == cluster_sizes.argmin(), -1, 1)
kmeans_metrics = calculate_metrics(true_label, kmeans_pred, "K-Means")
kmeans_errors = analyze_errors(true_label, kmeans_pred, "K-Means")
# Items 2 and 3 of the error tuple are the FP and FN row positions.
refine_model_settings("K-Means", kmeans_errors[2], kmeans_errors[3])

# DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(features_scaled)
# Points DBSCAN leaves unclustered carry label -1 and are reported as
# anomalies; every clustered point is reported as normal (1).
dbscan_pred = np.where(dbscan.labels_ != -1, 1, -1)
dbscan_metrics = calculate_metrics(true_label, dbscan_pred, "DBSCAN")
dbscan_errors = analyze_errors(true_label, dbscan_pred, "DBSCAN")
# Items 2 and 3 of the error tuple are the FP and FN row positions.
refine_model_settings("DBSCAN", *dbscan_errors[2:])

# Autoencoder
# Symmetric dense autoencoder trained to reproduce its own (scaled) input.
input_dim = features_scaled.shape[1]
autoencoder = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(32, activation='relu'),
    # Linear output: the inputs come from StandardScaler, so they are centred
    # on zero and unbounded. A sigmoid would confine reconstructions to (0, 1)
    # and inflate the error of every negative or large feature value.
    Dense(input_dim, activation='linear')
])
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(features_scaled, features_scaled, epochs=10, batch_size=32, verbose=0)

# Per-sample mean squared reconstruction error is the anomaly score.
reconstruction = autoencoder.predict(features_scaled)
reconstruction_error = np.mean(np.square(features_scaled - reconstruction), axis=1)
# Samples whose error exceeds the 95th percentile are flagged as anomalies.
threshold = np.percentile(reconstruction_error, 95)
autoencoder_pred = np.where(reconstruction_error > threshold, -1, 1)
autoencoder_metrics = calculate_metrics(true_label, autoencoder_pred, "Autoencoder")
autoencoder_errors = analyze_errors(true_label, autoencoder_pred, "Autoencoder")
# Items 2 and 3 of the error tuple are the FP and FN row positions.
refine_model_settings("Autoencoder", autoencoder_errors[2], autoencoder_errors[3])

# Recap of the (precision, recall, f1) tuples gathered above.
# NOTE: rows are printed in insertion order; nothing is ranked by F1 here.
print("\nSummary of Model Performance (based on F1-Score):")
models = {
    "LOF": lof_metrics,
    "One-Class SVM": ocsvm_metrics,
    "Elliptic Envelope": elliptic_metrics,
    "K-Means": kmeans_metrics,
    "DBSCAN": dbscan_metrics,
    "Autoencoder": autoencoder_metrics,
}

for model, (precision, recall, f1) in models.items():
    print("{}: Precision={:.2f}, Recall={:.2f}, F1-Score={:.2f}".format(model, precision, recall, f1))

In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Read the raw table from disk.
file_path = "dataset.csv"
data = pd.read_csv(file_path)

# The 'true_label' column is expected to carry the ground truth
# (1 = normal, -1 = anomaly); every remaining column is used as a feature.
features = data.drop('true_label', axis=1)
true_label = data['true_label']

# Fit the scaler on the full feature table, then transform that same table.
scaler = StandardScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

# Function to calculate metrics
def calculate_metrics(true, pred, model_name):
    """Print and return precision, recall and F1 for the anomaly class.

    Anomalies (label -1) are treated as the positive class.

    Args:
        true: Ground-truth labels (1 = normal, -1 = anomaly).
        pred: Predicted labels, same length and encoding as ``true``.
        model_name: Display name used in the printed header.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    # zero_division=0 keeps the 0.0 fallback scikit-learn already uses for an
    # undefined score, but without an UndefinedMetricWarning when a detector
    # flags no anomalies (or the labels contain none).
    score_kwargs = {"pos_label": -1, "zero_division": 0}
    precision = precision_score(true, pred, **score_kwargs)
    recall = recall_score(true, pred, **score_kwargs)
    f1 = f1_score(true, pred, **score_kwargs)
    print(f"{model_name} Metrics:")
    print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
    return precision, recall, f1

# Log of the hyperparameter changes tried per model (filled in below and
# printed at the end of the cell).
adjustments = dict()

# LOF
# Baseline run with 20 neighbours.
lof = LocalOutlierFactor(n_neighbors=20)
lof_pred = np.where(lof.fit_predict(features_scaled) != -1, 1, -1)
lof_metrics = calculate_metrics(true_label, lof_pred, "LOF")
# Second run with 10 neighbours; its scores are printed but not stored.
lof = LocalOutlierFactor(n_neighbors=10)
adjustments["LOF"] = {"n_neighbors": 10}
lof_pred_adjusted = np.where(lof.fit_predict(features_scaled) != -1, 1, -1)
calculate_metrics(true_label, lof_pred_adjusted, "LOF (Adjusted)")

# One-Class SVM
# Baseline run: nu=0.1 with the 'scale' gamma heuristic.
ocsvm = OneClassSVM(nu=0.1, gamma='scale')
ocsvm_pred = np.where(
    ocsvm.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
ocsvm_metrics = calculate_metrics(true_label, ocsvm_pred, "One-Class SVM")
# Second run: smaller nu and an explicit gamma.
ocsvm = OneClassSVM(nu=0.05, gamma=0.1)
adjustments["One-Class SVM"] = {"nu": 0.05, "gamma": 0.1}
ocsvm_pred_adjusted = np.where(
    ocsvm.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
calculate_metrics(true_label, ocsvm_pred_adjusted, "One-Class SVM (Adjusted)")

# Elliptic Envelope
# Baseline run with a 10% contamination setting.
elliptic = EllipticEnvelope(contamination=0.1)
elliptic_pred = np.where(
    elliptic.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
elliptic_metrics = calculate_metrics(true_label, elliptic_pred, "Elliptic Envelope")
# Second run with a 5% contamination setting.
elliptic = EllipticEnvelope(contamination=0.05)
adjustments["Elliptic Envelope"] = {"contamination": 0.05}
elliptic_pred_adjusted = np.where(
    elliptic.fit(features_scaled).predict(features_scaled) != -1, 1, -1
)
calculate_metrics(true_label, elliptic_pred_adjusted, "Elliptic Envelope (Adjusted)")

# K-Means
def _flag_smallest_cluster(cluster_ids, n_clusters):
    """Return -1 for members of the least-populated cluster and 1 otherwise.

    Cluster ids are arbitrary, so the anomaly cluster is chosen by population
    rather than by id (a tie resolves to the lower id). ``n_clusters`` keeps
    an empty cluster visible in the counts.
    """
    cluster_sizes = np.bincount(cluster_ids, minlength=n_clusters)
    return np.where(cluster_ids == cluster_sizes.argmin(), -1, 1)


# Baseline run.
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(features_scaled)
kmeans_pred = _flag_smallest_cluster(kmeans.labels_, kmeans.n_clusters)
kmeans_metrics = calculate_metrics(true_label, kmeans_pred, "K-Means")
# Second run with an explicit n_init; its scores are printed but not stored.
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
adjustments["K-Means"] = {"n_clusters": 2, "n_init": 10}
kmeans.fit(features_scaled)
kmeans_pred_adjusted = _flag_smallest_cluster(kmeans.labels_, kmeans.n_clusters)
calculate_metrics(true_label, kmeans_pred_adjusted, "K-Means (Adjusted)")

# DBSCAN
# Baseline run. Points DBSCAN leaves unclustered carry label -1 and are
# reported as anomalies; every clustered point is reported as normal (1).
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(features_scaled)
dbscan_pred = np.where(dbscan.labels_ != -1, 1, -1)
dbscan_metrics = calculate_metrics(true_label, dbscan_pred, "DBSCAN")
# Second run with a tighter radius and a higher density requirement.
dbscan = DBSCAN(eps=0.3, min_samples=10)
adjustments["DBSCAN"] = {"eps": 0.3, "min_samples": 10}
dbscan.fit(features_scaled)
dbscan_pred_adjusted = np.where(dbscan.labels_ != -1, 1, -1)
calculate_metrics(true_label, dbscan_pred_adjusted, "DBSCAN (Adjusted)")

# Autoencoder
# Symmetric dense autoencoder trained to reproduce its own (scaled) input.
input_dim = features_scaled.shape[1]
autoencoder = Sequential([
    Dense(32, activation='relu', input_dim=input_dim),
    Dense(16, activation='relu'),
    Dense(32, activation='relu'),
    # Linear output: the inputs come from StandardScaler, so they are centred
    # on zero and unbounded. A sigmoid would confine reconstructions to (0, 1)
    # and inflate the error of every negative or large feature value.
    Dense(input_dim, activation='linear')
])
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(features_scaled, features_scaled, epochs=10, batch_size=32, verbose=0)
# Initial prediction: per-sample mean squared reconstruction error is the
# anomaly score, and scores above the 95th percentile are flagged.
reconstruction = autoencoder.predict(features_scaled)
reconstruction_error = np.mean(np.square(features_scaled - reconstruction), axis=1)
threshold = np.percentile(reconstruction_error, 95)
autoencoder_pred = np.where(reconstruction_error > threshold, -1, 1)
autoencoder_metrics = calculate_metrics(true_label, autoencoder_pred, "Autoencoder")
# Adjusted threshold: same scores, cut at the 90th percentile instead (the
# model is not retrained).
threshold_adjusted = np.percentile(reconstruction_error, 90)
adjustments["Autoencoder"] = {"threshold_percentile": 90}
autoencoder_pred_adjusted = np.where(reconstruction_error > threshold_adjusted, -1, 1)
calculate_metrics(true_label, autoencoder_pred_adjusted, "Autoencoder (Adjusted)")

# List the parameter changes recorded for each model, in the order they were
# registered. Only the settings are printed; no scores are compared here.
print("\nModel Adjustments and Improvements:")
for model, params in adjustments.items():
    print("{} Adjustments: {}".format(model, params))