In [None]:
# === DBSCAN Anomaly Detection ===
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt


In [None]:

# Load dataset
# Read the CSV once; the path is relative to the notebook's working directory.
df = pd.read_csv('equipment_anomaly_dataset.csv')
print("Original dataset shape:", df.shape)
df.head()

In [None]:
# Keep only numeric columns, then drop every remaining column that contains
# at least one missing value (dropna(axis=1) removes columns, not rows).
df_numeric = df.select_dtypes(include=[np.number]).dropna(axis=1)

# The PCA below projects onto 2 components, so at least 2 usable columns
# must survive the filtering above; fail early with a clear message otherwise.
if df_numeric.shape[1] < 2:
    raise ValueError(
        "Need at least 2 numeric columns without missing values for 2D PCA, "
        f"got {df_numeric.shape[1]}"
    )

# Standardize features (zero mean, unit variance) so columns on larger
# scales do not dominate the PCA projection.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_numeric)

# Reduce to 2D for visualization. random_state pins the projection for the
# cases where scikit-learn selects its randomized SVD solver.
pca = PCA(n_components=2, random_state=42)
X_2d = pca.fit_transform(X_scaled)

print("PCA-reduced shape:", X_2d.shape)


In [None]:
# Apply DBSCAN
# Hyperparameters are named so they can be tuned in one place.
# eps is a neighbourhood radius measured in the 2D space of X_2d;
# min_samples is the neighbourhood size required for a core point.
DBSCAN_EPS = 0.3
DBSCAN_MIN_SAMPLES = 5

dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
labels = dbscan.fit_predict(X_2d)
anomalies = (labels == -1)  # DBSCAN assigns the label -1 to noise points
print(f"Detected anomalies: {np.sum(anomalies)}")

# Plot before and after
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
ax1.scatter(X_2d[:, 0], X_2d[:, 1], s=30, alpha=0.7)
ax1.set_title("Original Data (PCA-Reduced)")

# Draw the two groups separately so each one gets its own legend entry.
ax2.scatter(X_2d[~anomalies, 0], X_2d[~anomalies, 1],
            c='tab:blue', s=30, label="Normal")
ax2.scatter(X_2d[anomalies, 0], X_2d[anomalies, 1],
            c='tab:red', s=30, label="Anomaly")
ax2.set_title("DBSCAN Anomaly Detection")
ax2.legend()

for ax in (ax1, ax2):
    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
plt.tight_layout()
plt.show()
