In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
import joblib
import os

# Cluster countries/records by life-expectancy figures with DBSCAN.
#
# Pipeline: load CSV -> keep three numeric columns -> drop incomplete rows ->
# standardize -> DBSCAN -> persist fitted objects and the labelled rows.
# Variables are intentionally left at module level so later notebook cells
# can keep using `df`, `data`, `X_scaled`, `scaler`, `db`, and `clusters`.

# === 1. Load dataset ===
df = pd.read_csv("life_expectancy.csv")  # Make sure file exists at this path

# === 2. Define and extract numeric columns ===
# NOTE: the double spaces inside these names are part of the CSV headers.
numeric_cols = [
    'Sum of Females  Life Expectancy',
    'Sum of Life Expectancy  (both sexes)',
    'Sum of Males  Life Expectancy',
]

# Fail early with a message that names the absent columns instead of the
# generic KeyError pandas raises from the selection below.
missing_cols = [col for col in numeric_cols if col not in df.columns]
if missing_cols:
    raise KeyError(f"Columns missing from life_expectancy.csv: {missing_cols}")

# Drop rows with missing values. The explicit copy makes `data` an independent
# frame, so adding the "cluster" column later cannot trigger pandas'
# SettingWithCopyWarning or be mistaken for a write into `df`.
data = df[numeric_cols].dropna().copy()

if data.empty:
    raise ValueError(
        "No complete rows left after dropping missing values; "
        "nothing to cluster."
    )

# === 3. Scale the data ===
# DBSCAN's `eps` is a distance threshold, so features are standardized first
# to put all three columns on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(data)

# === 4. Apply DBSCAN ===
# Label -1 marks noise points that belong to no cluster.
db = DBSCAN(eps=0.5, min_samples=3)
clusters = db.fit_predict(X_scaled)

# === 5. Save the model and scaler ===
# NOTE(review): scikit-learn's DBSCAN has no `predict` method, so the saved
# model can only be inspected (labels_, core samples), not applied to new
# rows — confirm this matches how the artifact is consumed.
os.makedirs("model", exist_ok=True)
joblib.dump(db, "model/dbscan_model.joblib")
joblib.dump(scaler, "model/scaler.joblib")

# === 6. Append cluster labels and save result ===
# `clusters` is positionally aligned with `data` (both come from X_scaled).
data["cluster"] = clusters
os.makedirs("data", exist_ok=True)
data.to_csv("data/clustered_data.csv", index=False)

print("✅ DBSCAN clustering complete.")
# Convert NumPy scalars to plain ints so the set prints as {0, -1} rather
# than {np.int64(0), np.int64(-1)}.
print("📊 Clusters found:", {int(label) for label in clusters})


✅ DBSCAN clustering complete.
📊 Clusters found: {np.int64(0), np.int64(-1)}
