### UNSUPERVISED LEARNING (K-MEANS)
#### Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
# Load the exoplanet CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path -- adjust it when
# running the notebook anywhere else.
df = pd.read_csv(
    r"C:\Users\madha\Downloads\12310219-PA\nasa_exoplanets.csv",
    sep=",",
)

#### Feature Selection

In [None]:
# Columns of `df` that are fed to the clustering step.
cluster_features = [
    "pl_orbper",
    "pl_rade",
    "pl_bmasse",
    "st_teff",
    "st_mass",
]

# Work on an independent copy so the raw frame `df` is left untouched.
df_cluster = df[cluster_features].copy()

# Impute every missing value with the median of its own column; the medians
# are computed once for the whole frame and matched to columns by name.
df_cluster = df_cluster.fillna(df_cluster.median())


#### Scale the Data

In [None]:
# Standardize the selected features. The scaler is fitted on `df_cluster`
# first and then applied to the same frame; `scaler` stays available
# afterwards as the fitted transformer.
scaler = StandardScaler()
scaler.fit(df_cluster)
X_scaled = scaler.transform(df_cluster)


#### Number of Clusters (Elbow Method)

In [None]:
# Elbow method: fit K-Means for each candidate K and record the inertia
# (the fitted model's `inertia_` attribute) so the curve can be inspected.
k_values = range(2, 8)

# n_init is set explicitly so the sweep uses the same initialisation setting
# as the final model (n_init=10) instead of a default that differs between
# scikit-learn releases.
inertia = [
    KMeans(n_clusters=k, random_state=42, n_init=10).fit(X_scaled).inertia_
    for k in k_values
]

plt.figure(figsize=(6, 4))
plt.plot(k_values, inertia, marker="o")
plt.xticks(list(k_values))  # K is discrete: show one tick per candidate
plt.xlabel("Number of Clusters (K)")
plt.ylabel("Inertia")
plt.title("Elbow Method for Optimal K")
plt.show()


#### Apply K-Means Clustering

In [None]:
# Fit the final 3-cluster model on the scaled features and get one label
# per row of X_scaled.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
clusters = kmeans.fit_predict(X_scaled)

# NOTE: `df_cluster` is rebound here to a copy of the full, un-imputed `df`
# with the labels attached as a new "cluster" column; the median-filled
# frame built earlier is no longer reachable under this name.
df_cluster = df.assign(cluster=clusters)

#### Visualization

In [None]:
# Keep only rows whose orbital period is strictly between 0 and 1e7.
# Rows with a missing period fail both comparisons and are dropped; the
# lower bound also keeps the log-scaled x-axis valid.
df_plot = df_cluster.query("pl_orbper > 0 and pl_orbper < 1e7")

# Light grey canvas with dashed white grid lines.
theme_rc = {
    "axes.facecolor": "#f2f3f7",
    "figure.facecolor": "#f2f3f7",
    "grid.color": "white",
    "grid.linestyle": "--",
    "grid.alpha": 0.6,
}
sns.set_theme(style="darkgrid", rc=theme_rc)

fig, ax = plt.subplots(figsize=(10, 7))

# One marker per row, coloured by its K-Means cluster label.
sns.scatterplot(
    data=df_plot,
    x="pl_orbper",
    y="pl_rade",
    hue="cluster",
    palette="Set2",
    s=90,
    alpha=0.8,
    edgecolor="black",
    linewidth=0.5,
    ax=ax,
)

ax.set_xscale("log")
ax.set_xlabel("Orbital Period (days, log scale)")
ax.set_ylabel("Planet Radius (Earth radii)")
ax.set_title("K-Means Clustering of Exoplanets", fontsize=13)

ax.legend(title="Cluster", frameon=False)
sns.despine(fig=fig, left=True, bottom=True)
fig.tight_layout()
plt.show()