In [None]:
import pandas as pd
import numpy as np
import umap
import matplotlib.pyplot as plt
import os

# =====================================================
# Step 1. Read the input workbook
# =====================================================
# The target is expected in a column called "Hv"; every other column is
# later used as an input feature.
data_path = "Data.xlsx"
df = pd.read_excel(data_path)

# Fail early when the target column is absent, before any further work.
if "Hv" not in df:
    raise ValueError("The dataset must contain a target column named 'Hv'.")

# =====================================================
# Step 2. Split the table into features (X) and target (y)
# =====================================================
# Every column other than "Hv" is treated as an input feature; the target
# column is kept separately so it can be re-attached to the embedding later.
X = df.drop("Hv", axis="columns").values
y = df["Hv"].values

# =====================================================
# Step 3. Project the features to 2D with UMAP
# =====================================================
umap_settings = {
    "n_neighbors": 15,   # Adjustable depending on data size
    "min_dist": 0.1,
    "n_components": 2,   # Two output dimensions, for visualization
    "random_state": 42,  # Fixed seed
}
reducer = umap.UMAP(**umap_settings)

# Fit on the feature matrix and keep the resulting low-dimensional coordinates.
embedding = reducer.fit_transform(X)

# =====================================================
# Step 4. Write the embedding and target to Excel
# =====================================================
save_excel_path = "UMAP_Result.xlsx"

# Table layout: the two UMAP coordinates followed by the matching Hv value.
result_df = pd.DataFrame(embedding, columns=["UMAP-1", "UMAP-2"]).assign(Hv=y)

# index=False keeps the row index out of the spreadsheet.
result_df.to_excel(save_excel_path, index=False)

print(f"✅ Dimensionality reduction results saved as: {save_excel_path}")

# =====================================================
# Step 5. Draw the embedding as a scatter plot and save it
# =====================================================
save_fig_path = "UMAP_Plot.png"

fig, ax = plt.subplots(figsize=(8, 6))

# Points are positioned by their UMAP coordinates and colored by Hv.
sc = ax.scatter(
    result_df["UMAP-1"],
    result_df["UMAP-2"],
    c=result_df["Hv"],
    cmap="viridis",
    s=50,
)
fig.colorbar(sc, ax=ax, label="Hv (Hardness)")

ax.set_title("UMAP Projection of Data")
ax.set_xlabel("UMAP-1")
ax.set_ylabel("UMAP-2")
fig.tight_layout()

# Write the image to disk first, then display the figure.
fig.savefig(save_fig_path, dpi=300)
plt.show()

print(f"✅ Figure saved as: {save_fig_path}")