### Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler

# Read the two model outputs from disk: the latent features produced by
# the VAE and the predictions produced by TabNet.
latent_features = pd.read_csv('latent_features_vae.csv')
tabnet_predictions = pd.read_csv('tabnet_predictions.csv')

# Inner-join the two tables on 'Id', so only rows whose 'Id' appears in
# both files survive into `data`.
data = pd.merge(latent_features, tabnet_predictions, on='Id', how='inner')


### Clustering

In [None]:
# Cluster the rows of `data` on their latent-feature columns.
n_clusters = 5

# Select the feature columns by name instead of by position. After the
# merge on 'Id', the 'Id' column stays wherever it was in the latent
# feature file, so a positional slice such as iloc[:, :-2] can feed 'Id'
# to KMeans and drop a real latent dimension. Also excluding 'Cluster' and
# 'Severity_Index' keeps this cell safe to re-run after they were added.
# NOTE(review): assumes the TabNet file contributes no columns other than
# 'Id' and 'Predicted_Health_Index' -- confirm against the CSV.
non_feature_cols = ('Id', 'Predicted_Health_Index', 'Cluster', 'Severity_Index')
feature_cols = [col for col in data.columns if col not in non_feature_cols]

# n_init is set explicitly because its default differs between
# scikit-learn releases; pinning it keeps the clustering reproducible.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
data['Cluster'] = kmeans.fit_predict(data[feature_cols])

# Map clusters to severity index
# NOTE(review): KMeans labels are arbitrary integers with no inherent
# order, so this copy does not rank clusters by severity -- confirm that
# an unordered label is what downstream consumers expect.
data['Severity_Index'] = data['Cluster']


### Visualization

In [None]:
# t-SNE Visualization
# Project the latent features to 2-D and colour each point by its cluster.
#
# Select the feature columns by name. At this point `data` also holds
# 'Cluster' and 'Severity_Index', so a positional slice such as
# iloc[:, :-3] strips the wrong columns and leaves 'Id' in the embedding
# input.
# NOTE(review): assumes every remaining column is a latent feature --
# confirm against the input CSVs.
non_feature_cols = ('Id', 'Predicted_Health_Index', 'Cluster', 'Severity_Index')
feature_cols = [col for col in data.columns if col not in non_feature_cols]

tsne = TSNE(n_components=2, random_state=42)
tsne_results = tsne.fit_transform(data[feature_cols])

# Plot t-SNE results: one point per row, hue taken from the cluster label.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=tsne_results[:, 0],
    y=tsne_results[:, 1],
    hue=data['Cluster'],
    palette='viridis',
)
plt.title('t-SNE Visualization of Latent Features')
plt.show()


### Analysis

In [None]:
# Analyze clusters: per-cluster mean of every numeric column.
# numeric_only=True skips non-numeric columns (for example a string 'Id')
# instead of raising TypeError, which is what pandas >= 2.0 does when it
# is asked to average a non-numeric column.
cluster_summary = data.groupby('Cluster').mean(numeric_only=True)
display(cluster_summary)

# Visualize Predicted Health Index by Cluster: one box per cluster label.
plt.figure(figsize=(10, 6))
sns.boxplot(x='Cluster', y='Predicted_Health_Index', data=data)
plt.title('Predicted Health Index by Cluster')
plt.show()


### Save Analysis Results

In [None]:
# Persist the merged table, including the 'Cluster' and 'Severity_Index'
# columns, to CSV. The DataFrame index is not written.
data.to_csv(path_or_buf='analysis_results.csv', index=False)

# Confirm completion to the user.
print("Analysis results saved.")
