In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import plotly.express as px
import plotly.graph_objects as go
import os
from sklearn.manifold import TSNE


# Load the historical input table (comma-separated, header in the first row).
input_csv = '00_hist_input_data.csv'
acc_df = pd.read_csv(input_csv)


In [7]:
# Set LOKY_MAX_CPU_COUNT to 4 (presumably to limit the cores that the
# joblib/loky backend used by scikit-learn assumes — TODO confirm intent).
os.environ["LOKY_MAX_CPU_COUNT"] = "4"

# Columns used as clustering features.
# Not currently included: region_id, elevation_group.
features = [
    'air_temp_mean_stations',
    'wind_speed_max_stations',
    'snow_height_mean_stations',
    'new_snow_mean_stations',
]

# Keep only the rows in which every clustering feature is present;
# the copy makes the later column assignment safe.
acc_df_cleaned = acc_df.dropna(subset=features).copy()

# Standardize the feature columns (fit, then transform the same data).
data = acc_df_cleaned[features]
scaler = StandardScaler()
data_scaled = scaler.fit(data).transform(data)

# K-Means with K=5 and a fixed seed; the labels of the fitted model are the
# cluster assignment of each row of data_scaled.
kmeans = KMeans(n_clusters=5, random_state=0)
kmeans.fit(data_scaled)
cluster_labels = kmeans.labels_

# Attach the cluster label of each row to the cleaned DataFrame.
acc_df_cleaned['k_cluster'] = cluster_labels


In [11]:
# t-SNE to visualize the clusters in 2D.
# NOTE(review): max_iter=250 is the minimum value scikit-learn documents for
# TSNE (the default is 1000). It is kept unchanged here so the saved embedding
# stays the same; consider raising it if the embedding looks unconverged.
tsne = TSNE(n_components=2, random_state=0, max_iter=250)
data_tsne = tsne.fit_transform(data_scaled)
data_tsne_df = pd.DataFrame(data_tsne, columns=['t-SNE1', 't-SNE2'])
data_tsne_df['k_cluster'] = cluster_labels

# Cluster ids are nominal categories, not quantities. With an integer column
# Plotly Express draws a continuous colour bar, so the plot uses a copy whose
# labels are strings: this yields one discrete colour and legend entry per
# cluster. data_tsne_df itself keeps the integer labels for the CSV export.
tsne_plot_df = data_tsne_df.assign(
    k_cluster=data_tsne_df['k_cluster'].astype(str)
)
# Order the legend numerically rather than by first appearance in the data.
cluster_order = sorted(tsne_plot_df['k_cluster'].unique(), key=int)

# Create the t-SNE scatter plot, coloured by cluster.
fig = px.scatter(tsne_plot_df,
                 x='t-SNE1',
                 y='t-SNE2',
                 color='k_cluster',
                 category_orders={'k_cluster': cluster_order},
                 opacity=0.5,
                 title='t-SNE Visualization')

fig.show()

# Save the t-SNE coordinates together with the (integer) cluster labels:
data_tsne_df.to_csv('02_hist_tSNE_data.csv', index=False)
print(data_tsne_df.head())

     t-SNE1    t-SNE2  k_cluster
0 -0.543733  0.184784          3
1  0.383453  0.428284          0
2 -0.929153  0.221331          3
3 -0.896742  0.246692          2
4  0.586023  0.552704          0


In [12]:
# Per-cluster mean of the two t-SNE coordinates (i.e. the centre of each
# cluster in the embedding, not the mean of the original features):
tsne_axes = ['t-SNE1', 't-SNE2']
cluster_means = data_tsne_df.groupby('k_cluster')[tsne_axes].mean()
# The index (k_cluster) is written as the first CSV column.
cluster_means.to_csv('04_hist_cluster_means_tSNE.csv')