In [None]:
# Setup
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Load the survey workbook and the project-local question lists.
# The parent directory is appended to sys.path so that `lists` (imported
# below) can be resolved; that import therefore has to stay after this line.
sys.path.append(os.path.abspath(".."))
file_path = os.path.join("..", "Data", "Fertige Tabelle.xlsx")
df = pd.read_excel(file_path)
# Remove leading/trailing whitespace from the column headers before they are
# used to select the question columns.
df.columns = df.columns.str.strip()
from lists import likert_mapping, likert_questions

# Keep only the columns named in `likert_questions`, substitute values
# according to `likert_mapping` (presumably answer labels -> numbers; the
# mapping itself lives in the `lists` module), then discard every row that
# still has a missing value in any of those columns.
df_numeric = (
    df[likert_questions]
    .replace(likert_mapping)
    .dropna()
)

# Standardize the data
# StandardScaler (default settings) centres every column and scales it to
# unit variance, so all questions enter the PCA on a comparable scale.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df_numeric)

# Apply PCA
# Keep the first two principal components; they serve both as the plot axes
# and as the input to the clustering step below.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_data)

# Apply KMeans clustering (you can change n_clusters)
# Note that the clusters are fitted on the 2-D PCA projection, not on the
# full standardized data.
# n_init is pinned explicitly: scikit-learn's default changed from 10 to
# "auto" in version 1.4 (with a FutureWarning in 1.2/1.3), so relying on the
# default gives different results depending on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(pca_result)

# Collect the results in a DataFrame for plotting.
# The index of df_numeric is reused so that every row keeps the label of the
# survey row it came from; with a fresh 0..n-1 index the rows removed by
# dropna() would make df_plot misalign with df / df_numeric on any later
# join or column assignment.
df_plot = pd.DataFrame(pca_result, columns=["PC1", "PC2"], index=df_numeric.index)
df_plot["Cluster"] = clusters

# Plotting
# Draw the two principal components as a scatter plot on an explicit
# Figure/Axes pair, colouring each point by its KMeans cluster label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    data=df_plot,
    x="PC1",
    y="PC2",
    hue="Cluster",
    palette="tab10",
    s=100,
    ax=ax,
)
ax.set_title("PCA of Likert-scale AI Attitudes (KMeans Clustering)")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
ax.legend(title="Cluster", loc="best")
ax.grid(True)
fig.tight_layout()
plt.show()
