# Loading Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Loading raw data (latin1-encoded CSV)

In [None]:
# Read the stand-stats table from disk, decoding the file as latin1
data = pd.read_csv(
    '../data/jojo-stands.csv',
    encoding='latin1',
)

In [None]:
# Preview the first five rows of the loaded table
data.head(n=5)

# Handling null stats by creating a new class ('F')

In [None]:
# Fill missing stats with the placeholder grade 'F' (no rows are dropped).
# The stat columns are selected by name: assigning the result of
# data.drop('Stand', axis=1) to a list of keys pairs columns by position and
# stops working as soon as another column (e.g. 'Rank', added later in this
# notebook) exists when the cell is re-run.
stat_cols = ['PWR','SPD','RNG','PER','PRC','DEV']
data[stat_cols] = data[stat_cols].fillna('F')

# Encoding data (str -> int) for model creation

In [None]:
# Encoding data

# Encode map: stat grade -> numeric score. 'F' is the placeholder grade used
# for missing stats; 'Infi' is scored well above 'A'.
grid_map = {'F': 0, 'E': 2, 'D': 4, 'C': 6, 'B': 8, 'A': 10, 'Infi': 20}

# Data encoding
# Columns are picked by name rather than via data.drop('Stand', axis=1), so
# the cell does not depend on column position or on which other columns exist.
# The explicit int64 cast keeps the result numeric regardless of the dtype
# replace() hands back, and makes the cell fail right here if a grade is
# missing from grid_map instead of letting a string reach the scaler.
stat_cols = ['PWR','SPD','RNG','PER','PRC','DEV']
data[stat_cols] = data[stat_cols].replace(grid_map).astype('int64')

# Save the numerically encoded table (row index omitted)
data.to_csv('../data/encoded-jojo-stands.csv', encoding='latin1', index=False)

# Preprocessing

In [None]:
# Feature matrix: the six stat columns, selected by name. Unlike
# data.drop('Stand', axis=1), this cannot pick up the 'Rank' label column if
# the cell is re-run after clustering, and it fixes the column order.
x = data[['PWR','SPD','RNG','PER','PRC','DEV']].copy()

In [None]:
# Standardise the features: fit the scaler on x, then apply it to the same data
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)

# Clustering

In [None]:
# Partition the scaled stats into four clusters (fixed seed)
kmeans = KMeans(n_clusters=4, random_state=42)
cluster_labels = kmeans.fit(x_scaled).labels_

# Translate raw KMeans labels into Rank values.
# NOTE(review): this table is hard-coded, so it presumably matches the label
# numbering of one particular run -- re-check it whenever the data or the
# scikit-learn version changes.
mapping = {3: 0, 1: 1, 0: 2, 2: 3}
mapped_labels = list(map(mapping.__getitem__, cluster_labels))

# Attach the rank of each stand to the table
data['Rank'] = mapped_labels

In [None]:
# Project the scaled stats onto two principal components
pca = PCA(n_components=2)
pca_components = pca.fit_transform(x_scaled)

# Scatter the projection, colouring each stand by its Rank
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    pca_components[:, 0],
    pca_components[:, 1],
    c=data['Rank'],
    cmap='viridis',
    s=60,
)
ax.set_title("PCA of JoJo Stands (Colored by Cluster)")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
fig.colorbar(points, ax=ax, label='Rank')
plt.show()

# Creating labeled csv

In [None]:
# Write the table, including the Rank column, to disk without the row index
data.to_csv(
    '../data/labeled-encoded-jojo-stands.csv',
    index=False,
    encoding='latin1',
)

In [None]:
# Count how many stands fall into each Rank and show the counts as a table
rank_counts = data['Rank'].value_counts()
rank_counts.reset_index()

In [None]:
# Cluster centres in scaled feature space; row i belongs to raw KMeans label i
centroids = kmeans.cluster_centers_

# Undo the standardisation so the centres read on the original stat scale
centroids_original = scaler.inverse_transform(centroids)

# Label the columns with the features the scaler and KMeans were fitted on
centroid_df = pd.DataFrame(centroids_original, columns=list(x.columns))

# The rows are in raw KMeans label order, whereas data['Rank'] holds the
# labels after they were passed through `mapping`. Apply the same translation
# here so each row is labelled with the Rank it actually describes, then
# order the table by Rank.
centroid_df.index = pd.Index(
    [mapping[label] for label in range(len(centroid_df))],
    name='Rank',
)
centroid_df = centroid_df.sort_index()
print(centroid_df)