In [25]:
import pandas as pd
import numpy as np
from ast import literal_eval

# Load the CSV file
data = pd.read_csv("complete_fixed_filtered_char_color.csv")

# Drop columns safely
columns_to_drop = ["Unnamed: 0.1", "test", "Unnamed: 0"]
data = data.drop(columns=[col for col in columns_to_drop if col in data.columns], errors='ignore')

# Convert string representations of lists/dictionaries to Python objects
data['colors'] = data['colors'].apply(lambda x: literal_eval(x) if isinstance(x, str) else x)

# Display the first few rows
data.head()

Unnamed: 0,jp_title,char_name,fav,img,colors
0,Horimiya,Izumi Miyamura (宮村 伊澄),16137,https://cdn.myanimelist.net/images/characters/...,"[[194, 222, 249], [185, 220, 250], [177, 200, ..."
1,Horimiya,Kyouko Hori (堀 京子),12528,https://cdn.myanimelist.net/images/characters/...,"[[220, 237, 253], [208, 227, 250], [192, 203, ..."
2,Horimiya,Yuki Yoshikawa (吉川 由紀),1502,https://cdn.myanimelist.net/images/characters/...,"[[195, 219, 249], [158, 226, 245], [141, 216, ..."
3,Mushoku Tensei: Isekai Ittara Honki Dasu,Rudeus Greyrat (ルーデウス・グレイラット),10221,https://cdn.myanimelist.net/images/characters/...,"[[221, 234, 246], [179, 215, 251], [140, 207, ..."
4,Mushoku Tensei: Isekai Ittara Honki Dasu,Roxy Migurdia (ロキシー・ミグルディア),9320,https://cdn.myanimelist.net/images/characters/...,"[[215, 231, 239], [182, 206, 247], [162, 196, ..."


In [32]:
# Flatten all color lists (ensure each row is a 30x3 list)
all_colors = np.array([color for sublist in data['colors'] if isinstance(sublist, list) for color in sublist])

print("Number of character:", len(data['colors'])) 
print("Expected shape:", (len(data['colors'])*30, 3))
print("Real color", all_colors.shape)
print("Flatten:", 9240*3)

Number of character: 308
Expected shape: (9240, 3)
Real color (9240, 3)
Flatten: 27720


In [34]:
list_color = []

for character_color in data['colors']:
    if type(character_color) == list:
        for color in character_color:
            list_color.append(color)

used_colors = np.array(list_color)
print(used_colors)

[[194 222 249]
 [185 220 250]
 [177 200 225]
 ...
 [ 77 150 246]
 [251 244 242]
 [252 248 247]]


In [35]:
from sklearn.cluster import KMeans

num_clusters = 10

kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit_predict(all_colors)

array([2, 1, 1, ..., 6, 2, 2], shape=(9240,), dtype=int32)

In [62]:
all_colors.shape

(17490, 3)

In [59]:
all_colors.reshape(52470,3)

ValueError: cannot reshape array of size 52470 into shape (52470,3)

In [50]:
type(data['colors'][0])

list