In [77]:
import json
import pandas as pd
import numpy as np
from pathlib import Path
from PIL import Image
from sklearn.cluster import MiniBatchKMeans

In [78]:
# Load the colour-name CSV, keep only the name and 24-bit hex columns, and
# export them as pretty-printed JSON records to color_names.json.
df = pd.read_csv(r'C:\Users\Tototime\Desktop\Project_DataMining\data\color_names.csv', sep=',', header=0)

# Build the trimmed frame in a single chain: every step returns a new object,
# so nothing is mutated in place on a slice of `df` (which is what can trigger
# pandas' chained-assignment warning) and re-running the cell is idempotent.
df_selected = (
    df.loc[:, ["Name", "Hex (24 bit)"]]
    .rename(columns={"Hex (24 bit)": "Hex"})
    # Drop the '#' from the hex value; regex=False because '#' is a
    # literal character here, not a pattern.
    .assign(Hex=lambda frame: frame["Hex"].str.replace('#', '', regex=False))
)

# Round-trip through pandas' own JSON encoder in memory so that `data` holds
# exactly the records pandas would serialise (e.g. missing values become
# None), then write the file once instead of write -> read -> rewrite.
data = json.loads(df_selected.to_json(orient='records'))

with open('color_names.json', 'w') as f:
    json.dump(data, f, indent=4)

In [79]:
# Loop through the images and extract the dominant colour of each one.
# Result: color_data maps the image's numeric file stem (as a string) to a
# record holding its id, the dominant RGB triple and that colour as RRGGBB hex.
color_data = {}
img_dir = r"C:\Users\Tototime\Desktop\Project_DataMining\pokemon_jpg"

# pathlib is used because `os` is never imported in this notebook; sorting
# makes the processing order (and therefore the JSON key order) reproducible.
for img_path in sorted(Path(img_dir).iterdir()):
    img_filename = img_path.name
    if not img_filename.endswith((".jpg", ".png")):
        continue

    # Skip files whose name is not of the form 1.jpg, 2.jpg, 3.jpg, ...
    # This is checked before opening the file so rejected images are never read.
    pokemon_id = img_filename.split(".")[0]
    if not pokemon_id.isdigit():
        continue

    # Open the image and extract its pixel matrix.
    with Image.open(img_path) as img:
        # Force three channels: RGBA, palette and greyscale images would
        # otherwise make the (-1, 3) reshape fail or mix channels together.
        pixel_matrix = np.array(img.convert("RGB"))

    # Flatten to one row of R, G, B values per pixel.
    pixel_data = pixel_matrix.reshape((-1, 3))

    # Use MiniBatchKMeans and keep the centre of the most populated cluster.
    kmeans = MiniBatchKMeans(n_clusters=2, random_state=0).fit(pixel_data)
    # bincount with minlength keeps positions aligned with cluster indices even
    # when a cluster received no pixels (np.unique would drop that label and
    # shift the argmax onto the wrong centre).
    cluster_sizes = np.bincount(kmeans.labels_, minlength=kmeans.cluster_centers_.shape[0])
    main_color = kmeans.cluster_centers_[np.argmax(cluster_sizes)]
    hex_value = "{0:02X}{1:02X}{2:02X}".format(int(main_color[0]), int(main_color[1]), int(main_color[2]))

    # Create the record for this image. "nom couleur" holds the hex code at
    # this stage.
    color = {
        "id": int(pokemon_id),
        "couleur dominante": main_color.tolist(),
        "nom couleur": hex_value
    }

    # Add the record for this image to the dictionary of all colours.
    color_data[pokemon_id] = color




In [80]:
# Persist the colour records of every image as pretty-printed JSON.
with open('color_data.json', 'w') as out_file:
    out_file.write(json.dumps(color_data, indent=4))

In [81]:
# Replace each image's "nom couleur" with the name of the closest colour
# listed in color_names.json, then save color_data.json again.
with open('color_names.json', 'r') as f:
    color_names = json.load(f)

with open('color_data.json', 'r') as f:
    color_data = json.load(f)


def hex_to_rgb(hex_value):
    """Convert an 'RRGGBB' string (optionally prefixed with '#') to (r, g, b).

    Returns None when the value is not a string of exactly six hex digits, so
    that missing or malformed palette entries can be skipped by the caller.
    """
    if not isinstance(hex_value, str):
        return None
    digits = hex_value.lstrip('#')
    if len(digits) != 6:
        return None
    try:
        # Channels sit at offsets 0, 2 and 4; each one is two hex digits.
        return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
    except ValueError:
        return None


# Parse the palette once instead of once per image, dropping unusable entries.
palette = [(entry['Name'], hex_to_rgb(entry['Hex'])) for entry in color_names]
palette = [(name, rgb) for name, rgb in palette if rgb is not None]
if not palette:
    raise ValueError("color_names.json contains no usable 'Hex' values")

palette_names = [name for name, _ in palette]
palette_rgb = np.array([rgb for _, rgb in palette], dtype=float)

for pokemon in color_data:
    # Read the RGB triple rather than parsing "nom couleur": that field is
    # overwritten with a name below, so parsing it as hex would fail when this
    # cell is re-run on an already-processed file. Truncating to int mirrors
    # how the hex code was derived from the same triple.
    dominant_rgb = np.array(
        [int(channel) for channel in color_data[pokemon]['couleur dominante']],
        dtype=float,
    )
    # Nearest palette colour by squared Euclidean distance in RGB space. An
    # exact match has distance 0 and therefore always wins; ties go to the
    # first palette entry.
    distances = ((palette_rgb - dominant_rgb) ** 2).sum(axis=1)
    color_data[pokemon]['nom couleur'] = palette_names[int(np.argmin(distances))]

with open('color_data.json', 'w') as f:
    json.dump(color_data, f, indent=4)

In [None]:
# Add the colour name of each image to the matching entry of database.json.
# NOTE(review): database.json is assumed to be a mapping whose values are
# records carrying an 'id' field, as the lookups below require - confirm
# against the file.
with open('database.json', 'r') as f:
    pokemon_data = json.load(f)

with open('color_data.json', 'r') as f:
    color_data = json.load(f)

# Index the colour names by id once so each entry is resolved with a single
# dictionary lookup instead of a scan over every colour record. When several
# records share an id the last one wins, as with a full scan.
color_name_by_id = {
    record['id']: record['nom couleur'] for record in color_data.values()
}

# If the id is the same, add the colour name to the pokemon data; entries
# without a matching colour record are left untouched.
for pokemon in pokemon_data:
    pokemon_id = pokemon_data[pokemon]['id']
    if pokemon_id in color_name_by_id:
        pokemon_data[pokemon]['couleur dominante'] = color_name_by_id[pokemon_id]

with open('database.json', 'w') as f:
    json.dump(pokemon_data, f, indent=4)
