In [2]:
import ast
from collections import Counter

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.cluster import KMeans

In [4]:
# Load the reference table from a CSV resolved relative to the current working
# directory. As the preview shows, it has a "Name" column and a "Color" column
# whose cells are text of the form "(r, g, b)".
df = pd.read_csv("pokemon_color.csv")
# Print the first rows as a quick sanity check of the loaded columns.
print(df.head())

             Name            Color
0  abomasnow-mega  (247, 249, 251)
1       abomasnow  (250, 250, 251)
2            abra  (254, 254, 254)
3      absol-mega  (247, 249, 252)
4           absol  (249, 250, 252)


In [6]:
def get_dominant_color(image, k=3):
    """Return the dominant colour of ``image`` as an integer RGB triple.

    The image is converted to RGB, its pixels are clustered with K-Means, and
    the centroid of the most populated cluster is returned.

    Parameters
    ----------
    image : object exposing ``convert("RGB")`` and convertible with ``np.array``
        In this notebook this is the result of ``PIL.Image.open``.
    k : int, default 3
        Requested number of colour clusters. It is clamped to the number of
        pixels, because K-Means cannot fit more clusters than samples.

    Returns
    -------
    numpy.ndarray
        Integer array of length 3 holding the centroid rounded to the nearest
        integer per channel.

    Raises
    ------
    ValueError
        If the image contains no pixels.

    Notes
    -----
    Every pixel counts equally, so a large uniform background can win the
    vote over the subject of the picture.
    """
    # Greyscale and RGBA inputs are normalised to three channels first;
    # otherwise the reshape below fails with
    # "ValueError: cannot reshape array of size ... into shape (3)".
    rgb_image = image.convert("RGB")

    # One row per pixel, one column per channel (R, G, B).
    pixels = np.array(rgb_image).reshape((-1, 3))
    if len(pixels) == 0:
        raise ValueError("Cannot compute a dominant color: image contains no pixels")

    # Tiny images may hold fewer pixels than the requested cluster count.
    n_clusters = min(k, len(pixels))

    # K-Means starts from random centroids and can get stuck in a local
    # minimum; n_init retries with several initial values and keeps the best.
    # The fixed random_state keeps the result reproducible between runs.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    kmeans.fit(pixels)

    # Vectorised count of pixels per cluster; a tie resolves to the lowest
    # cluster index.
    cluster_sizes = np.bincount(kmeans.labels_, minlength=n_clusters)
    dominant_cluster = int(cluster_sizes.argmax())
    dominant_color = kmeans.cluster_centers_[dominant_cluster]

    # Round to nearest instead of truncating, so 249.9 becomes 250, not 249.
    return np.rint(dominant_color).astype(int)

In [26]:
def classify(image, df):
    """Print the name of the reference entry whose colour is closest to the image's.

    The dominant colour of ``image`` is compared with every entry of
    ``df["Color"]`` using Euclidean distance in RGB space, and the ``"Name"``
    of the nearest row is printed.

    Parameters
    ----------
    image : path or file object
        Passed straight to ``Image.open``.
    df : pandas.DataFrame
        Reference table with a ``"Name"`` column and a ``"Color"`` column
        holding either ``"(r, g, b)"`` text or ready-made 3-element sequences.

    Returns
    -------
    None
        The prediction is reported via ``print``.

    Side effects
    ------------
    Adds (or overwrites) a ``"Distance"`` column on the ``df`` passed in.

    Raises
    ------
    ValueError
        If ``df`` has no rows, or a ``"Color"`` string is not a plain Python
        literal.
    SyntaxError
        If a ``"Color"`` string is not parseable at all.
    """
    if len(df) == 0:
        raise ValueError("Reference table is empty: nothing to compare against")

    # The context manager releases the file handle once the colour is extracted.
    with Image.open(image) as new_image:
        new_color = np.asarray(get_dominant_color(new_image), dtype=float)

    # literal_eval accepts only Python literals, so text coming from the CSV
    # cannot execute code the way eval() could.
    reference_colors = np.array(
        [
            ast.literal_eval(color) if isinstance(color, str) else color
            for color in df["Color"]
        ],
        dtype=float,
    ).reshape(-1, 3)

    # One Euclidean distance per reference row, computed in a single call.
    distances = np.linalg.norm(reference_colors - new_color, axis=1)

    df["Distance"] = distances
    best_match = df.loc[df["Distance"].idxmin(), "Name"]

    print(f"The predicted Pokémon is: {best_match}")

In [28]:
# NOTE(review): absolute, machine-specific path — this cell only runs on the
# author's machine; consider a path relative to the notebook instead.
image_path = "/Users/kalparunsarkar/Documents/Pokemon ML model ACM/model/unknown pokemon.png"
# Prints the closest colour match; classify also writes a "Distance" column into df.
classify(image_path, df)



The predicted Pokémon is: rotom-mow


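This model is hilariously off course. Further refinement is required: the reference colours previewed above are all near-white, which suggests the dominant cluster is capturing the image background rather than the Pokémon itself, so colour distance barely discriminates between entries.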