In [3]:
import json
from collections import Counter
from PIL import ImageColor
from scipy.spatial import KDTree
from webcolors import (
    CSS3_HEX_TO_NAMES,
    hex_to_rgb,
)

# Read ../merged.json into `data`; the cells below iterate over it with
# data.items().
with open('../merged.json') as merged_file:
    data = json.load(merged_file)

We will convert each Pokemon's hex color codes into named CSS3 colors, which makes the later analysis easier.

In [4]:
css3_db = CSS3_HEX_TO_NAMES

# Lookup structures shared by every call to convert_rgb_to_names. They are
# built on first use so the KD-tree is not reconstructed for every color.
_css3_lookup = {}


def _get_css3_lookup():
    """Return ``(names, kd_tree)`` for ``css3_db``, building them when needed.

    The structures are rebuilt only when ``css3_db`` has been rebound to a
    different object since the last build; in-place edits of the same dict are
    not detected.
    """
    if _css3_lookup.get('source') is not css3_db:
        # names[i] is the color name whose RGB value is rgb_values[i]
        names = []
        rgb_values = []
        for color_hex, color_name in css3_db.items():
            names.append(color_name)
            rgb_values.append(hex_to_rgb(color_hex))

        _css3_lookup['source'] = css3_db
        _css3_lookup['names'] = names
        _css3_lookup['tree'] = KDTree(rgb_values)

    return _css3_lookup['names'], _css3_lookup['tree']


# Function to get the color name from an RGB value
def convert_rgb_to_names(rgb_tuple):
    """Return the name in ``css3_db`` whose RGB value is nearest to ``rgb_tuple``.

    Parameters
    ----------
    rgb_tuple : sequence of 3 numbers
        The (red, green, blue) value to look up.

    Returns
    -------
    The color name stored in ``css3_db`` for the nearest neighbour found by
    ``KDTree.query``.
    """
    names, kdt_db = _get_css3_lookup()
    _distance, index = kdt_db.query(rgb_tuple)
    return names[index]

In [59]:
def remake_pokemon_json(data, use_main_color=True, skip_colors=None):
    """Build a simplified list of Pokemon records with named colors.

    Parameters
    ----------
    data : dict
        Mapping of dex number -> Pokemon record. Every record must provide
        'types' and 'colors' (an iterable of color strings understood by
        ``ImageColor.getcolor``); 'name' and 'color' are optional and default
        to the empty string.
    use_main_color : bool
        When true, a non-empty 'color' value of the record is added to its
        color list. This value is NOT filtered by ``skip_colors``.
    skip_colors : container of str, optional
        Converted color names to leave out of the result. ``None`` (the
        default) skips nothing.

    Returns
    -------
    list of dict
        One dict per record with the keys 'id', 'name', 'types',
        'main_color' and 'colors' (a sorted list of unique color names).
    """
    if skip_colors is None:
        skip_colors = ()

    new_pokemon = []
    for dex_num, poke in data.items():
        color_names = set()

        for color in poke['colors']:
            color_rgb = ImageColor.getcolor(color, "RGB")  # convert hex code to rgb
            color_name = convert_rgb_to_names(color_rgb)  # get color name based on rgb values

            if color_name not in skip_colors:
                color_names.add(color_name)

        # Optional fields: fall back to '' only when the key is missing,
        # instead of hiding every possible error behind a bare `except`.
        name = poke.get('name', '')
        main_color = poke.get('color', '')

        if use_main_color and main_color != '':
            color_names.add(main_color)

        new_pokemon.append({
            'id': dex_num,
            'name': name,
            'types': poke['types'],
            'main_color': main_color,
            'colors': sorted(color_names),
        })

    return new_pokemon

In [60]:
# Rebuild the simplified records, leaving 'white' and 'black' out of the
# converted color names, then write the result to disk as JSON.
new_poke_dict = remake_pokemon_json(
    data,
    skip_colors=['white', 'black'],
)

with open('data/new_pokemon_list.json', 'w') as json_out:
    json.dump(new_poke_dict, json_out)

In [61]:
# Spot-check three of the rebuilt records.
for sample_index in (0, 110, 270):
    print(new_poke_dict[sample_index])

{'id': '001', 'name': 'bulbasaur', 'types': ['Grass', 'Poison'], 'main_color': 'green', 'colors': ['darkseagreen', 'gray', 'green']}
{'id': '111', 'name': 'rhyhorn', 'types': ['Ground', 'Rock'], 'main_color': 'gray', 'colors': ['darkgray', 'darkslategray', 'dimgray', 'gray']}
{'id': '271', 'name': 'lombre', 'types': ['Water', 'Grass'], 'main_color': 'green', 'colors': ['darkkhaki', 'green', 'olivedrab', 'silver']}


Next, let's count how often each individual color appears, and then how often each typing (single or dual type) appears.

In [62]:
# Despite the name, `hex_codes` collects the color *names* of every record
# (one entry per record/color pair) so they can be tallied.
hex_codes = []

for poke in new_poke_dict:
    hex_codes += poke['colors']

hex_codes_count = Counter(hex_codes)

# The ten most frequent color names.
hex_codes_count.most_common(10)

[('darkslategray', 388),
 ('dimgray', 284),
 ('gray', 245),
 ('blue', 157),
 ('silver', 157),
 ('darkgray', 157),
 ('brown', 145),
 ('indianred', 112),
 ('snow', 110),
 ('green', 108)]

In [18]:
typings = []

for poke in new_poke_dict:
    poke_types = poke['types']
    # Represent a typing as a tuple: two entries for dual types, otherwise one.
    type_tuple = (
        (poke_types[0], poke_types[1])
        if len(poke_types) > 1
        else (poke_types[0],)
    )
    typings.append(type_tuple)

typings_count = Counter(typings)

# The ten most frequent typings.
typings_count.most_common(10)

[(('Normal',), 69),
 (('Water',), 67),
 (('Grass',), 43),
 (('Psychic',), 37),
 (('Fire',), 33),
 (('Electric',), 32),
 (('Fighting',), 27),
 (('Normal', 'Flying'), 26),
 (('Bug',), 19),
 (('Fairy',), 18)]

Using our new Pokemon objects, let's see what the co-occurrences are like for colors and typings (at first, let's look at them individually). To do this, we can make a reusable function that will aggregate the co-occurrences of values based on a key we choose:

In [29]:
from collections import defaultdict, Counter

def count_co_occurrences(pokemon_data, key):
    """Count how often the values stored under ``key`` appear together.

    Parameters
    ----------
    pokemon_data : iterable of dict
        Records; ``record[key]`` must be an iterable of hashable values.
    key : str
        The field whose values are counted (e.g. 'types' or 'colors').

    Returns
    -------
    collections.defaultdict of collections.Counter
        ``result[a][b]`` is the number of times ``b`` was seen in a record
        that also contains ``a``. The diagonal ``result[a][a]`` therefore
        counts the occurrences of ``a`` itself. Looking up an unseen value
        yields an empty Counter.
    """
    co_occurrences = defaultdict(Counter)

    for pokemon in pokemon_data:
        values = pokemon[key]
        # Tally the record once; the same tally is added to every value's row.
        value_counts = Counter(values)
        for value in values:
            co_occurrences[value].update(value_counts)

    return co_occurrences

In [63]:
# Typing co-occurrences
typing_co_occurrences = count_co_occurrences(new_poke_dict, 'types')

# color co-occurrences
color_co_occurrences = count_co_occurrences(new_poke_dict, 'colors')

# Print a heading followed by the pair count for a few example pairs.
for heading, table, first, second in (
    ("Number of times 'Grass' and 'Poison' co-occur:", typing_co_occurrences, 'Grass', 'Poison'),
    ("Number of times 'Grass' and 'Fire' co-occur:", typing_co_occurrences, 'Grass', 'Fire'),
    ("Number of times 'Black' and 'White' co-occur:", color_co_occurrences, 'black', 'white'),
    ("Number of times 'Black' and 'Salmon' co-occur:", color_co_occurrences, 'black', 'salmon'),
):
    print(heading)
    print(table[first][second])


Number of times 'Grass' and 'Poison' co-occur:
14
Number of times 'Grass' and 'Fire' co-occur:
0
Number of times 'Black' and 'White' co-occur:
0
Number of times 'Black' and 'Salmon' co-occur:
0


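The above gives us some interesting information about which types and colors co-occur with each other. As we see, Grass and Poison co-occur only 14 times among 905 records, while Grass and Fire never occur together (which makes some sense). For the colors, black and white occur together a lot, while black and 'salmon' barely occur together at all. (Note: the color counts printed above are both 0, most likely because 'white' and 'black' were excluded through `skip_colors` in the run that produced this output; the color figures described here come from a run without that filter.)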

Let us take this a little further, and see what the probabilities are like for certain typings to co-occur, and certain colors to co-occur.

In [31]:
# Function to get probability of one value occurring in the total number
def prob_value(total, co_occurrences, value):
    """Return the fraction of ``total`` records that contain ``value``.

    The occurrence count is read from the diagonal entry
    ``co_occurrences[value][value]``. Raises ZeroDivisionError if ``total``
    is 0.
    """
    return co_occurrences[value][value] / total


# Function to get probability that 2 values occur together
def prob_value_pair(total, co_occurrences, value1, value2):
    """Return the fraction of ``total`` records containing both values.

    The pair count is read from ``co_occurrences[value1][value2]``. Raises
    ZeroDivisionError if ``total`` is 0.
    """
    return co_occurrences[value1][value2] / total


# Final function to get the probability of a value given another value
def prob_value_given_value(total, co_occurrences, value1, value2):
    """Return P(value1 | value2): the probability of ``value1`` given ``value2``.

    Computed as ``prob_value_pair(value1, value2) / prob_value(value2)``.
    When ``value2`` never occurs the conditional probability is undefined;
    0.0 is returned in that case instead of raising ZeroDivisionError.
    Raises ZeroDivisionError if ``total`` is 0.
    """
    prob_pair = prob_value_pair(total, co_occurrences, value1, value2)
    prob_b = prob_value(total, co_occurrences, value2)

    # Guard the division: the conditioning value was never observed.
    if prob_b == 0:
        return 0.0

    return prob_pair / prob_b

In [39]:
# Each value is printed under its heading; `end="\n\n"` leaves the blank line
# that separates one result from the next.
print("Probability of a grass type:")
print(
    prob_value(len(new_poke_dict), typing_co_occurrences, 'Grass'),
    end="\n\n",
)
print("Probability of a grass/poison type:")
print(
    prob_value_pair(len(new_poke_dict), typing_co_occurrences, 'Grass', 'Poison'),
    end="\n\n",
)
print("Probability a Pokemon will be Poison given it is Grass:")
print(
    prob_value_given_value(len(new_poke_dict), typing_co_occurrences, 'Poison', 'Grass'),
    end="\n\n",
)
print("Probability that a Pokemon is both black and salmon colored:")
print(
    prob_value_pair(len(new_poke_dict), color_co_occurrences, 'salmon', 'black'),
    end="\n\n",
)
print("Probability a Pokemon has the color 'salmon' given that it has 'black':")
print(
    prob_value_given_value(len(new_poke_dict), color_co_occurrences, 'salmon', 'black'),
)

Probability of a grass type:
0.11823204419889503

Probability of a grass/poison type:
0.015469613259668509

Probability a Pokemon will be Poison given it is Grass:
0.1308411214953271

Probability that a Pokemon is both black and salmon colored:
0.0022099447513812156

Probability a Pokemon has the color 'salmon' given that it has 'black':
0.0033057851239669425


The above gives us some good details about how likely certain types and colors are to co-occur, both overall and when one of them is already known to be present. For example, while only 1.55% of all Pokemon are Grass/Poison, a Pokemon that IS a Grass type has a higher probability, 13.08%, of also being Poison.

Let's take a look at the complete probabilities for certain types and colors.

In [40]:
def likely_co_values(total, co_occurrences, value1):
    """Return the conditional probabilities of everything seen with ``value1``.

    For each ``value2`` present in ``co_occurrences[value1]`` (which includes
    ``value1`` itself when it has a diagonal entry), the result maps
    ``value2`` to ``prob_value_given_value(total, co_occurrences, value2, value1)``.

    Returns a ``Counter`` so callers can use ``most_common``.
    """
    return Counter({
        value2: prob_value_given_value(total, co_occurrences, value2, value1)
        for value2 in co_occurrences[value1]
    })

In [65]:
# Each ranking is printed under its heading; the `end` argument reproduces the
# blank line(s) that separate one ranking from the next.
print("Typings most likely to pair with 'Grass':")
print(
    likely_co_values(len(new_poke_dict), typing_co_occurrences, "Grass").most_common(),
    end="\n\n",
)
print("Typings most likely to pair with 'Dragon':")
print(
    likely_co_values(len(new_poke_dict), typing_co_occurrences, "Dragon").most_common(),
    end="\n\n\n",
)
print("Colors likely to pair with 'salmon':")
print(
    likely_co_values(len(new_poke_dict), color_co_occurrences, "salmon").most_common(10),
    end="\n\n",
)
print("Colors likely to pair with 'green':")
print(
    likely_co_values(len(new_poke_dict), color_co_occurrences, "green").most_common(50),
)


Typings most likely to pair with 'Grass':
[('Grass', 1.0), ('Poison', 0.1308411214953271), ('Bug', 0.056074766355140186), ('Flying', 0.056074766355140186), ('Ghost', 0.056074766355140186), ('Fairy', 0.04672897196261682), ('Psychic', 0.03738317757009346), ('Dark', 0.03738317757009346), ('Water', 0.028037383177570093), ('Fighting', 0.028037383177570093), ('Steel', 0.028037383177570093), ('Dragon', 0.028037383177570093), ('Rock', 0.01869158878504673), ('Ice', 0.01869158878504673), ('Normal', 0.01869158878504673), ('Ground', 0.009345794392523366)]

Typings most likely to pair with 'Dragon':
[('Dragon', 1.0), ('Flying', 0.10714285714285714), ('Ground', 0.10714285714285714), ('Ghost', 0.07142857142857142), ('Dark', 0.07142857142857142), ('Water', 0.05357142857142857), ('Poison', 0.05357142857142857), ('Grass', 0.05357142857142857), ('Psychic', 0.03571428571428571), ('Steel', 0.03571428571428571), ('Fire', 0.03571428571428571), ('Electric', 0.03571428571428571), ('Rock', 0.03571428571428571),

In [10]:
import networkx as nx

def create_pokemon_graph(data, key):
    """Build an undirected graph linking values that share a record.

    Every value found in ``record[key]`` becomes a node, and every ordered
    pair of values from the same record becomes an edge. Because each value
    is also paired with itself, every node gets a self-loop edge.

    Parameters
    ----------
    data : iterable of dict
        Records to read.
    key : str
        Field whose values become nodes (e.g. 'colors' or 'types').

    Returns
    -------
    networkx.Graph
    """
    all_nodes = []
    all_edges = []

    for pokemon in data:
        values = pokemon[key]
        all_nodes.extend(values)
        # Pair every value with every value of the same record (itself included).
        all_edges.extend((other, value) for value in values for other in values)

    graph = nx.Graph()
    graph.add_nodes_from(all_nodes)
    graph.add_edges_from(all_edges)

    return graph

In [11]:
pokemon_color_graph = create_pokemon_graph(new_poke_dict, 'colors')

# Show only the edges whose first endpoint is 'black'.
for edge in pokemon_color_graph.edges:
    if edge[0] != 'black':
        continue
    print(edge)

('black', 'black')
('black', 'darkseagreen')
('black', 'gray')
('black', 'green')
('black', 'white')
('black', 'mediumaquamarine')
('black', 'palevioletred')
('black', 'slategray')
('black', 'cadetblue')
('black', 'seagreen')
('black', 'snow')
('black', 'bisque')
('black', 'burlywood')
('black', 'red')
('black', 'rosybrown')
('black', 'chocolate')
('black', 'salmon')
('black', 'wheat')
('black', 'blue')
('black', 'skyblue')
('black', 'tan')
('black', 'dimgray')
('black', 'silver')
('black', 'khaki')
('black', 'peru')
('black', 'darkkhaki')
('black', 'olivedrab')
('black', 'yellowgreen')
('black', 'ghostwhite')
('black', 'indianred')
('black', 'brown')
('black', 'lightgray')
('black', 'darkolivegreen')
('black', 'yellow')
('black', 'gainsboro')
('black', 'sandybrown')
('black', 'sienna')
('black', 'palegoldenrod')
('black', 'coral')
('black', 'darkgray')
('black', 'purple')
('black', 'darksalmon')
('black', 'plum')
('black', 'saddlebrown')
('black', 'lightsteelblue')
('black', 'teal')
(

In [19]:
color_type_combos = []

# Set to True to leave the color 'white' out of the tally.
skip_white = False

for poke in new_poke_dict:
    for color in poke['colors']:
        poke_types = poke['types']
        # Typing as a tuple: two entries for dual types, otherwise one.
        type_tuple = (
            (poke_types[0], poke_types[1])
            if len(poke_types) > 1
            else (poke_types[0],)
        )

        if skip_white and color == 'white':
            continue

        color_type_combos.append((type_tuple, color))

combo_counter = Counter(color_type_combos)

# The ten most frequent (typing, color) combinations.
combo_counter.most_common(10)

[((('Normal',), 'white'), 64),
 ((('Normal',), 'black'), 53),
 ((('Water',), 'white'), 50),
 ((('Water',), 'black'), 44),
 ((('Water',), 'blue'), 41),
 ((('Grass',), 'white'), 37),
 ((('Psychic',), 'white'), 36),
 ((('Grass',), 'black'), 35),
 ((('Fire',), 'white'), 31),
 ((('Grass',), 'green'), 28)]