In [2]:
import json
from collections import Counter
from PIL import ImageColor
from scipy.spatial import KDTree
from webcolors import (
    CSS3_HEX_TO_NAMES,
    hex_to_rgb,
)

with open('../merged.json') as p:
    data = json.load(p)

We will be converting the Pokemons' hex code color values into real color names, for easier analysis later on.

In [3]:
css3_db = CSS3_HEX_TO_NAMES

# Function to get the color name from an RGB value
def convert_rgb_to_names(rgb_tuple):
    
    # a dictionary of all the hex and their respective names in css3
    names = []
    rgb_values = []    
    for color_hex, color_name in css3_db.items():
        names.append(color_name)
        rgb_values.append(hex_to_rgb(color_hex))
    
    kdt_db = KDTree(rgb_values)    
    distance, index = kdt_db.query(rgb_tuple)
    return names[index]

In [4]:
def remake_pokemon_json(data, use_main_color = True, skip_colors=[]):
    new_dict = []
    for dex_num, poke in data.items():
        new_poke_dict = dict()
        color_names = set()

        for color in poke['colors']:
            color_rgb = ImageColor.getcolor(color, "RGB") # conver hex code to rgb
            color_name = convert_rgb_to_names(color_rgb) # get color name based on rgb values

            if (color_name not in skip_colors):
                color_names.add(color_name)


        # Set up the new pokemon object

        new_poke_dict['id'] = dex_num

        try:
            new_poke_dict['name'] = poke['name']
        except:
            new_poke_dict['name'] = ''

        new_poke_dict['types'] = poke['types']

        try:
            new_poke_dict['main_color'] = poke['color']
        except:
            new_poke_dict['main_color'] = ''

        if (use_main_color and new_poke_dict['main_color'] != ''):
            color_names.add(new_poke_dict['main_color'])

        new_poke_dict['colors'] = list(color_names)
        new_poke_dict['colors'].sort()

        new_dict.append(new_poke_dict)

    return new_dict

In [5]:
new_poke_dict = remake_pokemon_json(data, skip_colors=['white','black'])

with open('data/new_pokemon_list.json', 'w') as outfile:
    json.dump(new_poke_dict, outfile)

In [6]:
print(new_poke_dict[0])
print(new_poke_dict[110])
print(new_poke_dict[270])

{'id': '001', 'name': 'bulbasaur', 'types': ['Grass', 'Poison'], 'main_color': 'green', 'colors': ['darkseagreen', 'gray', 'green']}
{'id': '111', 'name': 'rhyhorn', 'types': ['Ground', 'Rock'], 'main_color': 'gray', 'colors': ['darkgray', 'darkslategray', 'dimgray', 'gray']}
{'id': '271', 'name': 'lombre', 'types': ['Water', 'Grass'], 'main_color': 'green', 'colors': ['darkkhaki', 'green', 'olivedrab', 'silver']}


Curious to see what the counts are for individual colors, and then also typing(s)

In [7]:
hex_codes = []

for poke in new_poke_dict:
    color_list = poke['colors']
    hex_codes.extend(color_list)

hex_codes_count = Counter(hex_codes)
all_colors = list(hex_codes_count.keys())
all_colors.sort()

hex_codes_count.most_common(10)

[('darkslategray', 388),
 ('dimgray', 284),
 ('gray', 245),
 ('blue', 157),
 ('silver', 157),
 ('darkgray', 157),
 ('brown', 145),
 ('indianred', 112),
 ('snow', 110),
 ('green', 108)]

In [8]:
typings = []
all_typings = []

for poke in new_poke_dict:
    for type in poke['types']:
        all_typings.append(type)

    if len(poke['types']) > 1:
        type_tuple = (poke['types'][0], poke['types'][1])
    else:
        type_tuple = (poke['types'][0],)
    
    typings.append(type_tuple)

typings_count = Counter(typings)
all_typings = set(all_typings)
all_typings = list(all_typings)
all_typings.sort()

print(typings_count.most_common(10))

[(('Normal',), 69), (('Water',), 67), (('Grass',), 43), (('Psychic',), 37), (('Fire',), 33), (('Electric',), 32), (('Fighting',), 27), (('Normal', 'Flying'), 26), (('Bug',), 19), (('Fairy',), 18)]


Using our new Pokemon objects, let's see what the co-occurrences are like for colors and typings (at first, let's look at them individually). To do this, we can make a reusable function that will aggregate the co-occurrences of values based on a key we choose:

In [9]:
from collections import defaultdict, Counter

def count_co_occurrences(pokemon_data, key, key2 = None):
    
    #--- Your code starts here

    co_occurrences = defaultdict(lambda: Counter())

    for pokemon in pokemon_data:
        for value in pokemon[key]:
            if (key2 != None):
                co_occur_list = [ val for val in pokemon[key2] ]
            else:
                co_occur_list = [ val for val in pokemon[key] ]
            co_occurrences[value] += Counter(co_occur_list)

    #--- Your code ends here            
    
    return co_occurrences

In [20]:
# Typing co-occurrences
typing_co_occurrences = count_co_occurrences(new_poke_dict, 'types')

# write type co-occurrences to file
with open('data/co_occurrences/typing_co_occurrences.json', 'w') as outfile:
    json.dump(dict(typing_co_occurrences), outfile)

# color co-occurrences
color_co_occurrences = count_co_occurrences(new_poke_dict, 'colors')

# write color co-occurrences to file
with open('data/co_occurrences/color_co_occurrences.json', 'w') as outfile:
    json.dump(dict(color_co_occurrences), outfile)

print("Grass type co-occurrences:")
print(typing_co_occurrences['Grass'])
print()
print("Water type co-occurrences:")
print(typing_co_occurrences['Water'])
print()
print("Fire type co-occurrences:")
print(typing_co_occurrences['Fire'])

# print()
# print("Number of times 'blue' and 'green' co-occur:")
# print(color_co_occurrences['blue']['green'])
# print()
# print("Number of times 'Black' and 'Salmon' co-occur:")
# print(color_co_occurrences['black']['salmon'])


Grass type co-occurrences:
Counter({'Grass': 107, 'Poison': 14, 'Bug': 6, 'Flying': 6, 'Ghost': 6, 'Fairy': 5, 'Psychic': 4, 'Dark': 4, 'Water': 3, 'Fighting': 3, 'Steel': 3, 'Dragon': 3, 'Rock': 2, 'Ice': 2, 'Normal': 2, 'Ground': 1})

Water type co-occurrences:
Counter({'Water': 142, 'Rock': 11, 'Ground': 9, 'Flying': 8, 'Ice': 7, 'Poison': 6, 'Psychic': 5, 'Bug': 5, 'Fairy': 4, 'Dark': 4, 'Dragon': 3, 'Grass': 3, 'Ghost': 3, 'Fighting': 2, 'Electric': 2, 'Steel': 1, 'Normal': 1, 'Fire': 1})

Fire type co-occurrences:
Counter({'Fire': 71, 'Flying': 6, 'Fighting': 6, 'Ghost': 4, 'Bug': 4, 'Rock': 3, 'Dark': 3, 'Ground': 2, 'Psychic': 2, 'Dragon': 2, 'Normal': 2, 'Poison': 2, 'Steel': 1, 'Water': 1})
Grass type color co-occurrences:
Counter()

Green color type co-occurrences:
Counter({'Grass': 59, 'Dragon': 15, 'Poison': 14, 'Bug': 12, 'Flying': 10, 'Water': 10, 'Psychic': 9, 'Ground': 9, 'Ghost': 6, 'Rock': 4, 'Fighting': 4, 'Steel': 4, 'Dark': 3, 'Electric': 3, 'Fairy': 3, 'Normal': 

The above gives us some interesting information about what types and colors co-occur the most with each other. As we see, grass and poison co-occur only 14 times among 905 records, while grass and fire never occur together (makes some sense). For the colors, we can see that black and white occur a lot together, while black and 'salmon' occur together barely at all.

Let us take this a little further, and see what the probabilities are like for certain typings to co-occur, and certain colors to co-occur.

In [11]:
# Function to get probability of one value occurring in the total number
def prob_value(total, co_occurrences, value):
    probability = co_occurrences[value][value] / total
    return round(probability, 5)

# Function to get probability that 2 values occur together
def prob_value_pair(total, co_occurrences, value1, value2):
    probability = co_occurrences[value1][value2] / total
    return round(probability, 5)

# Final function to get the probability of a value given another value
def prob_value_given_value(total, co_occurrences, value1, value2):
    prob_pair = prob_value_pair(total, co_occurrences, value1, value2)
    prob_b = prob_value(total, co_occurrences, value2)

    probability = prob_pair / prob_b
    return round(probability, 5)

In [12]:
print("Probability of a grass type:")
print(prob_value(len(new_poke_dict), typing_co_occurrences, 'Grass'))
print()
print("Probability of a grass/poison type:")
print(prob_value_pair(len(new_poke_dict), typing_co_occurrences, 'Grass', 'Poison'))
print()
print("Probability a Pokemon will be Poison given it is Grass:")
print(prob_value_given_value(len(new_poke_dict), typing_co_occurrences, 'Poison', 'Grass'))
print()
print("Probability that a Pokemon is both green and salmon colored:")
print(prob_value_pair(len(new_poke_dict), color_co_occurrences, 'salmon', 'green'))
print()
print("Probability a Pokemon has the color 'salmon' given that it has 'green':")
print(prob_value_given_value(len(new_poke_dict), color_co_occurrences, 'salmon', 'green'))

Probability of a grass type:
0.11823

Probability of a grass/poison type:
0.01547

Probability a Pokemon will be Poison given it is Grass:
0.13085

Probability that a Pokemon is both green and salmon colored:
0.0011

Probability a Pokemon has the color 'salmon' given that it has 'green':
0.00922


The above gives us some good details about the likelihood that certain types and colors will co-occur, both just overall and if one is already set to occur. For example, while Grass/Poison is only probable 1.55% of the time, if a Pokemon IS a Grass type, the probability it will be Poison is higher at 13.08%.

Let's take a look at the complete probabilities for certain types and colors.

In [29]:
def likely_co_values(total, co_occurrences, value1):
    ## initialize a Counter() for the co-ingredient probabilities
    probs = Counter()

    for val in co_occurrences[value1]:
        probs[val] += prob_value_given_value(total, co_occurrences, val, value1)

    return probs

In [30]:
print("Typings most likely to pair with 'Grass':")
print(likely_co_values(len(new_poke_dict), typing_co_occurrences, "Grass").most_common())
print()
print("Typings most likely to pair with 'Dragon':")
print(likely_co_values(len(new_poke_dict), typing_co_occurrences, "Dragon").most_common())
print()
print()
print("Colors likely to pair with 'salmon':")
print(likely_co_values(len(new_poke_dict), color_co_occurrences, "salmon").most_common(10))
print()
print("Colors likely to pair with 'green':")
print(likely_co_values(len(new_poke_dict), color_co_occurrences, "green").most_common(10))


Typings most likely to pair with 'Grass':
[('Grass', 1.0), ('Poison', 0.13085), ('Bug', 0.05608), ('Flying', 0.05608), ('Ghost', 0.05608), ('Fairy', 0.04669), ('Psychic', 0.03738), ('Dark', 0.03738), ('Water', 0.028), ('Fighting', 0.028), ('Steel', 0.028), ('Dragon', 0.028), ('Rock', 0.01869), ('Ice', 0.01869), ('Normal', 0.01869), ('Ground', 0.0093)]

Typings most likely to pair with 'Dragon':
[('Dragon', 1.0), ('Flying', 0.10714), ('Ground', 0.10714), ('Ghost', 0.07143), ('Dark', 0.07143), ('Water', 0.05349), ('Poison', 0.05349), ('Grass', 0.05349), ('Psychic', 0.03571), ('Steel', 0.03571), ('Fire', 0.03571), ('Electric', 0.03571), ('Rock', 0.03571), ('Fighting', 0.03571), ('Ice', 0.01778), ('Normal', 0.01778)]


Colors likely to pair with 'salmon':
[('salmon', 1.0), ('red', 0.5), ('darkslategray', 0.5), ('chocolate', 0.24887), ('wheat', 0.24887), ('darkgray', 0.24887), ('darkolivegreen', 0.24887), ('green', 0.24887), ('khaki', 0.24887), ('sienna', 0.24887)]

Colors likely to pair wi

Using our 'likely_co_values" function, this gives us all the probabilities that, given a certain attribute, a Pokemon will be another attribute too. We can use our previously-created 'all_typings' and 'all_colors' variable to cycle through all of them and aggregate all the likely pairings into 1 object. With that, we can then output 2 files showing the likelihood types and colors will co-occur.

In [51]:
# Reusable function is able to take 'total', a co-occurrences variable, and keys list in order to get likely pairings
def get_all_likely_pairings(total, co_occurrences, keys):
    likely_pairings = dict()
    for key in keys:
        likely_values = likely_co_values(total, co_occurrences, key)
        likely_values = dict(likely_values)

        for key2 in keys:
            if (key2 not in likely_values.keys()):
                likely_values[key2] = 0

        likely_values.pop(key)

        likely_values = list(likely_values.items())
        likely_values.sort(key=lambda x: x[1], reverse=True)

        likely_values_sorted = dict()
        for val in likely_values:
            likely_values_sorted[val[0]] = val[1]
        
        likely_pairings[key] = likely_values_sorted

    return likely_pairings

In [52]:
# Getting all likely type pairings
all_likely_type_pairings = get_all_likely_pairings(len(new_poke_dict), typing_co_occurrences, all_typings)

# Getting all likely color pairings
all_likely_color_pairings = get_all_likely_pairings(len(new_poke_dict), color_co_occurrences, all_colors)

# Save the likely Type pairings to a data file
with open('data/probabilities/type_pair_likelihood.json', 'w') as outfile:
    json.dump(all_likely_type_pairings, outfile)

# Save the likely Type pairings to a data file
with open('data/probabilities/color_pair_likelihood.json', 'w') as outfile:
    json.dump(all_likely_color_pairings, outfile)

What we want to do now is also compare color to typing, and vice versa: Given a color, what are the likelihoods of certain types; or given a type, what is the likelihood of it being a certain color? We can reuse our co-occurrences function from earlier, just passing in a 'key2' value to see both type + color, and color + type.

In [25]:
# Typing and color co-occurrences
typing_and_color_co_occurrences = count_co_occurrences(new_poke_dict, 'types', 'colors')

# Color and typing co-occurrences
color_and_typing_co_occurrences = count_co_occurrences(new_poke_dict, 'colors', 'types')

print("Grass type color co-occurrences:")
print(typing_and_color_co_occurrences['Grass'])
print()
print("Green color type co-occurrences:")
print(color_and_typing_co_occurrences['green'])

# Save the likely Type + color pairings to a data file
with open('data/co_occurrences/type_and_color_co_occurrences.json', 'w') as outfile:
    json.dump(dict(typing_and_color_co_occurrences), outfile)

# Save the likely Type + color pairings to a data file
with open('data/co_occurrences/color_and_type_co_occurrences.json', 'w') as outfile:
    json.dump(dict(color_and_typing_co_occurrences), outfile)

Grass type color co-occurrences:
Counter({'green': 59, 'darkseagreen': 31, 'dimgray': 28, 'darkslategray': 28, 'gray': 27, 'darkkhaki': 18, 'darkolivegreen': 18, 'silver': 16, 'tan': 15, 'brown': 15, 'seagreen': 14, 'snow': 14, 'khaki': 13, 'indianred': 12, 'mediumseagreen': 10, 'palevioletred': 9, 'darkgray': 9, 'rosybrown': 8, 'wheat': 8, 'sandybrown': 7, 'peru': 7, 'purple': 7, 'pink': 6, 'burlywood': 6, 'slategray': 5, 'blue': 5, 'yellow': 5, 'olivedrab': 5, 'gainsboro': 5, 'lightgray': 5, 'white': 5, 'cadetblue': 4, 'red': 4, 'sienna': 3, 'palegoldenrod': 3, 'yellowgreen': 3, 'darksalmon': 3, 'mediumaquamarine': 2, 'steelblue': 2, 'lightpink': 2, 'blanchedalmond': 2, 'lightslategray': 2, 'lightcoral': 2, 'goldenrod': 2, 'forestgreen': 1, 'darkslateblue': 1, 'mistyrose': 1, 'cornflowerblue': 1, 'beige': 1, 'floralwhite': 1, 'darkcyan': 1, 'bisque': 1, 'chocolate': 1})

Green color type co-occurrences:
Counter({'Grass': 59, 'Dragon': 15, 'Poison': 14, 'Bug': 12, 'Flying': 10, 'Water

After getting the co-occurrences, we can again get the likelihoods of those using our likelihood function.

In [50]:
# Getting all likely type/color pairings
all_likely_type_color_pairings = get_all_likely_pairings(len(new_poke_dict), typing_and_color_co_occurrences, all_typings)

# Getting all likely color/type pairings
all_likely_color_type_pairings = get_all_likely_pairings(len(new_poke_dict), color_and_typing_co_occurrences, all_colors)

# Save the likely Type pairings to a data file
with open('data/probabilities/type_and_color_pair_likelihood.json', 'w') as outfile:
    json.dump(all_likely_type_color_pairings, outfile)

# Save the likely Type pairings to a data file
with open('data/probabilities/color_and_type_pair_likelihood.json', 'w') as outfile:
    json.dump(all_likely_color_type_pairings, outfile)

ZeroDivisionError: float division by zero