In [1]:
import pandas as pd
import numpy as np
import base64
import os
from PIL import Image
from io import BytesIO
import ast

# Show every column when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None

def img_to_base64(path):
    """Return the image stored at ``path`` as base64-encoded JPEG data.

    The image is converted to RGB, re-encoded as JPEG in memory and the
    resulting bytes are base64-encoded.

    Parameters
    ----------
    path : str or path-like
        Location of the image file to read.

    Returns
    -------
    bytes
        The base64 encoding of the JPEG data (``bytes``, not ``str``).
    """
    # The context manager closes the underlying file once the pixel data has
    # been copied into the converted image; a bare ``Image.open`` leaves the
    # handle open.
    with Image.open(path) as source:
        rgb = source.convert('RGB')

    # No ``thumbnail`` call here: the previous code resized the image to its
    # own size (a no-op) and relied on ``Image.ANTIALIAS``, which no longer
    # exists in Pillow >= 10.
    buffer = BytesIO()
    rgb.save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue())

## Images to base64

In [2]:
# One row per file in the ``images`` folder, with its base64 JPEG payload.
df_img1 = pd.DataFrame(os.listdir('images'), columns=['images'])
df_img1['base64'] = df_img1.images.apply(lambda x: img_to_base64('images/'+x))

# Lookup key: lower-cased file name without its extension. ``splitext``
# handles extensions of any length, whereas slicing off the last four
# characters only works for three-letter ones.
df_img1['name'] = df_img1.images.map(lambda x: os.path.splitext(x)[0]).str.lower()

# Hyphenated names get a second row with spaces instead of hyphens, so a
# lookup by either spelling finds the image.
df_img2 = (
    df_img1[df_img1['name'].str.contains('-', regex=False)]
    .assign(name=lambda d: d['name'].str.replace('-', ' ', regex=False))
)

# ``columns=`` instead of a positional axis: pandas 2.0 no longer accepts
# ``drop('images', 1)``.
df_img_final = pd.concat([df_img1, df_img2]).drop(columns='images')



In [127]:
# name -> base64 payload; if a name occurs more than once the last row wins.
df_img_final_dict = dict(zip(df_img_final['name'], df_img_final['base64']))

## Create pokemon dataset with images

In [128]:
df = pd.read_csv('pokemon.csv')

# Attach the base64 image by lower-cased name; names without a matching
# image end up as NaN.
df['base64'] = df.name.str.lower().map(df_img_final_dict)

# Make sure the export folder exists so the cell also works on a fresh checkout.
os.makedirs('Power BI data', exist_ok=True)

# ``columns=`` instead of a positional axis: pandas 2.0 no longer accepts
# ``drop('abilities', 1)``.
df.drop(columns='abilities').to_csv('Power BI data/pokemon.csv', index = False, decimal=',')

## Create abilities dataset

In [129]:
# Independent copy of the two columns needed for the abilities table.
# ``abilities`` still holds the raw strings read from the CSV at this point,
# so exploding it here would change nothing; the real explode happens once
# the strings have been parsed into lists.
df_abilities = df[['name', 'abilities']].copy()

In [130]:
# Parse each ``abilities`` string into a Python object with
# ``ast.literal_eval``, then explode so every ability gets its own row.
parsed_abilities = df_abilities['abilities'].map(ast.literal_eval)
df_abilities = df_abilities.assign(abilities=parsed_abilities).explode('abilities')

In [131]:
# Write the abilities table (one row per name/ability pair), without the
# index and with ',' as the decimal mark.
df_abilities.to_csv(
    'Power BI data/abilities.csv',
    decimal=',',
    index=False,
)

## Create stats dataset

In [150]:
from sklearn import preprocessing

In [151]:
def normalize_array(a):
    """Min-max scale ``a`` to the range 0-100, rounded to two decimals.

    Parameters
    ----------
    a : array-like of numbers
        Values to rescale (e.g. a NumPy array or a pandas Series).

    Returns
    -------
    Same kind of object that ``a - np.min(a)`` produces, holding
    ``(a - min) / (max - min) * 100`` rounded to 2 decimals. When every value
    is identical (zero range) the result is all zeros rather than NaN.
    """
    lowest = np.min(a)
    spread = np.ptp(a)  # max - min

    if np.all(spread == 0):
        # Constant input: dividing by the zero range would give 0/0 = NaN
        # (plus a RuntimeWarning). Every value equals the minimum, so the
        # scaled value is 0. Multiplying by 0.0 keeps the container type/index.
        return np.round((a - lowest) * 0.0, 2)

    return np.round((a - lowest) / spread * 100, 2)

In [152]:
# Base stats per pokemon. Each stat is selected exactly once: listing
# 'speed' twice produces two identically named columns, which later shows up
# as duplicated 'speed' rows after melting and merging.
df_stats = df[['name', 'hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']].reset_index(drop=True)

In [153]:
# Work on a copy so the raw values in ``df_stats`` stay untouched.
df_stats_norm = df_stats.copy()

# Rescale every stat column independently; each stat appears once in the
# list ('speed' used to be listed twice).
for stat in ['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']:
    df_stats_norm[stat] = normalize_array(df_stats_norm[stat])

In [154]:
# Wide -> long: one row per (name, stat) holding the normalized value.
df_stats_norm = pd.melt(
    df_stats_norm,
    id_vars=['name'],
    var_name="stat",
    value_name="value_normalized",
)

In [155]:
# Wide -> long: one row per (name, stat) holding the raw value.
df_stats = pd.melt(
    df_stats,
    id_vars=['name'],
    var_name="stat",
    value_name="value",
)

In [156]:
# Attach the normalized value to each raw (name, stat) row; the left join
# keeps every raw row.
df_stats = pd.merge(
    df_stats,
    df_stats_norm,
    how='left',
    on=['name', 'stat'],
)

In [157]:
# Write the long-format stats table, without the index and with ',' as the
# decimal mark.
df_stats.to_csv(
    'Power BI data/stats.csv',
    decimal=',',
    index=False,
)