In [1]:
import pandas as pd

### Cleaning the data

In [2]:
# Load the raw JSON dump and persist it as CSV (without the index column) so
# the cells below can work from a flat file.
poke_json = pd.read_json(path_or_buf="pokemon.json")
poke_json.to_csv(path_or_buf="poke.csv", index=False)

In [3]:
# Read the CSV back, order the rows by `id`, then discard `id` itself.
# The original row labels are kept, which is why the displayed index is not
# strictly increasing after the sort.
poke_df = (
    pd.read_csv("poke.csv")
    .sort_values(by=['id'])
    .drop(columns='id')
)
poke_df

Unnamed: 0,name,1st_type,2nd_type,total,hp,attack,defense,sp.atk,sp.def,speed
0,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45
1,Ivysaur,Grass,Poison,405,60,62,63,80,80,60
2,Venusaur,Grass,Poison,525,80,82,83,100,100,80
193,Venusaur,Grass,Poison,625,80,100,123,122,120,80
3,Charmander,Fire,,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...,...,...
1005,Fezandipiti,Poison,Fairy,555,88,91,82,70,125,99
1006,Ogerpon,Grass,,550,80,120,84,60,96,110
1007,Ogerpon,Grass,Water,550,80,120,84,60,96,110
1009,Ogerpon,Grass,Rock,550,80,120,84,60,96,110


### Feature creation

In [4]:
# Distinct primary types, in order of first appearance (18 of them).
poke_df.loc[:, '1st_type'].unique()

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Dark', 'Poison',
       'Electric', 'Ground', 'Ice', 'Fairy', 'Steel', 'Fighting',
       'Psychic', 'Rock', 'Ghost', 'Dragon', 'Flying'], dtype=object)

In [5]:
# Sum the `total` stat per primary type and per secondary type.
# Rows without a 2nd_type do not contribute to the secondary grouping
# (groupby skips missing keys by default).
type_1_overall = poke_df.groupby('1st_type')['total'].sum().reset_index()
type_2_overall = poke_df.groupby('2nd_type')['total'].sum().reset_index()

# Reshape each summary into a single row labelled 'total' with one column per
# type. `values` has to name the numeric column being spread ('total'); the
# type column is only the pivot key and must not be passed as `values`.
# Each type occurs once after the groupby, so pivot_table's default mean
# aggregation leaves the sums numerically unchanged.
type_1_overall = pd.pivot_table(type_1_overall, values="total", columns="1st_type")
type_2_overall = pd.pivot_table(type_2_overall, values="total", columns="2nd_type")

In [6]:
# Relabel every primary-type column as '1st_type_<Type>_over'.
# The mapping is derived from the type names present in poke_df instead of a
# hand-typed list, so no type found in the data can be left unrenamed.
# Re-running the cell is harmless: rename() ignores mapping keys that are not
# (or are no longer) column labels.
type_1_overall.rename(
    columns={
        type_name: f'1st_type_{type_name}_over'
        for type_name in poke_df['1st_type'].dropna().unique()
    },
    inplace=True,
)

In [7]:
# Relabel every secondary-type column as '2nd_type_<Type>_over'.
# The mapping is derived from the type names present in poke_df instead of a
# hand-typed list; missing 2nd_type values are dropped first so they never
# become a mapping key.
# Re-running the cell is harmless: rename() ignores mapping keys that are not
# (or are no longer) column labels.
type_2_overall.rename(
    columns={
        type_name: f'2nd_type_{type_name}_over'
        for type_name in poke_df['2nd_type'].dropna().unique()
    },
    inplace=True,
)

In [8]:
# Place the primary-type and secondary-type totals side by side in one
# single-row frame; the left join aligns the two frames on their row label.
features_df = type_1_overall.join(other=type_2_overall, how="left")
features_df

Unnamed: 0,1st_type_Bug_over,1st_type_Dark_over,1st_type_Dragon_over,1st_type_Electric_over,1st_type_Fairy_over,1st_type_Fighting_over,1st_type_Fire_over,1st_type_Flying_over,1st_type_Ghost_over,1st_type_Grass_over,...,2nd_type_Ghost_over,2nd_type_Grass_over,2nd_type_Ground_over,2nd_type_Ice_over,2nd_type_Normal_over,2nd_type_Poison_over,2nd_type_Psychic_over,2nd_type_Rock_over,2nd_type_Steel_over,2nd_type_Water_over
total,34686.0,25184.0,25850.0,32666.0,13926.0,22904.0,34141.0,4505.0,20599.0,47650.0,...,18366.0,14080.0,19326.0,11441.0,7947.0,19452.0,24342.0,9099.0,19719.0,11475.0


In [9]:
# Average the `total` stat per primary type and per secondary type.
# Rows without a 2nd_type do not contribute to the secondary grouping
# (groupby skips missing keys by default).
type_1_over_mean = poke_df.groupby('1st_type')['total'].mean().reset_index()
type_2_over_mean = poke_df.groupby('2nd_type')['total'].mean().reset_index()

# Reshape each summary into a single row labelled 'total' with one column per
# type. `values` has to name the numeric column being spread ('total'); the
# type column is only the pivot key and must not be passed as `values`.
# Each type occurs once after the groupby, so pivot_table's default mean
# aggregation leaves the per-type averages unchanged.
type_1_over_mean = pd.pivot_table(type_1_over_mean, values="total", columns="1st_type")
type_2_over_mean = pd.pivot_table(type_2_over_mean, values="total", columns="2nd_type")

In [10]:
# Relabel every primary-type column as '1st_type_<Type>_over_mean'.
# The mapping is derived from the type names present in poke_df instead of a
# hand-typed list, so no type found in the data can be left unrenamed.
# Re-running the cell is harmless: rename() ignores mapping keys that are not
# (or are no longer) column labels.
type_1_over_mean.rename(
    columns={
        type_name: f'1st_type_{type_name}_over_mean'
        for type_name in poke_df['1st_type'].dropna().unique()
    },
    inplace=True,
)

In [11]:
# Relabel every secondary-type column as '2nd_type_<Type>_over_mean'.
# The mapping is derived from the type names present in poke_df instead of a
# hand-typed list; missing 2nd_type values are dropped first so they never
# become a mapping key.
# Re-running the cell is harmless: rename() ignores mapping keys that are not
# (or are no longer) column labels.
type_2_over_mean.rename(
    columns={
        type_name: f'2nd_type_{type_name}_over_mean'
        for type_name in poke_df['2nd_type'].dropna().unique()
    },
    inplace=True,
)

In [12]:
# Combine the per-type averages for primary and secondary types into one
# single-row frame, then show it next to the per-type sums.
mean_df = type_1_over_mean.join(other=type_2_over_mean, how="left")
# NOTE(review): the joined result below is only displayed, not assigned, so
# features_df itself still holds just the '_over' columns afterwards.
features_df.join(other=mean_df, how="left")

Unnamed: 0,1st_type_Bug_over,1st_type_Dark_over,1st_type_Dragon_over,1st_type_Electric_over,1st_type_Fairy_over,1st_type_Fighting_over,1st_type_Fire_over,1st_type_Flying_over,1st_type_Ghost_over,1st_type_Grass_over,...,2nd_type_Ghost_over_mean,2nd_type_Grass_over_mean,2nd_type_Ground_over_mean,2nd_type_Ice_over_mean,2nd_type_Normal_over_mean,2nd_type_Poison_over_mean,2nd_type_Psychic_over_mean,2nd_type_Rock_over_mean,2nd_type_Steel_over_mean,2nd_type_Water_over_mean
total,34686.0,25184.0,25850.0,32666.0,13926.0,22904.0,34141.0,4505.0,20599.0,47650.0,...,470.923077,426.666667,449.44186,520.045455,418.263158,413.87234,486.84,454.95,492.975,441.346154
