In [347]:
# https://preppindata.blogspot.com/2021/02/2021-week-10-pokemon-hierarchies.html

import pandas as pd
import numpy as np

### Input the data

In [348]:
df_pkmon = pd.read_excel(r'data/PD 2021 Wk 10 Input.xlsx', sheet_name = 'Pokemon')
df_pkmon

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed
0,001,Bulbasaur,GRASS,318,45,49,49,65,65,45
1,001,Bulbasaur,POISON,318,45,49,49,65,65,45
2,002,Ivysaur,GRASS,405,60,62,63,80,80,60
3,002,Ivysaur,POISON,405,60,62,63,80,80,60
4,003,Venusaur,GRASS,525,80,82,83,100,100,80
...,...,...,...,...,...,...,...,...,...,...
1163,716,Xerneas,FAIRY,680,126,131,95,131,98,99
1164,717,Yveltal,DARK,680,126,131,95,131,98,99
1165,717,Yveltal,FLYING,680,126,131,95,131,98,99
1166,718,Zygarde,DRAGON,600,108,100,121,81,95,95


### Our Pokémon dataset actually contains too many Pokémon: (help)
- We're only interested in Pokémon up to Generation III, which is up to (and including) number 386
- This means we're also not interested in mega evolutions so we can filter Pokémon whose name start with "Mega"

In [349]:
df_pkmon['ID'] = df_pkmon['#'].astype(float)
df_pkmon = df_pkmon[df_pkmon['ID']<=386]
df_pkmon.drop(columns='ID',inplace=True)
df_pkmon = df_pkmon[~df_pkmon['Name'].str.contains('Mega ')]

df_pkmon

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pkmon.drop(columns='ID',inplace=True)


Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed
0,001,Bulbasaur,GRASS,318,45,49,49,65,65,45
1,001,Bulbasaur,POISON,318,45,49,49,65,65,45
2,002,Ivysaur,GRASS,405,60,62,63,80,80,60
3,002,Ivysaur,POISON,405,60,62,63,80,80,60
4,003,Venusaur,GRASS,525,80,82,83,100,100,80
...,...,...,...,...,...,...,...,...,...,...
612,384,Rayquaza,DRAGON,680,105,150,90,150,90,95
613,384,Rayquaza,FLYING,680,105,150,90,150,90,95
614,385,Jirachi,STEEL,600,100,100,100,100,100,100
615,385,Jirachi,PSYCHIC,600,100,100,100,100,100,100


### Some Pokémon have more than one Type. We aren't interested in Types for this challenge so remove this field and ensure we have one row per Pokémon

In [350]:
df_pkmon = df_pkmon.drop(['Type'],axis=1)
df_pkmon.drop_duplicates(inplace=True)
df_pkmon

Unnamed: 0,#,Name,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed
0,001,Bulbasaur,318,45,49,49,65,65,45
2,002,Ivysaur,405,60,62,63,80,80,60
4,003,Venusaur,525,80,82,83,100,100,80
8,004,Charmander,309,39,52,43,60,50,65
9,005,Charmeleon,405,58,64,58,80,65,80
...,...,...,...,...,...,...,...,...,...
610,382,Kyogre,670,100,100,90,150,140,90
611,383,Groudon,670,100,150,140,100,90,90
612,384,Rayquaza,680,105,150,90,150,90,95
614,385,Jirachi,600,100,100,100,100,100,100


### Now we want to bring in information about what our Pokémon evolve to (help)
- Warning!  In our Evolution dataset, we still have Pokémon beyond Gen III. You'll need to filter these out too, from both the evolved from and evolved to fields (help)

In [351]:
ls_gen3_name = df_pkmon['Name']

df_evo = pd.read_excel(r'data/PD 2021 Wk 10 Input.xlsx', sheet_name = 'Evolution')
df_evo = df_evo[df_evo['Evolving from'].isin(ls_gen3_name)]
df_evo = df_evo[df_evo['Evolving to'].isin(ls_gen3_name)]
df_evo

Unnamed: 0,Evolving from,Evolving to,Level,Condition,Evolution Type
0,Bulbasaur,Ivysaur,16.0,,Level
1,Ivysaur,Venusaur,32.0,,Level
2,Charmander,Charmeleon,16.0,,Level
3,Charmeleon,Charizard,36.0,,Level
4,Squirtle,Wartortle,16.0,,Level
...,...,...,...,...,...
378,Chansey,Blissey,,,Happiness
379,Eevee,Espeon,,Daytime,Happiness
380,Eevee,Umbreon,,Nighttime,Happiness
382,Togepi,Togetic,,,Happiness


### Bring in information about what a Pokémon evolves from (help)
- Ensure that we have all 386 of our Pokémon, with nulls if they don't have a pre-evolved form or if they don't evolve
- Some duplication may have occurred with all our joins, ensure no 2 rows are exactly the same

In [352]:
#previous evolution
df_pkmon_w_evo = df_pkmon.merge(df_evo[['Evolving to', 'Evolving from']], left_on='Name', right_on='Evolving to', how='left')
df_pkmon_w_evo = df_pkmon_w_evo.drop('Evolving to', axis=1)
#next evolution
df_pkmon_w_evo = df_pkmon_w_evo.merge(df_evo, left_on='Name', right_on='Evolving from', how='left')
df_pkmon_w_evo = df_pkmon_w_evo.drop('Evolving from_y', axis=1)
df_pkmon_w_evo.rename(columns={'Evolving from_x': 'Evolving from'}, inplace=True)

df_pkmon_w_evo = df_pkmon_w_evo.drop_duplicates()

df_pkmon_w_evo

Unnamed: 0,#,Name,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed,Evolving from,Evolving to,Level,Condition,Evolution Type
0,001,Bulbasaur,318,45,49,49,65,65,45,,Ivysaur,16.0,,Level
1,002,Ivysaur,405,60,62,63,80,80,60,Bulbasaur,Venusaur,32.0,,Level
2,003,Venusaur,525,80,82,83,100,100,80,Ivysaur,,,,
3,004,Charmander,309,39,52,43,60,50,65,,Charmeleon,16.0,,Level
4,005,Charmeleon,405,58,64,58,80,65,80,Charmander,Charizard,36.0,,Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,382,Kyogre,670,100,100,90,150,140,90,,,,,
447,383,Groudon,670,100,150,140,100,90,90,,,,,
448,384,Rayquaza,680,105,150,90,150,90,95,,,,,
449,385,Jirachi,600,100,100,100,100,100,100,,,,,


### Finally, for Pokémon that have 3 evolutions, we want to know what the First Evolution is in their Evolution Group
- The Evolution Group will be named after the First Evolution e.g. in the above example, Bulbasaur is the name of the Evolution Group

In [353]:
Triple_evo = df_pkmon_w_evo[['Name','Evolving from', 'Evolving to']]
Triple_evo = Triple_evo[~Triple_evo['Evolving from'].isna()]
Triple_evo = Triple_evo[~Triple_evo['Evolving to'].isna()]

#The DF above is the intermitent. The Pokemons with same evolution line has same evolution group, so we need to do for each for those pokemon
Triple_evo_1st = Triple_evo.copy()
Triple_evo_1st['Evolution Group'] = Triple_evo_1st['Evolving from']
Triple_evo_1st[['Name','Evolving to']] = Triple_evo_1st[['Evolving from','Name']]
Triple_evo_1st['Evolving from'] = np.nan


Triple_evo_2nd = Triple_evo.copy()
Triple_evo_2nd['Evolution Group'] = Triple_evo_2nd['Evolving from']

Triple_evo_3rd = Triple_evo.copy()
Triple_evo_3rd['Evolution Group'] = Triple_evo_3rd['Evolving from']
Triple_evo_3rd[['Name','Evolving from']] = Triple_evo_3rd[['Evolving to','Name']]
Triple_evo_3rd['Evolving to'] = np.nan
Triple_evo_3rd

Triple_evo_all = pd.concat([Triple_evo_1st,Triple_evo_2nd,Triple_evo_3rd])
Triple_evo_all

Unnamed: 0,Name,Evolving from,Evolving to,Evolution Group
1,Bulbasaur,,Ivysaur,Bulbasaur
4,Charmander,,Charmeleon,Charmander
7,Squirtle,,Wartortle,Squirtle
10,Caterpie,,Metapod,Caterpie
13,Weedle,,Kakuna,Weedle
...,...,...,...,...
369,Aggron,Lairon,,Aron
393,Flygon,Vibrava,,Trapinch
428,Walrein,Sealeo,,Spheal
436,Salamence,Shelgon,,Bagon


In [354]:
Double_evo = df_pkmon_w_evo[['Name','Evolving from', 'Evolving to']]
Double_evo = Double_evo[Double_evo['Evolving from'].isna()]
Double_evo = Double_evo[~(Double_evo['Evolving from'].isna() & Double_evo['Evolving to'].isna())]
Double_evo = Double_evo[~(Double_evo['Name'].isin(Triple_evo_all['Name']))]

Double_evo_1st = Double_evo.copy()
Double_evo_1st['Evolution Group'] = Double_evo_1st['Name']
Double_evo_1st

Double_evo_2nd = Double_evo.copy()
Double_evo_2nd['Evolution Group'] = Double_evo_2nd['Name']
Double_evo_2nd[['Name','Evolving from']] = Double_evo_2nd[['Evolving to','Name']]
Double_evo_2nd['Evolving to'] = np.nan

Double_evo_all = pd.concat([Double_evo_1st,Double_evo_2nd])
Double_evo_all

Unnamed: 0,Name,Evolving from,Evolving to,Evolution Group
18,Rattata,,Raticate,Rattata
20,Spearow,,Fearow,Spearow
22,Ekans,,Arbok,Ekans
27,Sandshrew,,Sandslash,Sandshrew
45,Vulpix,,Ninetales,Vulpix
...,...,...,...,...
413,Milotic,Feebas,,Feebas
417,Banette,Shuppet,,Shuppet
419,Dusclops,Duskull,,Duskull
424,Wobbuffet,Wynaut,,Wynaut


In [355]:
single_evo = df_pkmon_w_evo[['Name','Evolving from', 'Evolving to']]
single_evo = single_evo[(single_evo['Evolving from'].isna() & single_evo['Evolving to'].isna())]
single_evo['Evolution Group'] = No_evo['Name']
single_evo

Unnamed: 0,Name,Evolving from,Evolving to,Evolution Group
109,Farfetch'd,,,Farfetch'd
138,Lickitung,,,Lickitung
145,Tangela,,,Tangela
146,Kangaskhan,,,Kangaskhan
155,Mr. Mime,,,Mr. Mime
...,...,...,...,...
446,Kyogre,,,Kyogre
447,Groudon,,,Groudon
448,Rayquaza,,,Rayquaza
449,Jirachi,,,Jirachi


In [356]:
# combine all the evolution
evo_all = pd.concat([single_evo,Double_evo_all,Triple_evo_all])
evo_all.drop_duplicates()

Unnamed: 0,Name,Evolving from,Evolving to,Evolution Group
109,Farfetch'd,,,Farfetch'd
138,Lickitung,,,Lickitung
145,Tangela,,,Tangela
146,Kangaskhan,,,Kangaskhan
155,Mr. Mime,,,Mr. Mime
...,...,...,...,...
369,Aggron,Lairon,,Aron
393,Flygon,Vibrava,,Trapinch
428,Walrein,Sealeo,,Spheal
436,Salamence,Shelgon,,Bagon


In [357]:
df_output = df_pkmon_w_evo.merge(evo_all, on=['Name','Evolving from','Evolving to'], how='inner')
df_output.drop_duplicates(inplace=True)
df_output

Unnamed: 0,#,Name,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed,Evolving from,Evolving to,Level,Condition,Evolution Type,Evolution Group
0,001,Bulbasaur,318,45,49,49,65,65,45,,Ivysaur,16.0,,Level,Bulbasaur
1,002,Ivysaur,405,60,62,63,80,80,60,Bulbasaur,Venusaur,32.0,,Level,Bulbasaur
2,003,Venusaur,525,80,82,83,100,100,80,Ivysaur,,,,,Bulbasaur
3,004,Charmander,309,39,52,43,60,50,65,,Charmeleon,16.0,,Level,Charmander
4,005,Charmeleon,405,58,64,58,80,65,80,Charmander,Charizard,36.0,,Level,Charmander
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,382,Kyogre,670,100,100,90,150,140,90,,,,,,Kyogre
395,383,Groudon,670,100,150,140,100,90,90,,,,,,Groudon
396,384,Rayquaza,680,105,150,90,150,90,95,,,,,,Rayquaza
397,385,Jirachi,600,100,100,100,100,100,100,,,,,,Jirachi


### Output the data

In [358]:
df_output.to_csv(r'output/2021-week10-output.csv')