## **Loading data into Pandas**

In [0]:
import pandas as pd

In [35]:
df = pd.read_csv('pokemon_data.csv')

print(df.head().T)

                    0        1         2                      3           4
#                   1        2         3                      3           4
Name        Bulbasaur  Ivysaur  Venusaur  VenusaurMega Venusaur  Charmander
Type 1          Grass    Grass     Grass                  Grass        Fire
Type 2         Poison   Poison    Poison                 Poison         NaN
HP                 45       60        80                     80          39
Attack             49       62        82                    100          52
Defense            49       63        83                    123          43
Sp. Atk            65       80       100                    122          60
Sp. Def            65       80       100                    120          50
Speed              45       60        80                     80          65
Generation          1        1         1                      1           1
Legendary       False    False     False                  False       False


## **Reading Data in Pandas**

In [36]:
# Read Headers
print(df.columns)

Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')


In [37]:
# Read each Column
print(df['Name'][0:5])

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object


In [38]:
print(df[['Name', 'Type 1', 'HP']])

                      Name   Type 1  HP
0                Bulbasaur    Grass  45
1                  Ivysaur    Grass  60
2                 Venusaur    Grass  80
3    VenusaurMega Venusaur    Grass  80
4               Charmander     Fire  39
..                     ...      ...  ..
795                Diancie     Rock  50
796    DiancieMega Diancie     Rock  50
797    HoopaHoopa Confined  Psychic  80
798     HoopaHoopa Unbound  Psychic  80
799              Volcanion     Fire  80

[800 rows x 3 columns]


In [39]:
# Read Each Row
print(df.iloc[1:4].T)

                  1         2                      3
#                 2         3                      3
Name        Ivysaur  Venusaur  VenusaurMega Venusaur
Type 1        Grass     Grass                  Grass
Type 2       Poison    Poison                 Poison
HP               60        80                     80
Attack           62        82                    100
Defense          63        83                    123
Sp. Atk          80       100                    122
Sp. Def          80       100                    120
Speed            60        80                     80
Generation        1         1                      1
Legendary     False     False                  False


In [40]:
# Read a specific location (R,C)
print(df.iloc[2,1])

Venusaur


In [41]:
# Iterating Row by Row
for index, row in df.iterrows():
  print(index, row['Name'])

0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
5 Charmeleon
6 Charizard
7 CharizardMega Charizard X
8 CharizardMega Charizard Y
9 Squirtle
10 Wartortle
11 Blastoise
12 BlastoiseMega Blastoise
13 Caterpie
14 Metapod
15 Butterfree
16 Weedle
17 Kakuna
18 Beedrill
19 BeedrillMega Beedrill
20 Pidgey
21 Pidgeotto
22 Pidgeot
23 PidgeotMega Pidgeot
24 Rattata
25 Raticate
26 Spearow
27 Fearow
28 Ekans
29 Arbok
30 Pikachu
31 Raichu
32 Sandshrew
33 Sandslash
34 Nidoran (Female)
35 Nidorina
36 Nidoqueen
37 Nidoran (Male)
38 Nidorino
39 Nidoking
40 Clefairy
41 Clefable
42 Vulpix
43 Ninetales
44 Jigglypuff
45 Wigglytuff
46 Zubat
47 Golbat
48 Oddish
49 Gloom
50 Vileplume
51 Paras
52 Parasect
53 Venonat
54 Venomoth
55 Diglett
56 Dugtrio
57 Meowth
58 Persian
59 Psyduck
60 Golduck
61 Mankey
62 Primeape
63 Growlithe
64 Arcanine
65 Poliwag
66 Poliwhirl
67 Poliwrath
68 Abra
69 Kadabra
70 Alakazam
71 AlakazamMega Alakazam
72 Machop
73 Machoke
74 Machamp
75 Bellsprout
76 Weepinbell
77 

In [42]:
# Read a specific location (R,C) by labels
df.loc[df['Type 1'] == 'Grass']

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
48,43,Oddish,Grass,Poison,45,50,55,75,65,30,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
718,650,Chespin,Grass,,56,61,65,48,45,38,6,False
719,651,Quilladin,Grass,,61,78,95,56,58,57,6,False
720,652,Chesnaught,Grass,Fighting,88,107,122,74,75,64,6,False
740,672,Skiddo,Grass,,66,65,48,62,57,52,6,False


## **Sorting/Describing Data**

In [43]:
df.describe()

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [44]:
df.sort_values('Name', ascending=False)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
794,718,Zygarde50% Forme,Dragon,Ground,108,100,121,81,95,95,6,True
695,634,Zweilous,Dark,Dragon,72,85,70,65,70,58,5,False
46,41,Zubat,Poison,Flying,40,45,35,30,40,55,1,False
631,570,Zorua,Dark,,40,65,40,80,40,65,5,False
632,571,Zoroark,Dark,,60,105,60,120,60,105,5,False
...,...,...,...,...,...,...,...,...,...,...,...,...
393,359,AbsolMega Absol,Dark,,65,150,60,115,60,115,3,False
392,359,Absol,Dark,,65,130,60,75,60,75,3,False
68,63,Abra,Psychic,,25,20,15,105,55,90,1,False
511,460,AbomasnowMega Abomasnow,Grass,Ice,90,132,105,132,105,30,4,False


In [45]:
df.sort_values(['Type 1', 'HP'], ascending=[1,0])

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
520,469,Yanmega,Bug,Flying,86,76,86,116,56,95,4,False
698,637,Volcarona,Bug,Fire,85,60,65,135,105,100,5,False
231,214,Heracross,Bug,Fighting,80,125,75,40,95,85,2,False
232,214,HeracrossMega Heracross,Bug,Fighting,80,185,115,40,105,75,2,False
678,617,Accelgor,Bug,,80,70,40,100,60,145,5,False
...,...,...,...,...,...,...,...,...,...,...,...,...
106,98,Krabby,Water,,30,105,90,25,25,50,1,False
125,116,Horsea,Water,,30,40,70,70,25,60,1,False
129,120,Staryu,Water,,30,45,55,70,55,85,1,False
139,129,Magikarp,Water,,20,10,55,15,20,80,1,False


## **Making changes to the data**

In [46]:
df.head(5).T

Unnamed: 0,0,1,2,3,4
#,1,2,3,3,4
Name,Bulbasaur,Ivysaur,Venusaur,VenusaurMega Venusaur,Charmander
Type 1,Grass,Grass,Grass,Grass,Fire
Type 2,Poison,Poison,Poison,Poison,
HP,45,60,80,80,39
Attack,49,62,82,100,52
Defense,49,63,83,123,43
Sp. Atk,65,80,100,122,60
Sp. Def,65,80,100,120,50
Speed,45,60,80,80,65


In [48]:
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
df.head(5).T

Unnamed: 0,0,1,2,3,4
#,1,2,3,3,4
Name,Bulbasaur,Ivysaur,Venusaur,VenusaurMega Venusaur,Charmander
Type 1,Grass,Grass,Grass,Grass,Fire
Type 2,Poison,Poison,Poison,Poison,
HP,45,60,80,80,39
Attack,49,62,82,100,52
Defense,49,63,83,123,43
Sp. Atk,65,80,100,122,60
Sp. Def,65,80,100,120,50
Speed,45,60,80,80,65


In [50]:
# Drop the column
df = df.drop(columns=['Total'])
df.head(5).T

Unnamed: 0,0,1,2,3,4
#,1,2,3,3,4
Name,Bulbasaur,Ivysaur,Venusaur,VenusaurMega Venusaur,Charmander
Type 1,Grass,Grass,Grass,Grass,Fire
Type 2,Poison,Poison,Poison,Poison,
HP,45,60,80,80,39
Attack,49,62,82,100,52
Defense,49,63,83,123,43
Sp. Atk,65,80,100,122,60
Sp. Def,65,80,100,120,50
Speed,45,60,80,80,65


In [52]:
# Add a column in a better way
df['Total'] = df.iloc[:,4:10].sum(axis=1)

cols = list(df.columns.values)
df = df[cols[0:4] + [cols[-1]] + cols[4:12]]

df.head(5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


## **Saving our Data (Exploring into Desired Format** 

In [0]:
df.to_csv('modified.csv', index=False)

## **Filtering Data**

In [55]:
df.head().T

Unnamed: 0,0,1,2,3,4
#,1,2,3,3,4
Name,Bulbasaur,Ivysaur,Venusaur,VenusaurMega Venusaur,Charmander
Type 1,Grass,Grass,Grass,Grass,Fire
Type 2,Poison,Poison,Poison,Poison,
Total,318,405,525,625,309
HP,45,60,80,80,39
Attack,49,62,82,100,52
Defense,49,63,83,123,43
Sp. Atk,65,80,100,122,60
Sp. Def,65,80,100,120,50


In [58]:
df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
48,43,Oddish,Grass,Poison,320,45,50,55,75,65,30,1,False
49,44,Gloom,Grass,Poison,395,60,65,70,85,75,40,1,False
50,45,Vileplume,Grass,Poison,490,75,80,85,110,90,50,1,False
75,69,Bellsprout,Grass,Poison,300,50,75,35,70,30,40,1,False
76,70,Weepinbell,Grass,Poison,390,65,90,50,85,45,55,1,False
77,71,Victreebel,Grass,Poison,490,80,105,65,100,70,70,1,False


In [61]:
new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]
print(new_df)

       #                   Name Type 1  ... Speed  Generation  Legendary
2      3               Venusaur  Grass  ...    80           1      False
3      3  VenusaurMega Venusaur  Grass  ...    80           1      False
50    45              Vileplume  Grass  ...    50           1      False
77    71             Victreebel  Grass  ...    70           1      False
652  591              Amoonguss  Grass  ...    30           5      False

[5 rows x 13 columns]


In [65]:
# Filtered data contains index from the previous dataset
# Reset index
new_df.reset_index(drop=True, inplace=True)

new_df

Unnamed: 0,index,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
1,3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
2,50,45,Vileplume,Grass,Poison,490,75,80,85,110,90,50,1,False
3,77,71,Victreebel,Grass,Poison,490,80,105,65,100,70,70,1,False
4,652,591,Amoonguss,Grass,Poison,464,114,85,70,85,80,30,5,False


In [66]:
# All the names that contain 'Mega'
df.loc[df['Name'].str.contains('Mega')]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
7,6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False
8,6,CharizardMega Charizard Y,Fire,Flying,634,78,104,78,159,115,100,1,False
12,9,BlastoiseMega Blastoise,Water,,630,79,103,120,135,115,78,1,False
19,15,BeedrillMega Beedrill,Bug,Poison,495,65,150,40,15,80,145,1,False
23,18,PidgeotMega Pidgeot,Normal,Flying,579,83,80,80,135,80,121,1,False
71,65,AlakazamMega Alakazam,Psychic,,590,55,50,65,175,95,150,1,False
87,80,SlowbroMega Slowbro,Water,Psychic,590,95,75,180,130,80,30,1,False
102,94,GengarMega Gengar,Ghost,Poison,600,60,65,80,170,95,130,1,False
124,115,KangaskhanMega Kangaskhan,Normal,,590,105,125,100,60,100,100,1,False


In [67]:
# All the names that do not contain 'Mega'
df.loc[~df['Name'].str.contains('Mega')]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
5,5,Charmeleon,Fire,,405,58,64,58,80,65,80,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,718,Zygarde50% Forme,Dragon,Ground,600,108,100,121,81,95,95,6,True
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [69]:
# Regex can be used in str.contains()
import re

df.loc[df['Type 1'].str.contains('fire|grass', flags=re.I, regex=True)]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
735,667,Litleo,Fire,Normal,369,62,50,58,73,54,72,6,False
736,668,Pyroar,Fire,Normal,507,86,68,72,109,66,106,6,False
740,672,Skiddo,Grass,,350,66,65,48,62,57,52,6,False
741,673,Gogoat,Grass,,531,123,100,62,97,81,68,6,False


In [72]:
# Names that start with 'pi'
df.loc[df['Name'].str.contains('^pi[a-z]*', flags=re.I, regex=True)]

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
20,16,Pidgey,Normal,Flying,251,40,45,40,35,35,56,1,False
21,17,Pidgeotto,Normal,Flying,349,63,60,55,50,50,71,1,False
22,18,Pidgeot,Normal,Flying,479,83,80,75,70,70,101,1,False
23,18,PidgeotMega Pidgeot,Normal,Flying,579,83,80,80,135,80,121,1,False
30,25,Pikachu,Electric,,320,35,55,40,50,50,90,1,False
136,127,Pinsir,Bug,,500,65,125,100,55,70,85,1,False
137,127,PinsirMega Pinsir,Bug,Flying,600,65,155,120,65,90,105,1,False
186,172,Pichu,Electric,,205,20,40,15,35,35,60,2,False
219,204,Pineco,Bug,,290,50,65,90,35,35,15,2,False
239,221,Piloswine,Ice,Ground,450,100,100,80,60,60,50,2,False


## **Conditional Changes**

In [73]:
df.loc[df['Type 1'] == 'Fire', 'Type 1'] = 'Flamer'

df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Flamer,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [74]:
df.loc[df['Type 1'] == 'Flamer', 'Type 1'] = 'Fire'

df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [76]:
# All the Fire Type 1 will become Legendary True
df.loc[df['Type 1'] == 'Fire', 'Legendary'] = True

df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [78]:
df.loc[df['Total'] > 500, ['Generation', 'Legendary']] = ['Test 1', 'Test 2']

df

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,Test 1,Test 2
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,Test 1,Test 2
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,Test 1,Test 2
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,Test 1,Test 2
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,Test 1,Test 2
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,Test 1,Test 2


## **Aggregate Statistics**

In [80]:
df = pd.read_csv('pokemon_data.csv')

df.head().T

Unnamed: 0,0,1,2,3,4
#,1,2,3,3,4
Name,Bulbasaur,Ivysaur,Venusaur,VenusaurMega Venusaur,Charmander
Type 1,Grass,Grass,Grass,Grass,Fire
Type 2,Poison,Poison,Poison,Poison,
HP,45,60,80,80,39
Attack,49,62,82,100,52
Defense,49,63,83,123,43
Sp. Atk,65,80,100,122,60
Sp. Def,65,80,100,120,50
Speed,45,60,80,80,65


In [83]:
df.groupby(['Type 1']).mean().sort_values('Attack', ascending=False)

Unnamed: 0_level_0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Dragon,474.375,83.3125,112.125,86.375,96.84375,88.84375,83.03125,3.875,0.375
Fighting,363.851852,69.851852,96.777778,65.925926,53.111111,64.703704,66.074074,3.37037,0.0
Ground,356.28125,73.78125,95.75,84.84375,56.46875,62.75,63.90625,3.15625,0.125
Rock,392.727273,65.363636,92.863636,100.795455,63.340909,75.477273,55.909091,3.454545,0.090909
Steel,442.851852,65.222222,92.703704,126.37037,67.518519,80.62963,55.259259,3.851852,0.148148
Dark,461.354839,66.806452,88.387097,70.225806,74.645161,69.516129,76.16129,4.032258,0.064516
Fire,327.403846,69.903846,84.769231,67.769231,88.980769,72.211538,74.442308,3.211538,0.096154
Flying,677.75,70.75,78.75,66.25,94.25,72.5,102.5,5.5,0.5
Poison,251.785714,67.25,74.678571,68.821429,60.428571,64.392857,63.571429,2.535714,0.0
Water,303.089286,72.0625,74.151786,72.946429,74.8125,70.517857,65.964286,2.857143,0.035714


In [84]:
df.groupby(['Type 1']).sum()

Unnamed: 0_level_0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bug,23080,3925,4897,4880,3717,4471,4256,222,0.0
Dark,14302,2071,2740,2177,2314,2155,2361,125,2.0
Dragon,15180,2666,3588,2764,3099,2843,2657,124,12.0
Electric,15994,2631,3040,2917,3961,3243,3718,144,4.0
Fairy,7642,1260,1046,1117,1335,1440,826,70,1.0
Fighting,9824,1886,2613,1780,1434,1747,1784,91,0.0
Fire,17025,3635,4408,3524,4627,3755,3871,167,5.0
Flying,2711,283,315,265,377,290,410,22,2.0
Ghost,15568,2062,2361,2598,2539,2447,2059,134,2.0
Grass,24141,4709,5125,4956,5425,4930,4335,235,3.0


In [86]:
df['count'] = 1
df

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,count
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,1
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,1
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,1
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,1
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True,1
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True,1
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True,1
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True,1


In [87]:
df.groupby(['Type 1']).count()['count']

Type 1
Bug          69
Dark         31
Dragon       32
Electric     44
Fairy        17
Fighting     27
Fire         52
Flying        4
Ghost        32
Grass        70
Ground       32
Ice          24
Normal       98
Poison       28
Psychic      57
Rock         44
Steel        27
Water       112
Name: count, dtype: int64

In [88]:
df.groupby(['Type 1', 'Type 2']).count()['count']

Type 1  Type 2  
Bug     Electric     2
        Fighting     2
        Fire         2
        Flying      14
        Ghost        1
                    ..
Water   Ice          3
        Poison       3
        Psychic      5
        Rock         4
        Steel        1
Name: count, Length: 136, dtype: int64

## **Working with large amounts of data**

In [89]:
for df in pd.read_csv('pokemon_data.csv', chunksize=5):
  print('CHUNK DF')
  print(df)

CHUNK DF
   #                   Name Type 1  ... Speed  Generation  Legendary
0  1              Bulbasaur  Grass  ...    45           1      False
1  2                Ivysaur  Grass  ...    60           1      False
2  3               Venusaur  Grass  ...    80           1      False
3  3  VenusaurMega Venusaur  Grass  ...    80           1      False
4  4             Charmander   Fire  ...    65           1      False

[5 rows x 12 columns]
CHUNK DF
   #                       Name Type 1  ... Speed  Generation  Legendary
5  5                 Charmeleon   Fire  ...    80           1      False
6  6                  Charizard   Fire  ...   100           1      False
7  6  CharizardMega Charizard X   Fire  ...   100           1      False
8  6  CharizardMega Charizard Y   Fire  ...   100           1      False
9  7                   Squirtle  Water  ...    43           1      False

[5 rows x 12 columns]
CHUNK DF
     #                     Name Type 1  ...  Speed  Generation  Legendary
1

In [90]:
new_df = pd.DataFrame(columns=df.columns)

for df in pd.read_csv('pokemon_data.csv', chunksize=5):
  results = df.groupby(['Type 1']).count()

  new_df = pd.concat([new_df, results])

new_df.head().T

Unnamed: 0,Fire,Grass,Fire.1,Water,Bug
#,1.0,4.0,4.0,1.0,2.0
Name,1.0,4.0,4.0,1.0,2.0
Type 1,,,,,
Type 2,0.0,4.0,3.0,0.0,0.0
HP,1.0,4.0,4.0,1.0,2.0
Attack,1.0,4.0,4.0,1.0,2.0
Defense,1.0,4.0,4.0,1.0,2.0
Sp. Atk,1.0,4.0,4.0,1.0,2.0
Sp. Def,1.0,4.0,4.0,1.0,2.0
Speed,1.0,4.0,4.0,1.0,2.0
