# Pandas
Panel - Data
- Load
- Prepare  
- Model
- Manipulate
- Analyze

### DataFrames
Are Two-dimensional tagged data structures

Has 3 components: 
- data
- index (Rows)
- columns

In [2]:
import pandas as pd
import numpy as np

In [3]:
data = np.array([['', "col1", "col2","col3","col4"], 
                 ["row1",11,22,33,44],
                 ["row2",55,66,77,88],
                 ["row3",99,1010,1111,1212],
                 ["row4",1313,1414,1515,1616]])

In [4]:
df = pd.DataFrame(data=data[1:,1:], index=data[1:,0], columns=data[0,1:])

In [5]:
print(df)
print(df.shape)
df['col1']['row1']

      col1  col2  col3  col4
row1    11    22    33    44
row2    55    66    77    88
row3    99  1010  1111  1212
row4  1313  1414  1515  1616
(4, 4)


'11'

In [6]:
df2 = pd.DataFrame(np.array([[1,2,3],[4,5,6],[7,8,9]]))
print(df2)
print(df2.shape)

   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9
(3, 3)


In [7]:
series = pd.Series({"Argentina": "Buenos Aires", "Chile": "Santiago de Chile", "Colombia": "Bogotá", "Peru":"Lima", "Venezuela":"Caracas", "Mexico":"Mexico D.F."})

In [8]:
print(series)
print(series.shape)

Argentina         Buenos Aires
Chile        Santiago de Chile
Colombia                Bogotá
Peru                      Lima
Venezuela              Caracas
Mexico             Mexico D.F.
dtype: object
(6,)


In [9]:
series2 = pd.Series({"Argentina": ["Buenos Aires", "Rosario", "La Plata"], "Chile": "Santiago de Chile", "Colombia": ["Bogotá", "Medellin", "Cali"], "Peru":"Lima", "Venezuela":"Caracas", "Mexico":"Mexico D.F."})

In [10]:
print(series2)
print(series2.shape)

Argentina    [Buenos Aires, Rosario, La Plata]
Chile                        Santiago de Chile
Colombia              [Bogotá, Medellin, Cali]
Peru                                      Lima
Venezuela                              Caracas
Mexico                             Mexico D.F.
dtype: object
(6,)


In [11]:
df3 = pd.DataFrame(np.array([[1,2,3,4,5], [6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25]]))

In [12]:
print(len(df3.index))
print(len(df3.columns))

5
5


In [13]:
df3.describe()

Unnamed: 0,0,1,2,3,4
count,5.0,5.0,5.0,5.0,5.0
mean,11.0,12.0,13.0,14.0,15.0
std,7.905694,7.905694,7.905694,7.905694,7.905694
min,1.0,2.0,3.0,4.0,5.0
25%,6.0,7.0,8.0,9.0,10.0
50%,11.0,12.0,13.0,14.0,15.0
75%,16.0,17.0,18.0,19.0,20.0
max,21.0,22.0,23.0,24.0,25.0


In [14]:
df4 = pd.DataFrame(np.random.randint(0,50,size=(100,5)))

In [15]:
df4.describe()

Unnamed: 0,0,1,2,3,4
count,100.0,100.0,100.0,100.0,100.0
mean,24.35,23.23,25.39,26.61,22.37
std,13.596921,14.649339,14.030915,14.096991,14.520383
min,0.0,0.0,0.0,1.0,1.0
25%,14.0,10.0,16.5,15.0,9.75
50%,24.5,22.0,25.5,27.5,21.5
75%,35.5,37.25,37.0,40.0,34.25
max,49.0,49.0,48.0,49.0,49.0


In [16]:
df4.corr()

Unnamed: 0,0,1,2,3,4
0,1.0,0.11633,0.089498,0.099055,-0.028955
1,0.11633,1.0,0.091653,-0.005675,-0.071444
2,0.089498,0.091653,1.0,0.201016,0.158236
3,0.099055,-0.005675,0.201016,1.0,-0.04898
4,-0.028955,-0.071444,0.158236,-0.04898,1.0


In [17]:
df4.count() # Count of not null elements

0    100
1    100
2    100
3    100
4    100
dtype: int64

In [18]:
df4.max() # Max Value

0    49
1    49
2    48
3    49
4    49
dtype: int32

In [19]:
df4.min() # Min value

0    0
1    0
2    0
3    1
4    1
dtype: int32

In [20]:
df4.mean() # Mean of all data

0    24.35
1    23.23
2    25.39
3    26.61
4    22.37
dtype: float64

In [21]:
df4.std() # Standard Deviation

0    13.596921
1    14.649339
2    14.030915
3    14.096991
4    14.520383
dtype: float64

In [22]:
df4.median() # Middle Value

0    24.5
1    22.0
2    25.5
3    27.5
4    21.5
dtype: float64

In [23]:
df4[0] # Column

0     26
1      0
2     38
3     44
4     37
      ..
95    18
96    27
97    31
98    42
99    45
Name: 0, Length: 100, dtype: int32

In [24]:
df4[[0,2,4]] # Two Columns

Unnamed: 0,0,2,4
0,26,17,19
1,0,27,14
2,38,27,4
3,44,43,32
4,37,6,26
...,...,...,...
95,18,3,3
96,27,28,2
97,31,2,25
98,42,27,35


In [25]:
df4.iloc[0][2] # same as M[0,2] Row 0 column 2

17

In [26]:
df4.loc[0] # Row 0 all columns

0    26
1    43
2    17
3    14
4    19
Name: 0, dtype: int32

In [27]:
df4.iloc[0,:] # Row 0 all 

0    26
1    43
2    17
3    14
4    19
Name: 0, dtype: int32

In [28]:
df4.iloc[0,:] = [None,None,None,None,None]

In [29]:
df4.isnull()

Unnamed: 0,0,1,2,3,4
0,True,True,True,True,True
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
95,False,False,False,False,False
96,False,False,False,False,False
97,False,False,False,False,False
98,False,False,False,False,False


In [30]:
df4.isnull().sum()

0    1
1    1
2    1
3    1
4    1
dtype: int64

In [31]:
# pd.dropna() # Row

In [32]:
# df4.dropna(axis=1) # Columns

In [33]:
df4.fillna('ND')

Unnamed: 0,0,1,2,3,4
0,ND,ND,ND,ND,ND
1,0,37,27,20,14
2,38,40,27,32,4
3,44,6,43,17,32
4,37,9,6,18,26
...,...,...,...,...,...
95,18,44,3,8,3
96,27,9,28,40,2
97,31,22,2,41,25
98,42,44,27,34,35


# DataSets

In [34]:
pk = pd.read_csv('Datasets/pokemon_data.csv')

In [35]:
pk.head(5)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False


In [36]:
pk.tail(5)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True
799,721,Volcanion,Fire,Water,80,110,120,130,90,70,6,True


In [38]:
pk.dtypes

#              int64
Name          object
Type 1        object
Type 2        object
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [None]:
pk_excel = pd.read_excel('Datasets/pokemon_data.xlsx')

In [None]:
pk_excel.head(3)

In [None]:
pk_excel.tail(3)

In [None]:
pk_txt = pd.read_csv('Datasets/pokemon_data.txt')

In [None]:
pk_txt.head(3)

In [None]:
pk_txt.tail(3)

In [None]:
pk_txt = pd.read_csv('Datasets/pokemon_data.txt', delimiter='\t')

In [None]:
pk_txt.head(3)

In [None]:
pk.columns

In [None]:
pk['Name'][0:5]

In [None]:
pk.Name[0:5]

In [None]:
pk[['Name','HP','Attack']][0:5]

In [None]:
pk.iloc[2]

In [None]:
pk.iloc[0:3]

In [None]:
count = 3
for idx, row in pk.iterrows():
    if count == 0:
        break
    else:
        print(idx, row)
        count -= 1

In [None]:
count = 3
for idx, row in pk.iterrows():
    if count == 0:
        break
    else:
        print(idx, row['Name'])
        count -= 1

# Filter Data

In [None]:
pk.loc[pk['Type 1'] == "Fire"][0:3]

In [None]:
pk.loc[pk['Type 1'] == "Grass"][0:3]

In [None]:
pk.loc[pk['Type 1'] == "Water"][0:3]

In [None]:
pk.loc[pk['Defense'] >= 100][:3]

In [None]:
pk.describe()

In [None]:
pk.sort_values('HP', ascending=False)[:10]

In [None]:
pk.sort_values(['Type 1','HP'], ascending=False)[:10]

# Some Changes

In [None]:
pk['Total'] = pk['HP'] + pk['Attack'] + pk['Defense'] + pk['Sp. Atk'] + pk['Sp. Def'] + pk['Speed']

In [None]:
pk.head(5)

In [None]:
pk.drop(columns=['Total'])

In [None]:
pk['Total'] = pk.iloc[:, 4:10].sum(axis=1)

In [None]:
pk.head(3)

In [None]:
pk2 = pk[['Name', 'Total', 'HP', 'Defense']]

In [None]:
pk2.head(3)

In [None]:
grass = pk.loc[pk['Type 1'] == "Grass"].loc[pk['HP'] > 90]

In [None]:
grass

In [None]:
grass = pk.loc[(pk['Type 1'] == "Grass") & (pk['HP'] > 90)]
grass

In [None]:
cols = pk.columns.values
cols

In [None]:
cols2 = list(pk.columns.values)
cols2

In [None]:
pk3 = pk[cols2[0:4] + [cols2[-1]] + cols2[4:12]]
pk3

# Export Data

In [104]:
grass = pk.loc[pk['Type 1'] == 'Grass']
grass.to_csv('grass_type.csv', index=False)
# grass.to_excel('grass_type.xlsx', index=False)
# grass.to_csv('grass_type.txt', index=False, sep='\t')
grass[:3]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318,1
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405,1
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525,1


In [111]:
water = pk.loc[pk['Type 1'] == 'Water']
#water.to_csv('water_type.csv', index=False)
water[:3]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
9,7,Squirtle,Water,,44,48,65,50,64,43,1,False,314,1
10,8,Wartortle,Water,,59,63,80,65,80,58,1,False,405,1
11,9,Blastoise,Water,,79,83,100,85,105,78,1,False,530,1


In [106]:
fire = pk.loc[pk['Type 1'] == 'Fire']
fire.to_csv('fire_type.csv', index=False)
fire[:3]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
4,4,Charmander,Fire,,39,52,43,60,50,65,1,True,309,1
5,5,Charmeleon,Fire,,58,64,58,80,65,80,1,True,405,1
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,True,534,1


In [112]:
water = water.reset_index()
water[:15]

Unnamed: 0,index,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
0,9,7,Squirtle,Water,,44,48,65,50,64,43,1,False,314,1
1,10,8,Wartortle,Water,,59,63,80,65,80,58,1,False,405,1
2,11,9,Blastoise,Water,,79,83,100,85,105,78,1,False,530,1
3,12,9,BlastoiseMega Blastoise,Water,,79,103,120,135,115,78,1,False,630,1
4,59,54,Psyduck,Water,,50,52,48,65,50,55,1,False,320,1
5,60,55,Golduck,Water,,80,82,78,95,80,85,1,False,500,1
6,65,60,Poliwag,Water,,40,50,40,40,40,90,1,False,300,1
7,66,61,Poliwhirl,Water,,65,65,65,50,50,90,1,False,385,1
8,67,62,Poliwrath,Water,Fighting,90,95,95,70,90,70,1,False,510,1
9,78,72,Tentacool,Water,Poison,40,40,35,50,100,70,1,False,335,1


In [113]:
fire = fire.reset_index(drop=True)
fire[:15]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
0,4,Charmander,Fire,,39,52,43,60,50,65,1,True,309,1
1,5,Charmeleon,Fire,,58,64,58,80,65,80,1,True,405,1
2,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,True,534,1
3,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,True,634,1
4,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,True,634,1
5,37,Vulpix,Fire,,38,41,40,50,65,65,1,True,299,1
6,38,Ninetales,Fire,,73,76,75,81,100,100,1,True,505,1
7,58,Growlithe,Fire,,55,70,45,70,50,60,1,True,350,1
8,59,Arcanine,Fire,,90,110,80,100,80,95,1,True,555,1
9,77,Ponyta,Fire,,50,85,55,65,65,90,1,True,410,1


In [116]:
megas = pk.loc[pk['Name'].str.contains('Mega')]
megas[:3]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,625,1
7,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,True,634,1
8,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,True,634,1


In [118]:
no_megas = pk.loc[~pk['Name'].str.contains('Mega')]
no_megas[:3]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318,1
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405,1
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525,1


In [122]:
# Import Regular Exp
import re

fire_water = pk.loc[pk['Type 1'].str.contains('Fire|Water', regex=True)]
fire_water[:10]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
4,4,Charmander,Fire,,39,52,43,60,50,65,1,True,309,1
5,5,Charmeleon,Fire,,58,64,58,80,65,80,1,True,405,1
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,True,534,1
7,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,True,634,1
8,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,True,634,1
9,7,Squirtle,Water,,44,48,65,50,64,43,1,False,314,1
10,8,Wartortle,Water,,59,63,80,65,80,58,1,False,405,1
11,9,Blastoise,Water,,79,83,100,85,105,78,1,False,530,1
12,9,BlastoiseMega Blastoise,Water,,79,103,120,135,115,78,1,False,630,1
42,37,Vulpix,Fire,,38,41,40,50,65,65,1,True,299,1


In [123]:
grass_water = pk.loc[pk['Type 1'].str.contains('grass|water', flags=re.I, regex=True)]
grass_water[:10]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318,1
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405,1
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525,1
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,625,1
9,7,Squirtle,Water,,44,48,65,50,64,43,1,False,314,1
10,8,Wartortle,Water,,59,63,80,65,80,58,1,False,405,1
11,9,Blastoise,Water,,79,83,100,85,105,78,1,False,530,1
12,9,BlastoiseMega Blastoise,Water,,79,103,120,135,115,78,1,False,630,1
48,43,Oddish,Grass,Poison,45,50,55,75,65,30,1,False,320,1
49,44,Gloom,Grass,Poison,60,65,70,85,75,40,1,False,395,1


In [135]:
ad_pk = pk.loc[pk['Name'].str.contains('fr[a-z]*', flags=re.I, regex=True)]
ad_pk[:10]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
15,12,Butterfree,Bug,Flying,60,45,50,90,80,70,1,False,395,1
530,478,Froslass,Ice,Ghost,70,80,70,80,70,110,4,False,480,1
534,479,RotomFrost Rotom,Electric,Ice,50,65,107,105,107,86,4,False,520,1
653,592,Frillish,Water,Ghost,55,40,50,65,85,40,5,False,335,1
672,611,Fraxure,Dragon,,66,117,70,40,50,67,5,False,410,1
724,656,Froakie,Water,,41,56,40,62,44,71,6,False,314,1
725,657,Frogadier,Water,,54,63,52,83,56,97,6,False,405,1
744,676,Furfrou,Normal,,75,80,60,65,90,102,6,False,472,1


In [142]:
pi_pk = pk.loc[pk['Name'].str.contains('^pi[a-z]', flags=re.I, regex=True)]
pi_pk[:10]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
20,16,Pidgey,Normal,Flying,40,45,40,35,35,56,1,False,251,1
21,17,Pidgeotto,Normal,Flying,63,60,55,50,50,71,1,False,349,1
22,18,Pidgeot,Normal,Flying,83,80,75,70,70,101,1,False,479,1
23,18,PidgeotMega Pidgeot,Normal,Flying,83,80,80,135,80,121,1,False,579,1
30,25,Pikachu,Electric,,35,55,40,50,50,90,1,False,320,1
136,127,Pinsir,Bug,,65,125,100,55,70,85,1,False,500,1
137,127,PinsirMega Pinsir,Bug,Flying,65,155,120,65,90,105,1,False,600,1
186,172,Pichu,Electric,,20,40,15,35,35,60,2,False,205,1
219,204,Pineco,Bug,,50,65,90,35,35,15,2,False,290,1
239,221,Piloswine,Ice,Ground,100,100,80,60,60,50,2,False,450,1


In [143]:
pk2 = pk

In [150]:
pk2.loc[pk2['Type 1'] == 'Fire', 'Legendary'] = True
# pk2.loc[(pk2['Type 1'] == 'Fire') & (pk2['HP'] > 100), ['Legendary', 'Total']] = True
pk2[:15]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False,318,1
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False,405,1
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False,525,1
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False,625,1
4,4,Charmander,Fire,,39,52,43,60,50,65,1,True,True,1
5,5,Charmeleon,Fire,,58,64,58,80,65,80,1,True,True,1
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,1,True,True,1
7,6,CharizardMega Charizard X,Fire,Dragon,78,130,111,130,85,100,1,True,True,1
8,6,CharizardMega Charizard Y,Fire,Flying,78,104,78,159,115,100,1,True,True,1
9,7,Squirtle,Water,,44,48,65,50,64,43,1,False,314,1


In [151]:
pk.groupby(['Type 1']).mean()

Unnamed: 0_level_0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,count
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bug,334.492754,56.884058,70.971014,70.724638,53.869565,64.797101,61.681159,3.217391,0.0,1.0
Dark,461.354839,66.806452,88.387097,70.225806,74.645161,69.516129,76.16129,4.032258,0.064516,1.0
Dragon,474.375,83.3125,112.125,86.375,96.84375,88.84375,83.03125,3.875,0.375,1.0
Electric,363.5,59.795455,69.090909,66.295455,90.022727,73.704545,84.5,3.272727,0.090909,1.0
Fairy,449.529412,74.117647,61.529412,65.705882,78.529412,84.705882,48.588235,4.117647,0.058824,1.0
Fighting,363.851852,69.851852,96.777778,65.925926,53.111111,64.703704,66.074074,3.37037,0.0,1.0
Fire,327.403846,69.903846,84.769231,67.769231,88.980769,72.211538,74.442308,3.211538,1.0,1.0
Flying,677.75,70.75,78.75,66.25,94.25,72.5,102.5,5.5,0.5,1.0
Ghost,486.5,64.4375,73.78125,81.1875,79.34375,76.46875,64.34375,4.1875,0.0625,1.0
Grass,344.871429,67.271429,73.214286,70.8,77.5,70.428571,61.928571,3.357143,0.042857,1.0


In [164]:
pk.groupby(['Type 1']).mean().sort_values('Attack', ascending=False)

Unnamed: 0_level_0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,count
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Dragon,474.375,83.3125,112.125,86.375,96.84375,88.84375,83.03125,3.875,0.375,1.0
Fighting,363.851852,69.851852,96.777778,65.925926,53.111111,64.703704,66.074074,3.37037,0.0,1.0
Ground,356.28125,73.78125,95.75,84.84375,56.46875,62.75,63.90625,3.15625,0.125,1.0
Rock,392.727273,65.363636,92.863636,100.795455,63.340909,75.477273,55.909091,3.454545,0.090909,1.0
Steel,442.851852,65.222222,92.703704,126.37037,67.518519,80.62963,55.259259,3.851852,0.148148,1.0
Dark,461.354839,66.806452,88.387097,70.225806,74.645161,69.516129,76.16129,4.032258,0.064516,1.0
Fire,327.403846,69.903846,84.769231,67.769231,88.980769,72.211538,74.442308,3.211538,1.0,1.0
Flying,677.75,70.75,78.75,66.25,94.25,72.5,102.5,5.5,0.5,1.0
Poison,251.785714,67.25,74.678571,68.821429,60.428571,64.392857,63.571429,2.535714,0.0,1.0
Water,303.089286,72.0625,74.151786,72.946429,74.8125,70.517857,65.964286,2.857143,0.035714,1.0


In [160]:
pk.groupby(['Type 1']).count().sort_values('#', ascending=False)

Unnamed: 0_level_0,#,Name,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total,count
Type 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Water,112,112,53,112,112,112,112,112,112,112,112,112,112
Normal,98,98,37,98,98,98,98,98,98,98,98,98,98
Grass,70,70,37,70,70,70,70,70,70,70,70,70,70
Bug,69,69,52,69,69,69,69,69,69,69,69,69,69
Psychic,57,57,19,57,57,57,57,57,57,57,57,57,57
Fire,52,52,24,52,52,52,52,52,52,52,52,52,52
Electric,44,44,17,44,44,44,44,44,44,44,44,44,44
Rock,44,44,35,44,44,44,44,44,44,44,44,44,44
Ghost,32,32,22,32,32,32,32,32,32,32,32,32,32
Ground,32,32,19,32,32,32,32,32,32,32,32,32,32


In [165]:
pk['count'] = 1
pk.groupby(['Type 1']).count()['count']

Type 1
Bug          69
Dark         31
Dragon       32
Electric     44
Fairy        17
Fighting     27
Fire         52
Flying        4
Ghost        32
Grass        70
Ground       32
Ice          24
Normal       98
Poison       28
Psychic      57
Rock         44
Steel        27
Water       112
Name: count, dtype: int64

In [171]:
pk.groupby(['Type 1','Type 2']).count()['count'][:10]

Type 1  Type 2  
Bug     Electric     2
        Fighting     2
        Fire         2
        Flying      14
        Ghost        1
        Grass        6
        Ground       2
        Poison      12
        Rock         3
        Steel        7
Name: count, dtype: int64

In [173]:
chunk_c = 0
for df in pd.read_csv('Datasets/pokemon_data.csv', chunksize=70):
    print(f'Chunk {chunk_c}')
    print(df)
    chunk_c += 1

Chunk 0
     #                   Name   Type 1    Type 2  HP  Attack  Defense  \
0    1              Bulbasaur    Grass    Poison  45      49       49   
1    2                Ivysaur    Grass    Poison  60      62       63   
2    3               Venusaur    Grass    Poison  80      82       83   
3    3  VenusaurMega Venusaur    Grass    Poison  80     100      123   
4    4             Charmander     Fire       NaN  39      52       43   
..  ..                    ...      ...       ...  ..     ...      ...   
65  60                Poliwag    Water       NaN  40      50       40   
66  61              Poliwhirl    Water       NaN  65      65       65   
67  62              Poliwrath    Water  Fighting  90      95       95   
68  63                   Abra  Psychic       NaN  25      20       15   
69  64                Kadabra  Psychic       NaN  40      35       30   

    Sp. Atk  Sp. Def  Speed  Generation  Legendary  
0        65       65     45           1      False  
1        

In [179]:
new_pk = pd.DataFrame(columns=pk.columns)
for df in pd.read_csv('Datasets/pokemon_data.csv', chunksize=5):
    res = df.groupby(['Type 1']).count()
    new_pk = pd.concat([new_pk, res], sort=True)

In [180]:
new_pk[:10]

Unnamed: 0,#,Attack,Defense,Generation,HP,Legendary,Name,Sp. Atk,Sp. Def,Speed,Total,Type 1,Type 2,count
Fire,1,1,1,1,1,1,1,1,1,1,,,0,
Grass,4,4,4,4,4,4,4,4,4,4,,,4,
Fire,4,4,4,4,4,4,4,4,4,4,,,3,
Water,1,1,1,1,1,1,1,1,1,1,,,0,
Bug,2,2,2,2,2,2,2,2,2,2,,,0,
Water,3,3,3,3,3,3,3,3,3,3,,,0,
Bug,5,5,5,5,5,5,5,5,5,5,,,5,
Normal,5,5,5,5,5,5,5,5,5,5,,,4,
Normal,3,3,3,3,3,3,3,3,3,3,,,2,
Poison,2,2,2,2,2,2,2,2,2,2,,,0,
