# Pandas learning

---

In [143]:
import matplotlib.pyplot as plt
import pandas as pd

##### First, learn how to **create** a simple DataFrame from lists of elements and **save** it as a CSV file.

In [144]:
# Raw inputs: one list of baby names and one list of birth counts,
# aligned by position.
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']
births = [968, 155, 77, 578, 973]

# Pair each name with its birth count as a (name, births) tuple.
baby_dataset = [(name, count) for name, count in zip(names, births)]
print('baby_dataset: ', baby_dataset)

baby_dataset:  [('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]


In [145]:
# Turn the (name, births) tuples into a labelled two-column frame,
# then display it as the cell output.
df = pd.DataFrame(
    baby_dataset,
    columns=['Names', 'Births'],
)
df

Unnamed: 0,Names,Births
0,Bob,968
1,Jessica,155
2,Mary,77
3,John,578
4,Mel,973


In [146]:
# Write the frame to 'births1880.csv' (path relative to the working directory).
# index=False leaves out the row labels and header=False leaves out the
# column names, so the file contains only the data rows.
df.to_csv('births1880.csv', index=False, header=False)
# so easy :D

---

##### Now take a different DataFrame and **read** it from a CSV file. We can do some interesting things with it.

In [147]:
# Load the Pokemon dataset from a CSV file (path relative to the working directory).
# NOTE: this rebinds `df`, which until now held the baby-names frame.
df = pd.read_csv('pokemon_data.csv')
df

# Other input formats can be read in a similar way:
# - Excel workbooks:
#   pd.read_excel('pokemon_data.xlsx')
#
# - Text files whose fields are separated by another character, e.g. a tab ('\t'):
#   pd.read_csv('pokemon_data.csv', delimiter='\t')


Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True


##### Use the **.head()** and **.tail()** methods.
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html

In [148]:
# Show the first 2 rows of the frame.
df.head(2)


Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False


In [149]:
# Show the last 2 rows of the frame.
df.tail(2)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
798,720,HoopaHoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True
799,721,Volcanion,Fire,Water,80,110,120,130,90,70,6,True


---

# Reading data in DataFrames.

In [150]:
# Print the column labels together with the type of the object holding them.
column_labels = df.columns
print(column_labels, '\n type: ', type(column_labels))

Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object') 
 type:  <class 'pandas.core.indexes.base.Index'>


In [151]:
# Select a single column by its label and print it along with its type.
name_column = df['Name']
print(name_column, '\n', type(name_column))

0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander
               ...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object 
 <class 'pandas.core.series.Series'>


In [152]:
# Because df['Any column'] returns a Series object, Series methods such as .tail() can be used on it.
df['Name'].tail(2)

798    HoopaHoopa Unbound
799             Volcanion
Name: Name, dtype: object

In [153]:
# Or slice the Series with Python's slice notation (the stop position is excluded).
df['Name'][0:5]

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object

In [154]:
# A column can also be selected with attribute (dot) access.
# NOTE: this only works when the column name is a valid Python identifier,
# so a column such as 'Type 1' still has to be selected with df['Type 1'].
df.Name[0:5]

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object

In [155]:
# Pass a list of column labels to select several columns at once; the result is a DataFrame.
df[['Name', 'HP', 'Type 1']]

Unnamed: 0,Name,HP,Type 1
0,Bulbasaur,45,Grass
1,Ivysaur,60,Grass
2,Venusaur,80,Grass
3,VenusaurMega Venusaur,80,Grass
4,Charmander,39,Fire
...,...,...,...
795,Diancie,50,Rock
796,DiancieMega Diancie,50,Rock
797,HoopaHoopa Confined,80,Psychic
798,HoopaHoopa Unbound,80,Psychic


##### If you want to read a specific row, use the **.iloc** indexer.
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html

In [156]:
# N.B. selecting a single row by position yields a Series object.
first_row = df.iloc[0]
print(first_row, '\n', type(first_row))

#                     1
Name          Bulbasaur
Type 1            Grass
Type 2           Poison
HP                   45
Attack               49
Defense              49
Sp. Atk              65
Sp. Def              65
Speed                45
Generation            1
Legendary         False
Name: 0, dtype: object 
 <class 'pandas.core.series.Series'>


In [157]:
# With a slice, .iloc returns a DataFrame instead (here the rows at positions 0, 1 and 2).
df.iloc[0:3]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False


##### You can get a specific cell in several ways...
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html#pandas.DataFrame.at

In [158]:
# Positional lookup: row position 1, column position 4 (the 'HP' column).
df.iloc[1, 4]


60

In [159]:
# The same cell, this time addressed by row label 1 and column name 'HP'.
df.at[1, 'HP']

60