# Pandas learning

---

In [68]:
import matplotlib.pyplot as plt
import pandas as pd

##### First learn how to **create** simple dataframes from elements' list and **save** it as a csv file.

In [69]:
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']
births = [968, 155, 77, 578, 973]
baby_dataset = list(zip(names, births))
print('baby_dataset: ', baby_dataset)

baby_dataset:  [('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]


In [70]:
df = pd.DataFrame(data=baby_dataset, columns=['Names', 'Births'])
print(df)

     Names  Births
0      Bob     968
1  Jessica     155
2     Mary      77
3     John     578
4      Mel     973


In [71]:
df.to_csv('births1880.csv', index=False, header=False)
# so easy :D

---

##### Now take a differente dataframe and **read** it from csv. We can do some interesting things.

In [72]:
df = pd.read_csv('pokemon_data.csv')
print(df)

# But we have different options
"""
    Excel format...
    pd.read_excel('pokemon_data.xlsx')

    Or a file with data separated with different characters e.g. \t
    pd.read_csv('pokemon_data.csv', delimiter='\t') 
"""

       #                   Name   Type 1  Type 2  HP  Attack  Defense  \
0      1              Bulbasaur    Grass  Poison  45      49       49   
1      2                Ivysaur    Grass  Poison  60      62       63   
2      3               Venusaur    Grass  Poison  80      82       83   
3      3  VenusaurMega Venusaur    Grass  Poison  80     100      123   
4      4             Charmander     Fire     NaN  39      52       43   
..   ...                    ...      ...     ...  ..     ...      ...   
795  719                Diancie     Rock   Fairy  50     100      150   
796  719    DiancieMega Diancie     Rock   Fairy  50     160      110   
797  720    HoopaHoopa Confined  Psychic   Ghost  80     110       60   
798  720     HoopaHoopa Unbound  Psychic    Dark  80     160       60   
799  721              Volcanion     Fire   Water  80     110      120   

     Sp. Atk  Sp. Def  Speed  Generation  Legendary  
0         65       65     45           1      False  
1         80   

"\n    Excel format...\n    pd.read_excel('pokemon_data.xlsx')\n\n    Or a file with data separated with different characters e.g. \t\n    pd.read_csv('pokemon_data.csv', delimiter='\t') \n"

##### Use **.head()** and **.tail()** functions.
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html

In [73]:
# Or take the n top elements...
print('---------- head --------------')
print(df.head(2))

# Or the k bottom elements too.
print('\n---------- tail --------------')
print(df.tail(2))

---------- head --------------
   #       Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  \
0  1  Bulbasaur  Grass  Poison  45      49       49       65       65     45   
1  2    Ivysaur  Grass  Poison  60      62       63       80       80     60   

   Generation  Legendary  
0           1      False  
1           1      False  

---------- tail --------------
       #                Name   Type 1 Type 2  HP  Attack  Defense  Sp. Atk  \
798  720  HoopaHoopa Unbound  Psychic   Dark  80     160       60      170   
799  721           Volcanion     Fire  Water  80     110      120      130   

     Sp. Def  Speed  Generation  Legendary  
798      130     80           6       True  
799       90     70           6       True  


---

# Reading data in DataFrames.

In [74]:
print(df.columns, '\n type: ', type(df.columns))

Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object') 
 type:  <class 'pandas.core.indexes.base.Index'>


In [75]:
print('Select an specific column')
print(df['Name'], '\n', type(df['Name']))
print('\n')

# Due to df['Any column'] returns Series object I can use .tail() function.
print("Due to df['Any column'] returns Series object I can use .tail() function.")
print(df['Name'].tail(2))
print('\n')

Select an specific column
0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander
               ...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object 
 <class 'pandas.core.series.Series'>


Due to df['Any column'] returns Series object I can use .tail() function.
798    HoopaHoopa Unbound
799             Volcanion
Name: Name, dtype: object




In [76]:
# Or use python list selector.
print('Or use python list selector')
print(df['Name'][0:2])
print('\n')

# And use dot selector, to select a column.
print('And use dot selector')
print(df.Name[0:2])
print('\n')

# You can select several columns like this.
print('You can select several columns like this')
print(df[['Name', 'HP', 'Type 1']])

Or use python list selector
0    Bulbasaur
1      Ivysaur
Name: Name, dtype: object


And use dot selector
0    Bulbasaur
1      Ivysaur
Name: Name, dtype: object


You can select several columns like this
                      Name  HP   Type 1
0                Bulbasaur  45    Grass
1                  Ivysaur  60    Grass
2                 Venusaur  80    Grass
3    VenusaurMega Venusaur  80    Grass
4               Charmander  39     Fire
..                     ...  ..      ...
795                Diancie  50     Rock
796    DiancieMega Diancie  50     Rock
797    HoopaHoopa Confined  80  Psychic
798     HoopaHoopa Unbound  80  Psychic
799              Volcanion  80     Fire

[800 rows x 3 columns]


##### If you want to read an specific row use **.iloc** function.
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html

In [81]:
# n.b. It returns a Series object
print(df.iloc[0], '\n', type(df.iloc[0]))
print('\n')
# But here it returns a Dataframe.
print(df.iloc[0:3])

#                     1
Name          Bulbasaur
Type 1            Grass
Type 2           Poison
HP                   45
Attack               49
Defense              49
Sp. Atk              65
Sp. Def              65
Speed                45
Generation            1
Legendary         False
Name: 0, dtype: object 
 <class 'pandas.core.series.Series'>


   #       Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  \
0  1  Bulbasaur  Grass  Poison  45      49       49       65       65     45   
1  2    Ivysaur  Grass  Poison  60      62       63       80       80     60   
2  3   Venusaur  Grass  Poison  80      82       83      100      100     80   

   Generation  Legendary  
0           1      False  
1           1      False  
2           1      False  


##### You can get an specific cell by several ways...
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html#pandas.DataFrame.at

In [86]:
# It corresponds to row 1 column HP
print(df.iloc[1, 4])
print(df.at[1, 'HP'])

60
60
