In [1]:
import pandas as pd

# Pandas

![imagen de panda](assets/pandas.png)

Pandas es una librería basada en NumPy, por lo que trabaja directamente con arrays.

Para ello, pandas modela los datos tabulares de diversas fuentes (como tablas de Excel, tablas de datos en formato CSV, JSON y otros) en DataFrames, los cuales son, en simples palabras, un array que representa una matriz de datos donde cada columna es un tipo de dato y cada fila es una 'tupla de datos'.

## 1. Crear un DataFrame de pandas

In [4]:
# Read the Pokémon CSV into a DataFrame and display it as the cell output.
csv_path = 'data/pokemon.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0
4,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,5,80,65,80,fire,,19.0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,,797,107,101,61,steel,flying,999.9,7,1
797,['Beast Boost'],1.00,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,,798,59,31,109,grass,steel,0.1,7,1
798,['Beast Boost'],2.00,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,,799,97,53,43,dark,dragon,888.0,7,1
799,['Prism Armor'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,,800,127,89,79,psychic,,230.0,7,1


In [18]:
# Print a summary of the DataFrame: index range, column names,
# non-null counts and the dtype of every column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   abilities          801 non-null    object 
 1   against_bug        801 non-null    float64
 2   against_dark       801 non-null    float64
 3   against_dragon     801 non-null    float64
 4   against_electric   801 non-null    float64
 5   against_fairy      801 non-null    float64
 6   against_fight      801 non-null    float64
 7   against_fire       801 non-null    float64
 8   against_flying     801 non-null    float64
 9   against_ghost      801 non-null    float64
 10  against_grass      801 non-null    float64
 11  against_ground     801 non-null    float64
 12  against_ice        801 non-null    float64
 13  against_normal     801 non-null    float64
 14  against_poison     801 non-null    float64
 15  against_psychic    801 non-null    float64
 16  against_rock       801 non

**Notamos que el dataset contiene mucha información**

### Podemos interpretar esto:

**Cada Fila del dataset representa un Pokemon**

**Cada Columna representa una característica en particular, para todos los pokemon**

## 2. Procedemos a explorar el nombre de todas las columnas para tener una idea de qué información tiene el dataset

In [5]:
# Column labels of the DataFrame, returned as a pandas Index.
df.columns

Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
       'against_electric', 'against_fairy', 'against_fight', 'against_fire',
       'against_flying', 'against_ghost', 'against_grass', 'against_ground',
       'against_ice', 'against_normal', 'against_poison', 'against_psychic',
       'against_rock', 'against_steel', 'against_water', 'attack',
       'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
       'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
       'japanese_name', 'name', 'percentage_male', 'pokedex_number',
       'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
       'generation', 'is_legendary'],
      dtype='object')

## 3. Ver qué tipo de datos contiene cada columna: int (entero), float32/64 (decimal), object (string y otros)

In [8]:
# dtype of each column, returned as a Series indexed by column name.
df.dtypes

abilities             object
against_bug          float64
against_dark         float64
against_dragon       float64
against_electric     float64
against_fairy        float64
against_fight        float64
against_fire         float64
against_flying       float64
against_ghost        float64
against_grass        float64
against_ground       float64
against_ice          float64
against_normal       float64
against_poison       float64
against_psychic      float64
against_rock         float64
against_steel        float64
against_water        float64
attack                 int64
base_egg_steps         int64
base_happiness         int64
base_total             int64
capture_rate          object
classfication         object
defense                int64
experience_growth      int64
height_m             float64
hp                     int64
japanese_name         object
name                  object
percentage_male      float64
pokedex_number         int64
sp_attack              int64
sp_defense    

## 4. Ver los posibles valores que existen en una determinada columna

In [9]:
# Distinct values that appear in the 'against_dragon' column.
against_dragon = df['against_dragon']
against_dragon.unique()

array([1. , 0. , 0.5, 2. ])

## 5. Filtrar: obtener una fila (o un conjunto de filas) para un determinado Pokémon

Pandas nos permite incorporar condiciones lógicas para poder filtrar los datos según lo que nos interese buscar

**Encontrar el Pokémon cuyo valor en la columna `name` sea 'Pikachu'**

In [11]:
# Boolean mask that is True only for rows whose name is exactly 'Pikachu';
# indexing the DataFrame with it keeps just those rows.
is_pikachu = df['name'] == 'Pikachu'
df[is_pikachu]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
24,"['Static', 'Lightningrod']",1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5,1.0,...,50.0,25,50,50,90,electric,,6.0,1,0


**Encontrar todos los Pokemon Legendarios**

In [14]:
# Keep only the rows flagged as legendary (is_legendary equal to 1).
legendary_mask = df['is_legendary'].eq(1)
df.loc[legendary_mask]


Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
143,"['Pressure', 'Snow Cloak']",0.50,1.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,...,,144,95,125,85,ice,flying,55.4,1,1
144,"['Pressure', 'Static']",0.50,1.0,1.0,1.0,1.0,0.5,1.0,0.5,1.0,...,,145,125,90,100,electric,flying,52.6,1,1
145,"['Pressure', 'Flame Body']",0.25,1.0,1.0,2.0,0.5,0.5,0.5,1.0,1.0,...,,146,125,85,90,fire,flying,60.0,1,1
149,"['Pressure', 'Unnerve']",2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,,150,194,120,140,psychic,,122.0,1,1
150,['Synchronize'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,,151,100,100,100,psychic,,4.0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,,797,107,101,61,steel,flying,999.9,7,1
797,['Beast Boost'],1.00,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,,798,59,31,109,grass,steel,0.1,7,1
798,['Beast Boost'],2.00,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,,799,97,53,43,dark,dragon,888.0,7,1
799,['Prism Armor'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,,800,127,89,79,psychic,,230.0,7,1


**Condiciones más complejas, como encontrar los Pokémon que sean de tipo fuego y acero (en cualquier orden entre `type1` y `type2`) y cuyo ataque especial sea mayor a 100**

In [16]:
# The fire/steel pairing can appear in either order across type1/type2,
# so build one mask per ordering and combine them with OR.
is_fire_then_steel = (df['type1'] == 'fire') & (df['type2'] == 'steel')
is_steel_then_fire = (df['type1'] == 'steel') & (df['type2'] == 'fire')

# The special-attack threshold applies to both orderings, so it is factored out
# and ANDed once; explicit parentheses avoid relying on & binding tighter than |.
has_high_sp_attack = df['sp_attack'] > 100

fire_steel = df[(is_fire_then_steel | is_steel_then_fire) & has_high_sp_attack]
fire_steel

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
484,"['Flash Fire', 'Flame Body']",0.25,1.0,0.5,1.0,0.25,2.0,1.0,0.5,1.0,...,50.0,485,130,106,77,fire,steel,430.0,4,1


In [17]:
# Show only the 'name' column of the rows that matched the fire/steel filter.
fire_steel['name']

484    Heatran
Name: name, dtype: object

## 7. Seleccionar un intervalo de datos

In [19]:
# First five rows by position; the stop index (5) is exclusive.
df.iloc[:5]


Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0
4,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,5,80,65,80,fire,,19.0,1,0


**Invertir el DataFrame**

In [22]:
# A step of -1 walks the rows from last to first; each row keeps its original index label.
df.iloc[::-1]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
800,['Soul-Heart'],0.25,0.5,0.0,1.0,0.5,1.0,2.0,0.5,1.0,...,,801,130,115,65,steel,fairy,80.5,7,1
799,['Prism Armor'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,,800,127,89,79,psychic,,230.0,7,1
798,['Beast Boost'],2.00,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,,799,97,53,43,dark,dragon,888.0,7,1
797,['Beast Boost'],1.00,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,,798,59,31,109,grass,steel,0.1,7,1
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,,797,107,101,61,steel,flying,999.9,7,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,5,80,65,80,fire,,19.0,1,0
3,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0
2,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
1,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0


**Similar a la lógica de las listas, podemos acceder al último elemento tomando como índice inicial el -1**

In [21]:
# Slice starting at position -1: a one-row DataFrame holding the last row.
df.iloc[-1:]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
800,['Soul-Heart'],0.25,0.5,0.0,1.0,0.5,1.0,2.0,0.5,1.0,...,,801,130,115,65,steel,fairy,80.5,7,1


In [35]:
# Rows from position 790 up to, but not including, the last one:
# a stop of -1 is exclusive, so the final row (position 800) is left out.
df.iloc[790:-1]

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
790,['Full Metal Body'],1.0,2.0,0.5,1.0,0.5,1.0,2.0,0.5,2.0,...,,791,113,89,97,psychic,steel,230.0,7,1
791,['Shadow Shield'],1.0,4.0,1.0,1.0,1.0,0.0,1.0,1.0,4.0,...,,792,137,107,97,psychic,ghost,120.0,7,1
792,['Beast Boost'],0.5,1.0,1.0,1.0,0.5,1.0,0.5,0.5,1.0,...,,793,127,131,103,rock,poison,55.5,7,1
793,['Beast Boost'],0.5,0.5,1.0,1.0,2.0,0.5,2.0,4.0,1.0,...,,794,53,53,79,bug,fighting,333.6,7,1
794,['Beast Boost'],0.5,0.5,1.0,1.0,2.0,0.5,2.0,4.0,1.0,...,,795,137,37,151,bug,fighting,25.0,7,1
795,['Beast Boost'],1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5,1.0,...,,796,173,71,83,electric,,100.0,7,1
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,,797,107,101,61,steel,flying,999.9,7,1
797,['Beast Boost'],1.0,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,,798,59,31,109,grass,steel,0.1,7,1
798,['Beast Boost'],2.0,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,,799,97,53,43,dark,dragon,888.0,7,1
799,['Prism Armor'],2.0,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,,800,127,89,79,psychic,,230.0,7,1


## 8. Quedarnos solo con un subconjunto de columnas

In [38]:
# Project the DataFrame onto the name and the two type columns.
selected_columns = ['name', 'type1', 'type2']
df[selected_columns]


Unnamed: 0,name,type1,type2
0,Bulbasaur,grass,poison
1,Ivysaur,grass,poison
2,Venusaur,grass,poison
3,Charmander,fire,
4,Charmeleon,fire,
...,...,...,...
796,Celesteela,steel,flying
797,Kartana,grass,steel
798,Guzzlord,dark,dragon
799,Necrozma,psychic,


## 9. Crear nuevas columnas en función de otras

In [39]:
# Base stats that are added together, row by row, into the new 'stats_sum' column.
stat_columns = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']

# Row-wise sum (axis=1) of the six stat columns. These columns are integer with no
# missing values in this dataset, so this matches chaining them with `+`.
df['stats_sum'] = df[stat_columns].sum(axis=1)
df

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary,stats_sum
0,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,1,65,65,45,grass,poison,6.9,1,0,318
1,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,2,80,80,60,grass,poison,13.0,1,0,405
2,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,3,122,120,80,grass,poison,100.0,1,0,625
3,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,4,60,50,65,fire,,8.5,1,0,309
4,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,5,80,65,80,fire,,19.0,1,0,405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,797,107,101,61,steel,flying,999.9,7,1,570
797,['Beast Boost'],1.00,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,798,59,31,109,grass,steel,0.1,7,1,570
798,['Beast Boost'],2.00,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,799,97,53,43,dark,dragon,888.0,7,1,570
799,['Prism Armor'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,800,127,89,79,psychic,,230.0,7,1,600
