# Filtrar datos en DataFrames

In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Small hand-built table: five countries and their surface area in km².
datos = dict(
    pais=["Estados Unidos", "China", "Brasil", "India", "México"],
    km2=[9833517, 9600000, 8515767, 3287263, 1964375],
)

# Build the DataFrame from the column-oriented dict and display it.
paises = pd.DataFrame(data=datos)
paises

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
1,China,9600000
2,Brasil,8515767
3,India,3287263
4,México,1964375


## ¿Cómo filtrar datos?

In [3]:
# Select rows with a plain list of booleans: rows whose entry is True are kept.
filtro = [True, False, False, False, True] # boolean mask, same idea as in NumPy
paises[filtro]

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
4,México,1964375


Países con superficie mayor a 3287263 km^2 (la superficie de India)

In [6]:
# Element-wise comparison: yields a boolean mask, True where km2 exceeds 3287263.
filtro = paises["km2"] > 3287263

In [7]:
# Index the DataFrame with the boolean mask to keep only the rows marked True.
paises[filtro]

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
1,China,9600000
2,Brasil,8515767


In [8]:
# Same filter written inline, without storing the mask in an intermediate variable.
paises[paises["km2"] > 3287263]

Unnamed: 0,pais,km2
0,Estados Unidos,9833517
1,China,9600000
2,Brasil,8515767


Crear DataFrames a partir de datos_paises.csv

In [9]:
# Load the countries table; the first CSV column becomes the row index.
paises = pd.read_csv(filepath_or_buffer="../datos/datos_paises.csv", index_col=0)

# Preview only the first rows instead of dumping the whole table.
paises.head(n=5)

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
China,Asia,9600000.0,1409517
India,Asia,3287263.0,1339180
United States of America,America,9833517.0,324460
Indonesia,Asia,1910931.0,263991
Brazil,America,8515767.0,209288


In [12]:
# Label-based lookup: the row whose index label is "China".
paises.loc["China"]

continente              Asia
km2                9600000.0
poblacion_miles      1409517
Name: China, dtype: object

In [13]:
# Position-based lookup: the first row of the table.
paises.iloc[0]

continente              Asia
km2                9600000.0
poblacion_miles      1409517
Name: China, dtype: object

In [16]:
# Number of rows for each distinct value of the "continente" column.
paises["continente"].value_counts()

continente
Africa     56
America    52
Asia       50
Europa     48
Oceania    23
Name: count, dtype: int64

#### Seleccionando países de extensión pequeña

Filtro para países con extensión menor a 50 km^2

In [20]:
# Boolean Series aligned with the DataFrame index: True where km2 is below 50.
filtro = paises["km2"] < 50
filtro

pais
China                          False
India                          False
United States of America       False
Indonesia                      False
Brazil                         False
                               ...  
Saint Helena                   False
Falkland Islands (Malvinas)    False
Niue                           False
Holy See                        True
Tokelau                         True
Name: km2, Length: 229, dtype: bool

In [22]:
# Keep only the rows for which the mask is True.
paises[filtro]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"China, Macao SAR",Asia,30.0,623
Sint Maarten (Dutch part),America,34.0,40
Monaco,Europa,2.0,39
Gibraltar,Europa,6.0,35
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Holy See,Europa,1.0,1
Tokelau,Oceania,12.0,1


In [21]:
# Per-column count of non-null values among the rows selected by the mask.
paises[filtro].count()

continente         9
km2                9
poblacion_miles    9
dtype: int64

#### Seleccionando países de extensión pequeña pero altamente poblados

Filtro para países con extensión menor a 50 km^2 y población mayor a 500 mil habitantes (la columna `poblacion_miles` está expresada en miles)

In [25]:
# Build each condition as its own boolean mask, then combine them with &
# (element-wise AND): a row is kept only when both conditions hold.
menos_de_50_km2 = paises["km2"] < 50
mas_de_500_mil_hab = paises["poblacion_miles"] > 500
paises[menos_de_50_km2 & mas_de_500_mil_hab]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"China, Macao SAR",Asia,30.0,623


#### Seleccionando países de extensión pequeña o poco poblados

Filtro para países con extensión menor a 5 km^2 o población menor a 5 mil habitantes

In [26]:
# Build each condition as its own boolean mask, then combine them with |
# (element-wise OR): a row is kept when at least one condition holds.
menos_de_5_km2 = paises["km2"] < 5
menos_de_5_mil_hab = paises["poblacion_miles"] < 5
paises[menos_de_5_km2 | menos_de_5_mil_hab]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monaco,Europa,2.0,39
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Saint Helena,Africa,308.0,4
Falkland Islands (Malvinas),America,12173.0,3
Niue,Oceania,260.0,2
Holy See,Europa,1.0,1
Tokelau,Oceania,12.0,1


#### Seleccionando países pequeños, poco poblados y europeos

Países europeos con extensión menor a 50 km^2 y población menor a 50 mil habitantes

In [29]:
# Three independent boolean masks, combined with & so that all must hold.
menos_de_50_km2 = paises["km2"] < 50
menos_de_50_mil_hab = paises["poblacion_miles"] < 50
es_europeo = paises["continente"] == "Europa"
paises[menos_de_50_km2 & menos_de_50_mil_hab & es_europeo]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monaco,Europa,2.0,39
Gibraltar,Europa,6.0,35
Holy See,Europa,1.0,1


#### Seleccionando países pequeños, poco poblados y no europeos

Países no europeos con extensión menor a 50 km^2 y población menor a 50 mil habitantes

In [30]:
# Exclude one category with != and combine it with the two size conditions via &.
# Each comparison is parenthesised because & binds tighter than the comparison operators.
paises[ (paises["continente"] != "Europa") &
        (paises["km2"] < 50) & 
        (paises["poblacion_miles"] < 50) ]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sint Maarten (Dutch part),America,34.0,40
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Tokelau,Oceania,12.0,1


In [32]:
# Another way: negate the equality mask with ~ instead of comparing with !=.
paises[ ~(paises["continente"] == "Europa") &
        (paises["km2"] < 50) & 
        (paises["poblacion_miles"] < 50) ]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sint Maarten (Dutch part),America,34.0,40
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Tokelau,Oceania,12.0,1


In [34]:
# Another way: pass the same boolean mask to .loc for explicit row selection.
paises.loc[ ~(paises["continente"] == "Europa") &
        (paises["km2"] < 50) & 
        (paises["poblacion_miles"] < 50) ]

Unnamed: 0_level_0,continente,km2,poblacion_miles
pais,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sint Maarten (Dutch part),America,34.0,40
"Bonaire, Sint Eustatius and Saba",America,1.0,25
Nauru,Oceania,21.0,11
Tuvalu,Oceania,26.0,11
Tokelau,Oceania,12.0,1
