## **pandas_datareader**

Esta biblioteca permite acceder de forma remota a distintos conjuntos de datos.


A continuación revisaremos algunas conexiones que ofrece esta biblioteca.

In [1]:
# Install pandas_datareader (the %pip magic targets the kernel's own environment).
%pip install pandas_datareader



In [2]:
from IPython.display import IFrame

# Embed the pandas-datareader module index so it can be browsed inline.
IFrame(
    src='https://pandas-datareader.readthedocs.io/en/latest/py-modindex.html',
    width=1000,
    height=400,
)


## Información obtenida desde el Banco Mundial

In [3]:
from pandas_datareader import wb
from datetime import datetime

### Seleccionar países

`pandas_datareader.wb.get_countries(**kwargs)`

Información administrativa de los países registrados en la base de datos del Banco Mundial.

In [4]:
# Fetch the World Bank country table and display it (the rendered output is truncated in the middle).
wb.get_countries()

Unnamed: 0,iso3c,iso2c,name,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
0,ABW,AW,Aruba,Latin America & Caribbean,,High income,Not classified,Oranjestad,-70.0167,12.5167
1,AFE,ZH,Africa Eastern and Southern,Aggregates,,Aggregates,Aggregates,,,
2,AFG,AF,Afghanistan,"Middle East, North Africa, Afghanistan & Pakistan","Middle East, North Africa, Afghanistan & Pakis...",Low income,IDA,Kabul,69.1761,34.5228
3,AFR,A9,Africa,Aggregates,,Aggregates,Aggregates,,,
4,AFW,ZI,Africa Western and Central,Aggregates,,Aggregates,Aggregates,,,
...,...,...,...,...,...,...,...,...,...,...
291,XZN,A5,Sub-Saharan Africa excluding South Africa and ...,Aggregates,,Aggregates,Aggregates,,,
292,YEM,YE,"Yemen, Rep.","Middle East, North Africa, Afghanistan & Pakistan","Middle East, North Africa, Afghanistan & Pakis...",Low income,IDA,Sana'a,44.2075,15.3520
293,ZAF,ZA,South Africa,Sub-Saharan Africa,Sub-Saharan Africa (excluding high income),Upper middle income,IBRD,Pretoria,28.1871,-25.7460
294,ZMB,ZM,Zambia,Sub-Saharan Africa,Sub-Saharan Africa (excluding high income),Lower middle income,IDA,Lusaka,28.2937,-15.3982


In [5]:
# Keep the country table in a variable so later cells can filter it.
df_info_paises = wb.get_countries()
# Preview only the first rows.
df_info_paises.head()

Unnamed: 0,iso3c,iso2c,name,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
0,ABW,AW,Aruba,Latin America & Caribbean,,High income,Not classified,Oranjestad,-70.0167,12.5167
1,AFE,ZH,Africa Eastern and Southern,Aggregates,,Aggregates,Aggregates,,,
2,AFG,AF,Afghanistan,"Middle East, North Africa, Afghanistan & Pakistan","Middle East, North Africa, Afghanistan & Pakis...",Low income,IDA,Kabul,69.1761,34.5228
3,AFR,A9,Africa,Aggregates,,Aggregates,Aggregates,,,
4,AFW,ZI,Africa Western and Central,Aggregates,,Aggregates,Aggregates,,,


In [6]:
# Select the row whose name is exactly "Chile" using a boolean mask
df_info_paises[df_info_paises['name'] == 'Chile']

Unnamed: 0,iso3c,iso2c,name,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
49,CHL,CL,Chile,Latin America & Caribbean,,High income,IBRD,Santiago,-70.6475,-33.475


In [7]:
# Same lookup through a substring match on the country name
df_info_paises.loc[df_info_paises['name'].str.contains('hile')]

Unnamed: 0,iso3c,iso2c,name,region,adminregion,incomeLevel,lendingType,capitalCity,longitude,latitude
49,CHL,CL,Chile,Latin America & Caribbean,,High income,IBRD,Santiago,-70.6475,-33.475


In [8]:
# Summarize the column dtypes and non-null counts of the country table.
df_info_paises.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 296 entries, 0 to 295
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   iso3c        296 non-null    object 
 1   iso2c        296 non-null    object 
 2   name         296 non-null    object 
 3   region       296 non-null    object 
 4   adminregion  296 non-null    object 
 5   incomeLevel  296 non-null    object 
 6   lendingType  296 non-null    object 
 7   capitalCity  296 non-null    object 
 8   longitude    211 non-null    float64
 9   latitude     211 non-null    float64
dtypes: float64(2), object(8)
memory usage: 23.3+ KB


In [9]:
# List the distinct region labels; note in the output that some carry trailing whitespace.
df_info_paises['region'].unique()

array(['Latin America & Caribbean ', 'Aggregates',
       'Middle East, North Africa, Afghanistan & Pakistan',
       'Sub-Saharan Africa ', 'Europe & Central Asia',
       'East Asia & Pacific', 'South Asia', 'North America'], dtype=object)

In [10]:
# Drop the rows labelled 'Aggregates' so only individual countries remain.
# .copy() makes df_paises an independent frame, so the later column assignment
# on it does not raise SettingWithCopyWarning or depend on df_info_paises.
df_paises = df_info_paises[df_info_paises['region'] != 'Aggregates'].copy()

In [None]:
# Count how many countries fall in each region
df_paises['region'].value_counts().reset_index()

In [None]:
# Strip leading and trailing whitespace from the region labels.
# assign() returns a new frame instead of writing into a filtered slice,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
df_paises = df_paises.assign(region=df_paises['region'].str.strip())

In [None]:
# List the distinct regions associated with the countries
df_paises['region'].unique()

In [None]:
# Show a single row to recall the available columns.
df_paises.head(1)

In [None]:
# Cross-tabulate the number of countries by region and income level
import pandas as pd

df_paises.pivot_table(
    index='region',
    columns='incomeLevel',
    aggfunc='size',
    fill_value=0,
)

In [None]:
# Countries in East Asia & Pacific that are classified as low income
df_paises.loc[
    df_paises['region'].eq('East Asia & Pacific')
    & df_paises['incomeLevel'].eq('Low income')
]

In [None]:
# Latin America & Caribbean countries without an income classification
df_paises.loc[
    df_paises['region'].eq('Latin America & Caribbean')
    & df_paises['incomeLevel'].eq('Not classified')
]

In [None]:
# Countries whose name contains "Salvador"
df_paises.loc[df_paises['name'].str.contains('Salvador')]

In [None]:
# Plot a world map colored by income level
import plotly.express as px


# Build the choropleth. incomeLevel is categorical text, so plotly ignores
# color_continuous_scale for it; a discrete color sequence is what controls
# the colors, and category_orders fixes which level receives which shade.
fig = px.choropleth(
    df_paises,
    locations='iso3c',
    color='incomeLevel',
    hover_name='name',
    projection='natural earth',
    title='Nivel de ingresos paises',
    color_discrete_sequence=px.colors.sequential.Blues_r,
    category_orders={
        'incomeLevel': [
            'High income',
            'Upper middle income',
            'Lower middle income',
            'Low income',
            'Not classified',
        ]
    },
)

# Adjust the figure size and title font
fig.update_layout(
    width=1000,  # figure width
    height=600,  # figure height
    title_font_size=24  # title font size
)

# Render the figure
fig.show()

### Obtener las series (bases de datos) del Banco Mundial

`pandas_datareader.wb.get_indicators(**kwargs)`

Información acerca de las series del Banco Mundial.

In [None]:
# Fetch the catalogue of World Bank indicators and display it
indicadores = wb.get_indicators()
indicadores


In [None]:
# Boolean mask of the indicators whose name starts with "Agricultural land".
# str.contains() requires a pattern; calling it with no arguments raises TypeError.
# na=False maps missing names to False instead of NaN.
indicadores['name'].str.contains(r'^[Aa]gricultural land', regex=True, na=False)

In [None]:
# Show the indicators whose name starts with "Agricultural land" or "agricultural land"
indicadores[indicadores['name'].str.contains(r'^[Aa]gricultural land', regex=True)]

### Descargar series (bases de datos) de distintos países

```python
pandas_datareader.wb.download(country=None,
        indicator=None,
        start=2003,
        end=2005,
        freq=None,
        errors='warn',
        **kwargs)
```

Función que descarga series de indicadores de desarrollo del Banco Mundial para los países y el rango de años indicados.


In [None]:
# Date range and World Bank indicator code used by the download below.
# NOTE(review): `id` shadows the Python builtin; the name is kept because the next cell reads it.
start, end = datetime(1960, 1, 1), datetime(2022, 12, 31)
id = "AG.LND.AGRI.ZS"

In [None]:
# Download agricultural land (% of land area) with country='all'
df_ag_land = wb.download(
    country='all',
    indicator=id,
    start=start,
    end=end,
)
df_ag_land.head(10)

In [None]:
# Move the index levels into regular columns
# NOTE(review): re-running this cell resets an already-reset frame, which would
# add an extra 'index' column — run it once per download.
df_ag_land = df_ag_land.reset_index()
df_ag_land.head(10)

In [None]:
# List the distinct values of the 'country' column.
df_ag_land['country'].unique()

In [None]:
# Order rows by country and then by year (ascending is the default)
df_ag_land = df_ag_land.sort_values(by=['country', 'year'])

In [None]:
# Same listing of 'country' values, now taken from the sorted frame.
df_ag_land['country'].unique()

In [None]:
# Region names from the country table, as a plain Python list
regiones = df_paises['region'].unique().tolist()
regiones

In [None]:
# Keep only the rows whose 'country' value is one of the region names
df_region = df_ag_land.loc[df_ag_land['country'].isin(regiones)]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import plotly.express as px

In [None]:
# Line chart of the agricultural-land series, one line per region
px.line(
    df_region,
    x='year',
    y='AG.LND.AGRI.ZS',
    color='country',
    title='Tierras agrícolas (% del área de tierra)',
)

In [None]:
# Date range and indicator code for the GDP per-capita series
start, end = datetime(1960, 1, 1), datetime(2025, 1, 1)
indicator_id = 'NY.GDP.PCAP.KD'

In [None]:
# Download the GDP per-capita series with country='all'
gdp_per_capita = wb.download(
    country='all',
    indicator=indicator_id,
    start=start,
    end=end,
)

In [None]:
# Move the index levels into regular columns and preview the result.
# NOTE(review): re-running this cell resets an already-reset frame, which would
# add an extra 'index' column — run it once per download.
gdp_per_capita = gdp_per_capita.reset_index()
gdp_per_capita.head()

In [None]:
# Order rows by country and then by year (ascending is the default)
gdp_per_capita = gdp_per_capita.sort_values(by=['country', 'year'])

In [None]:
# Keep only the rows whose 'country' value is one of the World Bank region names.
# The raw region labels carry trailing whitespace (e.g. 'Sub-Saharan Africa '),
# so they are stripped first; isin() cannot match them against names that
# lack the trailing space.
gdp_per_capita_region = gdp_per_capita[
    gdp_per_capita['country'].isin(df_info_paises['region'].str.strip().unique())
]

In [None]:
# Preview the first rows of the region-level subset.
gdp_per_capita_region.head(10)

In [None]:
# Restrict the region-level subset to the names listed in `regiones`
df_pib_region = gdp_per_capita_region.loc[gdp_per_capita_region['country'].isin(regiones)]

In [None]:
# GDP per capita over time, one line per entry of df_pib_region
px.line(
    df_pib_region,
    x='year',
    y='NY.GDP.PCAP.KD',
    color='country',
    title='PIB per-capita (US$ constante 2015)',
)

In [None]:
# Same chart drawn from gdp_per_capita_region
px.line(
    gdp_per_capita_region,
    x='year',
    y='NY.GDP.PCAP.KD',
    color='country',
    title='PIB per-capita (US$ constante 2015)',
)

In [None]:

# Rows of the GDP per-capita series that belong to Chile
gdp_per_capita.loc[gdp_per_capita['country'].eq('Chile')]

In [None]:
# GDP per capita over time for Chile only
px.line(
    gdp_per_capita.loc[gdp_per_capita['country'].eq('Chile')],
    x='year',
    y='NY.GDP.PCAP.KD',
    color='country',
    title='PIB per-capita Chile (US$ constante 2015)',
)

## yfinance

In [None]:
import pandas_datareader.data as web
import pandas as pd
import datetime as dt


In [None]:
# Install yfinance (the %pip magic targets the kernel's own environment).
%pip install yfinance

In [None]:
import yfinance as yf

In [None]:
# Create a yfinance Ticker object for the symbol "MSFT" and display it.
dat = yf.Ticker("MSFT")
dat

In [None]:
# Request the ticker's history for the '1mo' period.
dat.history(period='1mo')