In [1]:
#Configuración del ambiente 
import pandas as pd
import numpy as np 
import requests
from io import StringIO
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
from sklearn.ensemble import RandomForestRegressor
import json

In [2]:
df_covid = pd.read_csv(StringIO(requests.get("https://covid19.who.int/WHO-COVID-19-global-data.csv").text))
df_population = pd.read_excel('https://raw.githubusercontent.com/ElProfeAlejo/Bootcamp_Databases/main/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx', sheet_name=0, skiprows=16)
df_population = df_population[df_population['Year'] == 2019]

In [3]:
df_covid

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-05,AF,Afghanistan,EMRO,,0,,0
1,2020-01-12,AF,Afghanistan,EMRO,,0,,0
2,2020-01-19,AF,Afghanistan,EMRO,,0,,0
3,2020-01-26,AF,Afghanistan,EMRO,,0,,0
4,2020-02-02,AF,Afghanistan,EMRO,,0,,0
...,...,...,...,...,...,...,...,...
50875,2023-12-24,ZW,Zimbabwe,AFRO,23.0,266048,1.0,5731
50876,2023-12-31,ZW,Zimbabwe,AFRO,23.0,266071,,5731
50877,2024-01-07,ZW,Zimbabwe,AFRO,46.0,266117,3.0,5734
50878,2024-01-14,ZW,Zimbabwe,AFRO,85.0,266202,3.0,5737


In [4]:
# Hallar nulos
df_covid.isna().sum().sort_values(ascending = False)

New_deaths           26664
New_cases            14258
WHO_region            5088
Country_code          1484
Country               1272
Date_reported            0
Cumulative_cases         0
Cumulative_deaths        0
dtype: int64

In [5]:
#Eliminar todos los registros con nulos en la columna Country_code.
df_covid = df_covid.dropna(subset=['Country_code'])
df_covid

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-05,AF,Afghanistan,EMRO,,0,,0
1,2020-01-12,AF,Afghanistan,EMRO,,0,,0
2,2020-01-19,AF,Afghanistan,EMRO,,0,,0
3,2020-01-26,AF,Afghanistan,EMRO,,0,,0
4,2020-02-02,AF,Afghanistan,EMRO,,0,,0
...,...,...,...,...,...,...,...,...
50875,2023-12-24,ZW,Zimbabwe,AFRO,23.0,266048,1.0,5731
50876,2023-12-31,ZW,Zimbabwe,AFRO,23.0,266071,,5731
50877,2024-01-07,ZW,Zimbabwe,AFRO,46.0,266117,3.0,5734
50878,2024-01-14,ZW,Zimbabwe,AFRO,85.0,266202,3.0,5737


In [6]:
#Mantener en el dataframe sólo las siguientes columnas:
df_covid = df_covid[['Date_reported','Country_code','Country','New_cases','Cumulative_cases','New_deaths','Cumulative_deaths']]
df_covid

Unnamed: 0,Date_reported,Country_code,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-05,AF,Afghanistan,,0,,0
1,2020-01-12,AF,Afghanistan,,0,,0
2,2020-01-19,AF,Afghanistan,,0,,0
3,2020-01-26,AF,Afghanistan,,0,,0
4,2020-02-02,AF,Afghanistan,,0,,0
...,...,...,...,...,...,...,...
50875,2023-12-24,ZW,Zimbabwe,23.0,266048,1.0,5731
50876,2023-12-31,ZW,Zimbabwe,23.0,266071,,5731
50877,2024-01-07,ZW,Zimbabwe,46.0,266117,3.0,5734
50878,2024-01-14,ZW,Zimbabwe,85.0,266202,3.0,5737


In [7]:
#Sustituir todos los valores nulos con cero.
df_covid.fillna(0, inplace=True)
df_covid

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_covid.fillna(0, inplace=True)


Unnamed: 0,Date_reported,Country_code,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-05,AF,Afghanistan,0.0,0,0.0,0
1,2020-01-12,AF,Afghanistan,0.0,0,0.0,0
2,2020-01-19,AF,Afghanistan,0.0,0,0.0,0
3,2020-01-26,AF,Afghanistan,0.0,0,0.0,0
4,2020-02-02,AF,Afghanistan,0.0,0,0.0,0
...,...,...,...,...,...,...,...
50875,2023-12-24,ZW,Zimbabwe,23.0,266048,1.0,5731
50876,2023-12-31,ZW,Zimbabwe,23.0,266071,0.0,5731
50877,2024-01-07,ZW,Zimbabwe,46.0,266117,3.0,5734
50878,2024-01-14,ZW,Zimbabwe,85.0,266202,3.0,5737


In [8]:
#Cambiar el formato actual de la columna Date_reported para un formato de tipo datetime64[ns].
df_covid["Date_reported"] = pd.to_datetime(df_covid["Date_reported"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_covid["Date_reported"] = pd.to_datetime(df_covid["Date_reported"])


In [9]:
#Substituir el formato actual de las columnas ['New_cases', 'New_deaths']para un formato de tipo int64.
df_covid["New_deaths"] = pd.to_numeric(df_covid["New_deaths"].astype('int64'))
df_covid["New_cases"] = pd.to_numeric(df_covid["New_cases"].astype('int64'))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_covid["New_deaths"] = pd.to_numeric(df_covid["New_deaths"].astype('int64'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_covid["New_cases"] = pd.to_numeric(df_covid["New_cases"].astype('int64'))


In [10]:
#Crear una nueva columna 'lethality_rate' con la siguiente fórmula: 'Cumulative_deaths'/'Cumulative_cases'*100
df_covid['lethality_rate'] = df_covid['Cumulative_deaths'] / df_covid['Cumulative_cases'] * 100
df_covid

Unnamed: 0,Date_reported,Country_code,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,lethality_rate
0,2020-01-05,AF,Afghanistan,0,0,0,0,
1,2020-01-12,AF,Afghanistan,0,0,0,0,
2,2020-01-19,AF,Afghanistan,0,0,0,0,
3,2020-01-26,AF,Afghanistan,0,0,0,0,
4,2020-02-02,AF,Afghanistan,0,0,0,0,
...,...,...,...,...,...,...,...,...
50875,2023-12-24,ZW,Zimbabwe,23,266048,1,5731,2.154123
50876,2023-12-31,ZW,Zimbabwe,23,266071,0,5731,2.153936
50877,2024-01-07,ZW,Zimbabwe,46,266117,3,5734,2.154691
50878,2024-01-14,ZW,Zimbabwe,85,266202,3,5737,2.155130


In [11]:
#Con el fin de eliminar outliers, nos quedaremos con los registros cuyo valor en la columna lethality_rate se encuentre entre los cuantiles 0 a 0.99.
df_covid = df_covid[(df_covid['lethality_rate'] > 0) & (df_covid['lethality_rate'] < 0.99)]
df_covid

Unnamed: 0,Date_reported,Country_code,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,lethality_rate
752,2022-03-27,AS,American Samoa,1881,3278,2,2,0.061013
753,2022-04-03,AS,American Samoa,1378,4656,5,7,0.150344
754,2022-04-10,AS,American Samoa,746,5402,4,11,0.203628
755,2022-04-17,AS,American Samoa,133,5535,7,18,0.325203
756,2022-04-24,AS,American Samoa,230,5765,5,23,0.398959
...,...,...,...,...,...,...,...,...
50476,2020-05-24,ZM,Zambia,241,920,0,9,0.978261
50477,2020-05-31,ZM,Zambia,137,1057,0,9,0.851466
50478,2020-06-07,ZM,Zambia,97,1154,0,9,0.779896
50479,2020-06-14,ZM,Zambia,203,1357,0,9,0.663228


In [32]:
#Finalmente reiniciaremos el índice del dataframe final df_covid_limpio
df_covid_limpio = df_covid.reset_index(drop=True)
df_covid_limpio

Unnamed: 0,Date_reported,Country_code,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,lethality_rate
0,2022-03-27,AS,American Samoa,1881,3278,2,2,0.061013
1,2022-04-03,AS,American Samoa,1378,4656,5,7,0.150344
2,2022-04-10,AS,American Samoa,746,5402,4,11,0.203628
3,2022-04-17,AS,American Samoa,133,5535,7,18,0.325203
4,2022-04-24,AS,American Samoa,230,5765,5,23,0.398959
...,...,...,...,...,...,...,...,...
16868,2020-05-24,ZM,Zambia,241,920,0,9,0.978261
16869,2020-05-31,ZM,Zambia,137,1057,0,9,0.851466
16870,2020-06-07,ZM,Zambia,97,1154,0,9,0.779896
16871,2020-06-14,ZM,Zambia,203,1357,0,9,0.663228


In [33]:
#Corroboramos cambios y que la limpieza ha sido efectuada
df_covid_limpio.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16873 entries, 0 to 16872
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Date_reported      16873 non-null  datetime64[ns]
 1   Country_code       16873 non-null  object        
 2   Country            16873 non-null  object        
 3   New_cases          16873 non-null  int64         
 4   Cumulative_cases   16873 non-null  int64         
 5   New_deaths         16873 non-null  int64         
 6   Cumulative_deaths  16873 non-null  int64         
 7   lethality_rate     16873 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(4), object(2)
memory usage: 1.0+ MB


In [34]:
#Almacenar el dataFrame en un csv para trabajar las etapas por separado.
df_covid_limpio.to_csv('df_covid_limpio.csv', sep=',', index=False)

In [15]:
#Observamos el contenido del dataset
df_population

Unnamed: 0,Index,Variant,"Region, subregion, country or area *",Notes,Location code,ISO3 Alpha-code,ISO2 Alpha-code,SDMX code**,Type,Parent code,...,"Male Mortality before Age 60 (deaths under age 60 per 1,000 male live births)","Female Mortality before Age 60 (deaths under age 60 per 1,000 female live births)","Mortality between Age 15 and 50, both sexes (deaths under age 50 per 1,000 alive at age 15)","Male Mortality between Age 15 and 50 (deaths under age 50 per 1,000 males alive at age 15)","Female Mortality between Age 15 and 50 (deaths under age 50 per 1,000 females alive at age 15)","Mortality between Age 15 and 60, both sexes (deaths under age 60 per 1,000 alive at age 15)","Male Mortality between Age 15 and 60 (deaths under age 60 per 1,000 males alive at age 15)","Female Mortality between Age 15 and 60 (deaths under age 60 per 1,000 females alive at age 15)",Net Number of Migrants (thousands),"Net Migration Rate (per 1,000 population)"
69,70,Estimates,WORLD,,900,,,1.0,World,0,...,209.694,146.21,71.916,87.268,55.799,138.207,168.929,106.291,0,0
142,143,Estimates,Sub-Saharan Africa,b,1834,,,202.0,SDG region,1828,...,388.622,324.278,173.83,192.276,155.259,284.876,316.977,253.162,-484.702,-0.448
214,215,Estimates,Northern Africa and Western Asia,,1833,,,747.0,SDG region,1828,...,174.739,113.261,53.27,65.448,39.486,118.468,146.064,87.691,-124.728,-0.235
286,287,Estimates,Central and Southern Asia,,1831,,,62.0,SDG region,1828,...,227.047,168.558,75.217,90.827,58.668,161.773,190.486,131.231,-2473.484,-1.223
358,359,Estimates,Eastern and South-Eastern Asia,,1832,,,753.0,SDG region,1828,...,145.643,86.283,47.004,58.936,34.318,100.287,128.133,71.208,-99.778,-0.043
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20305,20306,Estimates,Samoa,,882,WSM,WS,882.0,Country/Area,957,...,199.759,133.625,68.304,81.648,53.949,148.134,178.682,114.303,-2.469,-11.584
20377,20378,Estimates,Tokelau,35,772,TKL,TK,772.0,Country/Area,957,...,141.052,142.411,63.07,58.218,68.146,133.511,132.832,134.951,0.113,66.637
20449,20450,Estimates,Tonga,,776,TON,TO,776.0,Country/Area,957,...,245.689,169.545,97.746,114.797,81.702,194.945,232.709,156.813,-1.898,-17.923
20521,20522,Estimates,Tuvalu,,798,TUV,TV,798.0,Country/Area,957,...,404.852,239.933,174.791,218.086,122.296,308.753,384.691,217.301,-0.057,-5.188


In [16]:
#Mantener en el dataframe sólo las siguientes columnas: ['ISO2 Alpha-code','Total Population, as of 1 July (thousands)','Male Population, as of 1 July (thousands)','Female Population, as of 1 July (thousands)','Population Density, as of 1 July (persons per square km)','Life Expectancy at Birth, both sexes (years)'].
df_population = df_population[['ISO2 Alpha-code','Total Population, as of 1 July (thousands)','Male Population, as of 1 July (thousands)','Female Population, as of 1 July (thousands)','Population Density, as of 1 July (persons per square km)','Life Expectancy at Birth, both sexes (years)']]
df_population

Unnamed: 0,ISO2 Alpha-code,"Total Population, as of 1 July (thousands)","Male Population, as of 1 July (thousands)","Female Population, as of 1 July (thousands)","Population Density, as of 1 July (persons per square km)","Life Expectancy at Birth, both sexes (years)"
69,,7764951.032,3906407.855,3858543.178,59.546,72.79
142,,1080677.346,538158.386,542518.959,49.425,60.725
214,,530149.56,273104.233,257045.327,42.485,73.392
286,,2021559.651,1033583.865,987975.786,195.683,70.615
358,,2324282.002,1176368.32,1147913.681,143.117,77.013
...,...,...,...,...,...,...
20305,WS,211.905,108.147,103.759,74.878,72.157
20377,TK,1.752,0.879,0.874,146,75.076
20449,TO,104.951,51.994,52.956,161.462,70.871
20521,TV,10.956,5.654,5.301,365.2,64.278


In [17]:
#Renombrar las columnas para nombre más sencillos:
df_population = df_population.rename(columns={
    'ISO2 Alpha-code': 'Country_code',
    'Total Population, as of 1 July (thousands)': 'Total_Population',
    'Male Population, as of 1 July (thousands)': 'Male_Population',
    'Female Population, as of 1 July (thousands)': 'Female_Population',
    'Population Density, as of 1 July (persons per square km)': 'Population_Density',
    'Life Expectancy at Birth, both sexes (years)': 'Life_Expectancy'
})
df_population

Unnamed: 0,Country_code,Total_Population,Male_Population,Female_Population,Population_Density,Life_Expectancy
69,,7764951.032,3906407.855,3858543.178,59.546,72.79
142,,1080677.346,538158.386,542518.959,49.425,60.725
214,,530149.56,273104.233,257045.327,42.485,73.392
286,,2021559.651,1033583.865,987975.786,195.683,70.615
358,,2324282.002,1176368.32,1147913.681,143.117,77.013
...,...,...,...,...,...,...
20305,WS,211.905,108.147,103.759,74.878,72.157
20377,TK,1.752,0.879,0.874,146,75.076
20449,TO,104.951,51.994,52.956,161.462,70.871
20521,TV,10.956,5.654,5.301,365.2,64.278


In [18]:
#Hallar todos los registros nulos.
df_population.isna().sum().sort_values(ascending = False)

Country_code          50
Total_Population       0
Male_Population        0
Female_Population      0
Population_Density     0
Life_Expectancy        0
dtype: int64

In [19]:
# Borrar registros nulos
df_population = df_population.dropna()
df_population

Unnamed: 0,Country_code,Total_Population,Male_Population,Female_Population,Population_Density,Life_Expectancy
1801,BI,11874.838,5894.278,5980.56,457.605,62.351
1873,KM,790.986,397.126,393.86,425.033,64.068
1945,DJ,1073.994,533.626,540.367,46.333,63.085
2017,ER,3498.818,1724.86,1773.958,28.881,67.3
2089,ET,114120.594,57375.433,56745.161,114.121,65.838
...,...,...,...,...,...,...
20305,WS,211.905,108.147,103.759,74.878,72.157
20377,TK,1.752,0.879,0.874,146,75.076
20449,TO,104.951,51.994,52.956,161.462,70.871
20521,TV,10.956,5.654,5.301,365.2,64.278


In [20]:
#Verificar el tipo de datos de las columnas
df_population.info()

<class 'pandas.core.frame.DataFrame'>
Index: 236 entries, 1801 to 20593
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Country_code        236 non-null    object
 1   Total_Population    236 non-null    object
 2   Male_Population     236 non-null    object
 3   Female_Population   236 non-null    object
 4   Population_Density  236 non-null    object
 5   Life_Expectancy     236 non-null    object
dtypes: object(6)
memory usage: 12.9+ KB


In [21]:
#Identificar valores único para encontrar el valor no numérico
print(df_population["Male_Population"].unique())


[5894.278 397.126 533.626 1724.86 57375.433 25272.193 13804.538 9168.342
 640.524 139.735 14851.787 458.138 6266.092 54.919 8011.375 5167.14
 21241.606 29563.439 9066.397 7231.989 15983.035 12855.489 2605.852
 8094.822 2781.753 44600.412 821.881 1144.323 107.123 21756.903 53429.404
 3329.012 18281.786 21607.642 5962.724 301.06 1233.202 579.801 1097.956
 28207.948 6152.125 10431.262 286.974 13218.128 1247.342 15723.678
 6349.991 971.745 2479.442 10387.581 2145.13 11889.761 102680.839 2.792
 7860.618 4030.454 4142.015 9005.743 3105.037 4696.898 3050.231 16495.318
 726781.914 3461.876 312.356 11802.406 12707.874 61158.671 1606.094
 25894.464 19090.409 82164.179 407.103 714325.057 43792.157 291.624
 13704.208 113015.042 10440.86 226.876 8021.264 135798.442 3636.206
 16794.024 26423.311 56011.134 3066.573 34723.388 653.013 47288.433
 1274.767 5051.096 933.409 615.637 1774.749 20788.703 4287.58 5555.605
 2738.222 2814.124 2924.701 2055.936 20836.089 2446.813 10073.931 41866.95
 6443.813 1595

In [22]:
# Encontrar una fila con una condicion
n = df_population[df_population['Male_Population'] == '...']
n

Unnamed: 0,Country_code,Total_Population,Male_Population,Female_Population,Population_Density,Life_Expectancy
12817,VA,0.528,...,...,1200,...


In [23]:
# Borrar teniendo el indice
df_population.drop(df_population[df_population['Male_Population'] == '...'].index, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_population.drop(df_population[df_population['Male_Population'] == '...'].index, inplace=True)


In [24]:
#Substituir el formato actual de las columnas ['Total_Population','Male_Population','Female_Population','Population_Density','Life_Expectancy']para un formato de tipo int64.
df_population["Total_Population"] = pd.to_numeric(df_population["Total_Population"].astype('int64'))
df_population["Male_Population"] = pd.to_numeric(df_population["Male_Population"].astype('int64'))
df_population["Female_Population"] = pd.to_numeric(df_population["Female_Population"].astype('int64'))
df_population["Population_Density"] = pd.to_numeric(df_population["Population_Density"].astype('int64'))
df_population["Life_Expectancy"] = pd.to_numeric(df_population["Life_Expectancy"].astype('int64'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_population["Total_Population"] = pd.to_numeric(df_population["Total_Population"].astype('int64'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_population["Male_Population"] = pd.to_numeric(df_population["Male_Population"].astype('int64'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_po

In [25]:
#Multiplicar por 1000 el valor actual de las siguientes columnas 'Total_Population','Male_Population','Female_Population' y sobreescribirlas.
df_population[['Total_Population','Male_Population','Female_Population']] = df_population[['Total_Population','Male_Population','Female_Population']] * 1000
df_population

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_population[['Total_Population','Male_Population','Female_Population']] = df_population[['Total_Population','Male_Population','Female_Population']] * 1000


Unnamed: 0,Country_code,Total_Population,Male_Population,Female_Population,Population_Density,Life_Expectancy
1801,BI,11874000,5894000,5980000,457,62
1873,KM,790000,397000,393000,425,64
1945,DJ,1073000,533000,540000,46,63
2017,ER,3498000,1724000,1773000,28,67
2089,ET,114120000,57375000,56745000,114,65
...,...,...,...,...,...,...
20305,WS,211000,108000,103000,74,72
20377,TK,1000,0,0,146,75
20449,TO,104000,51000,52000,161,70
20521,TV,10000,5000,5000,365,64


In [26]:
# Buscar nulos
df_population.isna().sum().sort_values(ascending = False)

Country_code          0
Total_Population      0
Male_Population       0
Female_Population     0
Population_Density    0
Life_Expectancy       0
dtype: int64

In [27]:
#Finalmente reiniciaremos el índice del dataframe final df_population_limpio
# Reiniciar índice
df_population_limpio = df_population.reset_index(drop=True)
df_population_limpio

Unnamed: 0,Country_code,Total_Population,Male_Population,Female_Population,Population_Density,Life_Expectancy
0,BI,11874000,5894000,5980000,457,62
1,KM,790000,397000,393000,425,64
2,DJ,1073000,533000,540000,46,63
3,ER,3498000,1724000,1773000,28,67
4,ET,114120000,57375000,56745000,114,65
...,...,...,...,...,...,...
230,WS,211000,108000,103000,74,72
231,TK,1000,0,0,146,75
232,TO,104000,51000,52000,161,70
233,TV,10000,5000,5000,365,64


In [28]:
df_population_limpio.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 235 entries, 0 to 234
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Country_code        235 non-null    object
 1   Total_Population    235 non-null    int64 
 2   Male_Population     235 non-null    int64 
 3   Female_Population   235 non-null    int64 
 4   Population_Density  235 non-null    int64 
 5   Life_Expectancy     235 non-null    int64 
dtypes: int64(5), object(1)
memory usage: 11.1+ KB


In [29]:
#Almacenar el dataFrame en un csv para trabajar las etapas por separado.
df_population_limpio.to_csv('df_population_limpio.csv', sep=',', index=False)