# ¿El planeta se calienta?  80 años de cambio en la temperatura global


### Análisis exploratorio y tratamiento de datos

Importación de librerías necesarias

In [1]:
import pandas as pd

Lectura de los datasets

In [2]:
# Source files are expected in the notebook's working directory.
TEMPERATURE_CSV = 'average-monthly-surface-temperature.csv'
CONTINENT_CSV = 'continents2.csv'

# Temperature records (one row per entity and month) and the ISO
# country -> region lookup table used to enrich them.
df_temperature = pd.read_csv(TEMPERATURE_CSV)
df_continent = pd.read_csv(CONTINENT_CSV)

In [3]:
# Preview the first five temperature records to check the column layout.
df_temperature.head(n=5)

Unnamed: 0,Entity,Code,year,Day,Average surface temperature,Average surface temperature.1
0,Afghanistan,AFG,1940,1940-01-15,-2.032494,11.327695
1,Afghanistan,AFG,1940,1940-02-15,-0.733503,11.327695
2,Afghanistan,AFG,1940,1940-03-15,1.999134,11.327695
3,Afghanistan,AFG,1940,1940-04-15,10.199754,11.327695
4,Afghanistan,AFG,1940,1940-05-15,17.942135,11.327695


In [4]:
# Preview the first five rows of the country/region lookup table.
df_continent.head(n=5)

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,,2.0,15.0,
4,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,,9.0,61.0,


Eliminamos las columnas que no utilizaremos (name, alpha-2, country-code, iso_3166-2, intermediate-region, region-code, sub-region-code e intermediate-region-code) y conservamos únicamente alpha-3, region y sub-region.

In [5]:
# Keep only the columns needed downstream: the ISO alpha-3 code (join key for
# the temperature data) plus the region and sub-region labels.
# Selecting the columns to keep, instead of dropping the rest in place, makes
# this cell safe to re-run: an in-place drop raises KeyError on the second
# execution because the columns are already gone. It also fails loudly if any
# of the three required columns is missing from the CSV.
df_continent = df_continent[['alpha-3', 'region', 'sub-region']].copy()
df_continent.head()

Unnamed: 0,alpha-3,region,sub-region
0,AFG,Asia,Southern Asia
1,ALA,Europe,Northern Europe
2,ALB,Europe,Southern Europe
3,DZA,Africa,Northern Africa
4,ASM,Oceania,Polynesia


In [6]:
# Attach region and sub-region to every temperature record by matching the
# temperature table's `Code` column against the lookup table's `alpha-3`.
#
# how='left' keeps every temperature row; entities whose code has no match in
# the lookup come back with NaN in alpha-3 / region / sub-region.
#
# validate='many_to_one' makes pandas raise MergeError if an alpha-3 code
# appears more than once in df_continent. Without it, a duplicated code would
# silently duplicate temperature rows and skew every later aggregate.
df_merge = df_temperature.merge(
    df_continent,
    how='left',
    left_on='Code',
    right_on='alpha-3',
    suffixes=('_temp', '_cont'),
    validate='many_to_one',
)
df_merge

Unnamed: 0,Entity,Code,year,Day,Average surface temperature,Average surface temperature.1,alpha-3,region,sub-region
0,Afghanistan,AFG,1940,1940-01-15,-2.032494,11.327695,AFG,Asia,Southern Asia
1,Afghanistan,AFG,1940,1940-02-15,-0.733503,11.327695,AFG,Asia,Southern Asia
2,Afghanistan,AFG,1940,1940-03-15,1.999134,11.327695,AFG,Asia,Southern Asia
3,Afghanistan,AFG,1940,1940-04-15,10.199754,11.327695,AFG,Asia,Southern Asia
4,Afghanistan,AFG,1940,1940-05-15,17.942135,11.327695,AFG,Asia,Southern Asia
...,...,...,...,...,...,...,...,...,...
198895,Zimbabwe,ZWE,2024,2024-08-15,20.559408,22.921250,ZWE,Africa,Sub-Saharan Africa
198896,Zimbabwe,ZWE,2024,2024-09-15,23.642931,22.921250,ZWE,Africa,Sub-Saharan Africa
198897,Zimbabwe,ZWE,2024,2024-10-15,24.407030,22.921250,ZWE,Africa,Sub-Saharan Africa
198898,Zimbabwe,ZWE,2024,2024-11-15,25.672321,22.921250,ZWE,Africa,Sub-Saharan Africa


Comprobar la existencia de valores nulos y tratarlos

In [7]:
# Build the cell-level null mask once and reuse it for both summaries.
null_mask = df_merge.isna()

# Null count per column.
print(null_mask.sum())

# Rows with at least one missing value, and the entities they belong to.
df_merge_nulls = df_merge.loc[null_mask.any(axis=1)]
paises_con_nulos = df_merge_nulls['Entity'].dropna().unique()
print("Los países con datos nulos son:", ", ".join(paises_con_nulos))

df_merge_nulls

Entity                              0
Code                                0
year                                0
Day                                 0
Average surface temperature         0
Average surface temperature.1       0
alpha-3                          2040
region                           2040
sub-region                       2040
dtype: int64
Los países con datos nulos son: Kosovo, World


Unnamed: 0,Entity,Code,year,Day,Average surface temperature,Average surface temperature.1,alpha-3,region,sub-region
99960,Kosovo,OWID_KOS,1940,1940-01-15,-7.012903,7.452425,,,
99961,Kosovo,OWID_KOS,1940,1940-02-15,-1.466986,7.452425,,,
99962,Kosovo,OWID_KOS,1940,1940-03-15,0.732306,7.452425,,,
99963,Kosovo,OWID_KOS,1940,1940-04-15,8.351257,7.452425,,,
99964,Kosovo,OWID_KOS,1940,1940-05-15,12.231471,7.452425,,,
...,...,...,...,...,...,...,...,...,...
195835,World,OWID_WRL,2024,2024-08-15,16.822422,15.090968,,,
195836,World,OWID_WRL,2024,2024-09-15,16.174273,15.090968,,,
195837,World,OWID_WRL,2024,2024-10-15,15.245267,15.090968,,,
195838,World,OWID_WRL,2024,2024-11-15,14.098477,15.090968,,,


Los datos nulos corresponden a Kosovo y a World. World se eliminará del estudio; los datos a nivel mundial se analizarán posteriormente.
En el caso de Kosovo, se completarán los valores nulos con información real del país y se actualizará su código a "XKX", el código de tres letras de uso habitual para Kosovo (es una asignación provisional, ya que Kosovo aún no tiene un código oficial en el estándar ISO 3166-1 alpha-3).

In [8]:
# Drop the aggregate "World" rows: they are not a country and have no region.
row_world = df_merge[df_merge['Entity'].eq('World')].index
df_merge.drop(index=row_world, inplace=True)

# Kosovo is missing from the lookup table, so fill its code and region labels
# by hand. alpha-3 is left as NaN for these rows.
row_kosovo = df_merge[df_merge['Entity'].eq('Kosovo')].index
kosovo_values = {
    'Code': 'XKX',
    'region': 'Europe',
    'sub-region': 'Southern Europe',
}
for column_name, new_value in kosovo_values.items():
    df_merge.loc[row_kosovo, column_name] = new_value

# Show the patched Kosovo rows.
df_merge[df_merge['Entity'].eq('Kosovo')]

Unnamed: 0,Entity,Code,year,Day,Average surface temperature,Average surface temperature.1,alpha-3,region,sub-region
99960,Kosovo,XKX,1940,1940-01-15,-7.012903,7.452425,,Europe,Southern Europe
99961,Kosovo,XKX,1940,1940-02-15,-1.466986,7.452425,,Europe,Southern Europe
99962,Kosovo,XKX,1940,1940-03-15,0.732306,7.452425,,Europe,Southern Europe
99963,Kosovo,XKX,1940,1940-04-15,8.351257,7.452425,,Europe,Southern Europe
99964,Kosovo,XKX,1940,1940-05-15,12.231471,7.452425,,Europe,Southern Europe
...,...,...,...,...,...,...,...,...,...
100975,Kosovo,XKX,2024,2024-08-15,23.763653,12.345814,,Europe,Southern Europe
100976,Kosovo,XKX,2024,2024-09-15,17.061443,12.345814,,Europe,Southern Europe
100977,Kosovo,XKX,2024,2024-10-15,12.335925,12.345814,,Europe,Southern Europe
100978,Kosovo,XKX,2024,2024-11-15,3.705254,12.345814,,Europe,Southern Europe


Se actualizan los nombres de las columnas para que sean más entendibles. Se eliminan alpha-3 y Day (todos los datos fueron tomados el día 15 de cada mes) y se agrega una nueva columna, Month, con el nombre del mes. De esta forma tendremos datos más organizados para análisis futuros.

In [9]:
# Build the final per-country table without mutating df_merge, so this cell
# can be re-run safely. The previous in-place insert/drop failed on a second
# run because 'Day' had already been removed.
#
# Steps:
#   1. Replace each 'Day' date with its English month name. The column keeps
#      its position (index 3), which is where 'Month' is expected.
#   2. Drop 'alpha-3', which only served as the join key.
#   3. Rename columns to their final names ('Day' becomes 'Month').
#
# NOTE(review): in the sample output the first temperature column changes
# every month while the second is constant within a year, so the
# 'AVG_daily_*' / 'AVG_monthly_*' names look shifted by one granularity
# (monthly / yearly). Names are kept as-is because later cells may depend on
# them; confirm against the dataset documentation.
df_temp_country = (
    df_merge
    .assign(Day=lambda frame: pd.to_datetime(frame['Day']).dt.month_name())
    .drop(columns=['alpha-3'])
    .rename(columns={
        'Day': 'Month',
        'Average surface temperature': 'AVG_daily_surface_temperature',
        'Average surface temperature.1': 'AVG_monthly_surface_temperature',
        'sub-region': 'sub_region',
        'year': 'Year',
        'region': 'Region',
    })
)
df_temp_country

Unnamed: 0,Entity,Code,Year,Month,AVG_daily_surface_temperature,AVG_monthly_surface_temperature,Region,sub_region
0,Afghanistan,AFG,1940,January,-2.032494,11.327695,Asia,Southern Asia
1,Afghanistan,AFG,1940,February,-0.733503,11.327695,Asia,Southern Asia
2,Afghanistan,AFG,1940,March,1.999134,11.327695,Asia,Southern Asia
3,Afghanistan,AFG,1940,April,10.199754,11.327695,Asia,Southern Asia
4,Afghanistan,AFG,1940,May,17.942135,11.327695,Asia,Southern Asia
...,...,...,...,...,...,...,...,...
198895,Zimbabwe,ZWE,2024,August,20.559408,22.921250,Africa,Sub-Saharan Africa
198896,Zimbabwe,ZWE,2024,September,23.642931,22.921250,Africa,Sub-Saharan Africa
198897,Zimbabwe,ZWE,2024,October,24.407030,22.921250,Africa,Sub-Saharan Africa
198898,Zimbabwe,ZWE,2024,November,25.672321,22.921250,Africa,Sub-Saharan Africa


Finalmente realizaremos una clasificación por décadas, para hacer análisis globales futuros

### Desarrollo de Infografías y Gráfico Interactivo