In [10]:
import pandas as pd
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
import numpy as np
import plotly.offline as py


In [11]:
df = pd.read_csv("GlobalLandTemperaturesByCountry.csv")
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [12]:
df["dt"] = pd.to_datetime(df['dt'])
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [13]:
df["year"] = pd.DatetimeIndex(df['dt']).year
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,year
0,1743-11-01,4.384,2.294,Åland,1743
1,1743-12-01,,,Åland,1743
2,1744-01-01,,,Åland,1744
3,1744-02-01,,,Åland,1744
4,1744-03-01,,,Åland,1744


In [14]:
df["Country"] = df["Country"].str.upper()

In [15]:
df_clear = df[~df['Country'].isin(
    ['DENMARK', 'ANTARTICA', 'ANTARCTICA', 'FRANCE', 'AFRICA', 'ASIA', 'EUROPE', 'ÅLAND', 'NETHERLANDS', 'BAKER ISLAND',
     'BONAIRE, SAINT EUSTATIUS AND SABA', 'BURMA', 'UNITED KINGDOM', 'AFRICA', 'SOUTH AMERICA',
    'GAZA STRIP','KINGMAN REEF','NORTH AMERICA','OCEANIA','PALMYRA ATOLL','REUNION','SINT MAARTEN',
     'FRENCH SOUTHERN AND ANTARCTIC LANDS','TIMOR LESTE','WESTERN SAHARA'])]

df_clear = df_clear.replace(
   ['DENMARK (EUROPE)', 'FRANCE (EUROPE)', 'NETHERLANDS (EUROPE)',
    'UNITED KINGDOM (EUROPE)', 'CONGO (DEMOCRATIC REPUBLIC OF THE)',
   'FALKLAND ISLANDS (ISLAS MALVINAS)','GUINEA BISSAU','PALESTINA',
   'SOUTH GEORGIA AND THE SOUTH SANDWICH ISLA','TURKS AND CAICAS ISLANDS','VIRGIN ISLANDS'],
   ['DENMARK', 'FRANCE', 'NETHERLANDS', 'UNITED KINGDOM','CONGO, THE DEMOCRATIC REPUBLIC OF THE',
   'FALKLAND ISLANDS (MALVINAS)','GUINEA-BISSAU','PALESTINE',
   'SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS','TURKS AND CAICOS ISLANDS','VIRGIN ISLANDS, BRITISH'])

df_clear.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,year
3239,1838-04-01,13.008,2.586,AFGHANISTAN,1838
3240,1838-05-01,,,AFGHANISTAN,1838
3241,1838-06-01,23.95,2.51,AFGHANISTAN,1838
3242,1838-07-01,26.877,2.883,AFGHANISTAN,1838
3243,1838-08-01,24.938,2.992,AFGHANISTAN,1838


In [16]:
df_clear["country_code"] = df_clear["Country"].apply(lambda x: country_name_to_country_alpha2(x,cn_name_format="upper"))
df_clear["continent"] = df_clear["country_code"].apply(lambda x: country_alpha2_to_continent_code(x))

In [23]:
locations = pd.read_csv("https://raw.githubusercontent.com/albertyw/avenews/master/old/data/average-latitude-longitude-countries.csv")
locations = locations.rename(columns={"ISO 3166 Country Code":"country_code"})
locations = locations.drop(columns=["Country"])
locations.head()

Unnamed: 0,country_code,Latitude,Longitude
0,AD,42.5,1.5
1,AE,24.0,54.0
2,AF,33.0,65.0
3,AG,17.05,-61.8
4,AI,18.25,-63.17


In [24]:
final_df = pd.merge(df_clear,locations,on="country_code")
final_df

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,year,country_code,continent,Latitude,Longitude
0,1838-04-01,13.008,2.586,AFGHANISTAN,1838,AF,AS,33.0,65.0
1,1838-05-01,,,AFGHANISTAN,1838,AF,AS,33.0,65.0
2,1838-06-01,23.950,2.510,AFGHANISTAN,1838,AF,AS,33.0,65.0
3,1838-07-01,26.877,2.883,AFGHANISTAN,1838,AF,AS,33.0,65.0
4,1838-08-01,24.938,2.992,AFGHANISTAN,1838,AF,AS,33.0,65.0
...,...,...,...,...,...,...,...,...,...
505971,2013-05-01,19.059,1.022,ZIMBABWE,2013,ZW,AF,-20.0,30.0
505972,2013-06-01,17.613,0.473,ZIMBABWE,2013,ZW,AF,-20.0,30.0
505973,2013-07-01,17.000,0.453,ZIMBABWE,2013,ZW,AF,-20.0,30.0
505974,2013-08-01,19.759,0.717,ZIMBABWE,2013,ZW,AF,-20.0,30.0


### Export to csv

In [26]:
final_df.to_csv("GlobalLandTemperaturesByCountry_clean.csv")