In [109]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, ForeignKey
from pprint import pprint as pp

from config import local_mysql_password, local_mysql_user

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 200)

## Import and preprocess FAO crop yield data

In [240]:
fao_file_path = "./data/fao_data/"

# Load the crop yield data
file_name = 'Production_Crops_Livestock_E_All_Data_(Normalized)'
fao_crop_yield_data = pd.read_csv(f"{fao_file_path}fao_crop_data/normalized/{file_name}.csv", encoding='latin-1')
# Replace spaces column names with underscores and make lower case
fao_crop_yield_data.columns = fao_crop_yield_data.columns.str.replace(' ','_').str.lower()
# Rename confusing data aggregation flags
fao_crop_yield_data.flag.replace(np.nan, 'O', inplace=True)
fao_crop_yield_data.flag.replace('*', 'U', inplace=True)
# Remove 0 crop yield values to prevent bad training data
fao_crop_yield_data = fao_crop_yield_data[fao_crop_yield_data.value != 0.0]
# Drop the year_code becuase it is always the same as the year value
fao_crop_yield_data.drop('year_code', axis=1, inplace=True)
# Drop areas which are not countries
regions = [
    'World', 'Africa', 'Eastern Africa', 'Middle Africa', 'Northern Africa', 'Southern Africa', 'Western Africa', 'Americas',
    'Northern America', 'Central America', 'Caribbean', 'South America', 'Asia', 'Central Asia', 'Eastern Asia',
    'Southern Asia', 'South-eastern Asia', 'Western Asia', 'Europe', 'Eastern Europe', 'Northern Europe', 'Southern Europe',
    'Western Europe', 'Oceania', 'Australia and New Zealand', 'Melanesia', 'Micronesia', 'Polynesia'
    ]
special_groups = [
    'European Union (28)', 'European Union (27)', 'Least Developed Countries', 'Land Locked Developing Countries', 'Small Island Developing States',
    'Low Income Food Deficit Countries', 'Net Food Importing Developing Countries', 'Annex I countries', 'Non-Annex I countries',  'OECD'
    ]
fao_countries = fao_crop_yield_data.area.loc[~fao_crop_yield_data.area.isin(regions+special_groups)].unique().tolist()
fao_crop_yield_data = fao_crop_yield_data[fao_crop_yield_data.area.isin(fao_countries)]
# Rename columns so they aren't confused with the NOAA data
fao_crop_yield_data.rename({
    'area_code': 'fao_country_code', 
    'area': 'fao_country_name', 
    'flag': 'fao_data_quality_flag', 
    'item': 'product', 
    'item_code': 'product_id',
    'element': 'production_type',
    'element_code': 'production_type_id'
    }, axis=1, inplace=True)

fao_crop_yield_data.head()


Unnamed: 0,fao_country_code,fao_country_name,product_id,product,production_type_id,production_type,year,unit,value,fao_data_quality_flag
1,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,1976,ha,5900.0,F
2,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,1977,ha,6000.0,F
3,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,1978,ha,6000.0,F
4,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,1979,ha,6000.0,F
5,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,1980,ha,5800.0,F


In [241]:
fao_crop_yield_data.shape

(2802701, 10)

In [242]:
# Extract the country data
fao_country_data = fao_crop_yield_data[['fao_country_code', 'fao_country_name']].copy()
fao_country_data.drop_duplicates(inplace=True)
fao_country_data.shape

(211, 2)

In [243]:
# Load the noaa ghcnd countries
noaa_file_path = './data/noaa_ghcn_aws_data/'
noaa_countries_file_name = 'ghcnd-countries'
noaa_countries = pd.read_fwf(f"{noaa_file_path}{noaa_countries_file_name}.txt", header=None, names = ["FIPS_country_code", "ghcnd_country"])
noaa_countries.shape


(219, 2)

In [244]:
fao_to_noaa_country_map = {
    'Bahamas': 'Bahamas, The',
    'Bolivia (Plurinational State of)': 'Bolivia',
    'Brunei Darussalam': 'Brunei',
    'Cabo Verde': 'Cape Verde',
    'Congo': 'Congo (Brazzaville)',
    'Cook Islands': 'Cook Islands [New Zealand]',
    'Czechia': 'Czech Republic',
    "Democratic People's Republic of Korea": 'Korea, North',
    'Democratic Republic of the Congo': 'Congo (Kinshasa)',
    'French Guyana': 'French Guiana [France]',
    'Gambia': 'Gambia, The',
    'Guadeloupe': 'Guadeloupe [France]',
    'Iran (Islamic Republic of)': 'Iran',
    'Martinique': 'Martinique [France]',
    'Micronesia (Federated States of)': 'Federated States of Micronesia',
    'Myanmar': 'Burma',
    'New Caledonia': 'New Caledonia [France]',
    'Niue': 'Niue [New Zealand]',
    'Puerto Rico': 'Puerto Rico [United States]',
    'Republic of Korea': 'Korea, South',
    'Russian Federation': 'Russia',
    'Samoa': 'American Samoa [United States]',
    'Syrian Arab Republic': 'Syria',
    'Tokelau': 'Tokelau [New Zealand]',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'United Republic of Tanzania': 'Tanzania',
    'United States of America': 'United States',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Viet Nam': 'Vietnam',
    'North Macedonia': 'Macedonia',
    'Réunion': 'Reunion [France]',
    "Côte d'Ivoire": "Cote D'Ivoire",
    "China, Macao SAR": "Macau S.A.R",
    "Lao People's Democratic Republic": "Laos",
    "Republic of Moldova": "Moldova",
    }

country_data = []
for fao_country_name, noaa_country_name in fao_to_noaa_country_map.items():
        country_data.append({
            'fao_country_code': fao_country_data[fao_country_data.fao_country_name==fao_country_name].fao_country_code.values[0],
            'fao_country_name': fao_country_name,
            'noaa_country_name': noaa_country_name,
            'noaa_country_code': noaa_countries[noaa_countries.ghcnd_country==noaa_country_name].FIPS_country_code.values[0],
        })

for country in fao_countries:
    if country in noaa_countries.ghcnd_country.tolist() and country not in fao_to_noaa_country_map.keys():
        country_data.append({
            'fao_country_code': fao_country_data[fao_country_data.fao_country_name==country].fao_country_code.values[0],
            'fao_country_name': country,
            'noaa_country_name': country,
            'noaa_country_code': noaa_countries[noaa_countries.ghcnd_country==country].FIPS_country_code.values[0],
        })
    elif country not in fao_to_noaa_country_map.keys():
        country_data.append({
            'fao_country_code': fao_country_data[fao_country_data.fao_country_name==country].fao_country_code.values[0],
            'fao_country_name': country,
            'noaa_country_name': np.nan,
            'noaa_country_code': np.nan,
        })
country_data = pd.DataFrame(country_data).sort_values(['noaa_country_name', 'fao_country_name'])
country_data.dropna(inplace=True)
country_data.head()

Unnamed: 0,fao_country_code,fao_country_name,noaa_country_name,noaa_country_code
35,2,Afghanistan,Afghanistan,AF
36,3,Albania,Albania,AL
37,4,Algeria,Algeria,AG
21,244,Samoa,American Samoa [United States],AQ
38,7,Angola,Angola,AO


In [248]:
# Clean up the crop yield data
# Remove rows which belong to dropped countries/regions
fao_crop_yield_data = fao_crop_yield_data[fao_crop_yield_data.fao_country_code.isin(country_data.fao_country_code)]

# Remove rows relating to unnecessary production_types.
# Yield = production/area_harvested so we can drop those two rows. The rest are for animals, which do not depend on weather.
production_types_to_drop = ['Area harvested', 'Production', 'Stocks', 'Laying', 'Producing Animals/Slaughtered', 'Yield/Carcass Weight', 'Milk Animals', 'Prod Popultn']
print(fao_crop_yield_data.shape)
fao_crop_yield_data = fao_crop_yield_data[~fao_crop_yield_data.production_type.isin(production_types_to_drop)]
print(fao_crop_yield_data.shape)

# Removing unnecessary columns
fao_crop_yield_data.drop(['fao_country_name', 'product', 'production_type_id', 'production_type'], axis=1, inplace=True, errors='ignore')

(570893, 7)
(570893, 7)


In [249]:
# Split the country_data to create two tables with primary keys
fao_country_codes = country_data[['fao_country_code', 'fao_country_name', 'noaa_country_code']]
noaa_country_codes = country_data[['noaa_country_code', 'noaa_country_name', 'fao_country_code']]
noaa_country_codes.head()

Unnamed: 0,noaa_country_code,noaa_country_name,fao_country_code
35,AF,Afghanistan,2
36,AL,Albania,3
37,AG,Algeria,4
21,AQ,American Samoa [United States],244
38,AO,Angola,7


In [250]:
# Import fao data aggregation codes
file_name = 'Production_Crops_Livestock_E_Flags'
fao_data_quality_description = pd.read_csv(f"{fao_file_path}fao_crop_data/normalized/{file_name}.csv", encoding='latin-1')
# Make column names lower case
fao_data_quality_description.columns = fao_data_quality_description.columns.str.lower()
fao_data_quality_description.rename({'flag': 'fao_data_quality_flag'}, axis=1, inplace=True)
# Rename confusing data aggregation flags
fao_data_quality_description.replace('<blank>', 'O', inplace=True)
fao_data_quality_description.replace('*', 'U', inplace=True)
fao_data_quality_description

Unnamed: 0,fao_data_quality_flag,description
0,U,Unofficial figure
1,O,Official data
2,A,Aggregate; may include official; semi-official...
3,F,FAO estimate
4,Fc,Calculated data
5,Im,FAO data based on imputation methodology
6,M,Data not available


In [253]:
# Import product item codes
file_name = 'Production_Crops_Livestock_E_ItemCodes'
fao_product_ids = pd.read_csv(f"{fao_file_path}fao_crop_data/normalized/{file_name}.csv", encoding='latin-1')
# Replace spaces column names with underscores and make lower case
fao_product_ids.columns = fao_product_ids.columns.str.replace(' ','_').str.lower()
# Rename columns so they aren't confused with the NOAA data
fao_product_ids.rename({'item': 'product', 'item_code': 'product_id'}, axis=1, inplace=True)
# Remove product_ids that don't exist in the fao_crop_yield data
fao_product_ids = fao_product_ids[fao_product_ids.product_id.isin(fao_crop_yield_data.product_id)]

fao_product_ids.head()

(306, 3)
(185, 3)


Unnamed: 0,product_id,cpc_code,product
0,101,'01195,Canary seed
5,1020,'02292,Milk; whole fresh goat
8,1025,'02954,Skins; goat; fresh
9,103,'01199.02,Grain; mixed
17,1062,'0231,Eggs; hen; in shell


## Push FAO crop yield data to database

In [234]:
class DatabaseInterface:
    def __init__(self,
                 db_name,
                 user,
                 password,
                 host='localhost',
                 port=3306,
                 driver='mysql+pymysql'):
        
        self.con = create_engine(f'{driver}://{user}:{password}@{host}')
        self.con.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}")
        self.db_engine = create_engine(f"{driver}://{user}:{password}@{host}:{port}/{db_name}", echo=True)

    def insert_data(self, df: pd.DataFrame, table_name: str, if_exists: str = 'append'):
        df.to_sql(table_name, self.db_engine, if_exists=if_exists, index=False)

    def close_connection(self):
        self.db_engine.dispose()

In [256]:
dbi = DatabaseInterface(db_name='crop_yield_prediction', 
                        user=local_mysql_user, 
                        password=local_mysql_password)
meta = MetaData()


dbi.db_engine.execute(f"DROP TABLE IF EXISTS fao_crop_yields")
dbi.db_engine.execute(f"DROP TABLE IF EXISTS fao_data_quality_description")
dbi.db_engine.execute(f"DROP TABLE IF EXISTS fao_product_ids")
dbi.db_engine.execute(f"SET FOREIGN_KEY_CHECKS=0;")
dbi.db_engine.execute(f"DROP TABLE IF EXISTS fao_country_codes")
dbi.db_engine.execute(f"DROP TABLE IF EXISTS noaa_country_codes")
dbi.db_engine.execute(f"SET FOREIGN_KEY_CHECKS=1;")

fao_data_quality_description_table = Table(
   'fao_data_quality_description', meta, 
   Column('fao_data_quality_flag', String(2), primary_key = True), 
   Column('description', String(100)),
)

fao_product_ids_table = Table(
   'fao_product_ids', meta, 
   Column('product_id', Integer, primary_key = True), 
   Column('cpc_code', String(9)),
   Column('product', String(100)),
)

fao_country_code_table = Table(
   'fao_country_codes', meta, 
   Column('fao_country_code', Integer, primary_key = True),
   Column('fao_country_name', String(100)),
   Column('noaa_country_code', String(2), ForeignKey("noaa_country_codes.noaa_country_code")), 
)

noaa_country_code_table = Table(
   'noaa_country_codes', meta, 
   Column('noaa_country_code', String(2), primary_key = True), 
   Column('noaa_country_name', String(100)),
   Column('fao_country_code', Integer, ForeignKey("fao_country_codes.fao_country_code")),
)

fao_crop_yields_table = Table(
   'fao_crop_yields', meta, 
   Column('id', Integer, primary_key = True), 
   Column('fao_country_code', Integer, ForeignKey("fao_country_codes.fao_country_code")), 
   Column('product_id', Integer, ForeignKey("fao_product_ids.product_id")),
   Column('year', Integer),
   Column('unit', String(100)),
   Column('value', Float(20)),
   Column('fao_data_quality_flag', String(2), ForeignKey("fao_data_quality_description.fao_data_quality_flag")),
)							
meta.create_all(dbi.db_engine)


2022-02-05 14:17:54,179 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2022-02-05 14:17:54,180 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-05 14:17:54,183 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2022-02-05 14:17:54,184 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-05 14:17:54,188 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2022-02-05 14:17:54,189 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-05 14:17:54,192 INFO sqlalchemy.engine.Engine DROP TABLE IF EXISTS fao_crop_yields
2022-02-05 14:17:54,193 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-05 14:17:54,320 INFO sqlalchemy.engine.Engine COMMIT
2022-02-05 14:17:54,321 INFO sqlalchemy.engine.Engine DROP TABLE IF EXISTS fao_data_quality_description
2022-02-05 14:17:54,323 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-02-05 14:17:54,352 INFO sqlalchemy.engine.Engine COMMIT
2022-02-05 14:17:54,355 INFO sqlalchemy.engine.Engine DROP TABLE IF EXISTS fao_product_ids
2022-02-05 14:17:54,356 INF

In [257]:
print("Inserting fao_data_quality_descriptions")
dbi.insert_data(fao_data_quality_description, 'fao_data_quality_description')

print("Inserting fao_product_ids")
dbi.insert_data(fao_product_ids, 'fao_product_ids')

 # The foreign keys for fao and noaa country codes have a circular dependency so we have to turn off the checks while inserting the data
dbi.db_engine.execute(f"SET FOREIGN_KEY_CHECKS=0;")
print("Inserting fao_country_codes")
dbi.insert_data(fao_country_codes, 'fao_country_codes')

print("Inserting noaa_country_codes")
dbi.insert_data(noaa_country_codes, 'noaa_country_codes')
dbi.db_engine.execute(f"SET FOREIGN_KEY_CHECKS=1;")

print("Inserting fao_crop_yields")
dbi.insert_data(fao_crop_yield_data, 'fao_crop_yields')

print("Closing connection")
dbi.close_connection()
print("Done")

Inserting fao_data_quality_descriptions
2022-02-05 14:18:03,514 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2022-02-05 14:18:03,515 INFO sqlalchemy.engine.Engine [cached since 9.057s ago] {'table_schema': 'crop_yield_prediction', 'table_name': 'fao_data_quality_description'}
2022-02-05 14:18:03,524 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-02-05 14:18:03,526 INFO sqlalchemy.engine.Engine INSERT INTO fao_data_quality_description (fao_data_quality_flag, description) VALUES (%(fao_data_quality_flag)s, %(description)s)
2022-02-05 14:18:03,527 INFO sqlalchemy.engine.Engine [generated in 0.00138s] ({'fao_data_quality_flag': 'U', 'description': 'Unofficial figure'}, {'fao_data_quality_flag': 'O', 'description': 'Official data'}, {'fao_data_quality_flag': 'A', 'description': 'Aggregate; may include official; semi-official; estimated or calculated data'}, {'fao_data_quality_flag': 'F

Inserting fao_data_quality_descriptions
2022-02-05 13:12:52,721 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2022-02-05 13:12:52,724 INFO sqlalchemy.engine.Engine [cached since 13.23s ago] {'table_schema': 'crop_yield_prediction', 'table_name': 'fao_data_quality_description'}
2022-02-05 13:12:52,729 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-02-05 13:12:52,736 INFO sqlalchemy.engine.Engine INSERT INTO fao_data_quality_description (fao_data_quality_flag, description) VALUES (%(fao_data_quality_flag)s, %(description)s)
2022-02-05 13:12:52,738 INFO sqlalchemy.engine.Engine [generated in 0.00181s] ({'fao_data_quality_flag': 'U', 'description': 'Unofficial figure'}, {'fao_data_quality_flag': 'O', 'description': 'Official data'}, {'fao_data_quality_flag': 'A', 'description': 'Aggregate; may include official; semi-official; estimated or calculated data'}, {'fao_data_quality_flag': 'F

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fe9f806b970>

array(['Area harvested', 'Yield', 'Production', 'Stocks', 'Laying',
       'Producing Animals/Slaughtered', 'Yield/Carcass Weight',
       'Milk Animals', 'Prod Popultn'], dtype=object)

1.5736292860692398

ProgrammingError: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'int(4) NOT NULL auto_increment FIRST' at line 1")
[SQL: ALTER TABLE `fao_crop_yields` ADD PRIMARY KEY (`id`) int(4) NOT NULL auto_increment FIRST;]
(Background on this error at: https://sqlalche.me/e/14/f405)

## Import and explore World Bank temperature change & precipitation data

The conclusion was reached that this data is not detailed enough to be of use

In [93]:
# Load temperature change data
file_name = 'Environment_Temperature_change_E_All_Data_(Normalized)'
fao_temp_data = pd.read_csv(f"{fao_file_path}fao_temperature_change/normalized/{file_name}.csv", encoding='latin-1')
# Replace spaces column names with underscores and make lower case
fao_temp_data.columns = fao_temp_data.columns.str.replace(' ','_').str.lower()
# Create area_type column
fao_temp_data['area_type'] = fao_temp_data['area'].map(areas_to_area_types)
# Remove unicode from months col
fao_temp_data.months = fao_temp_data.months.str.replace('\x96', '_')
fao_temp_data

Unnamed: 0,area_code,area,months_code,months,element_code,element,year_code,year,unit,value,flag,area_type
0,2,Afghanistan,7001,January,7271,Temperature change,1961,1961,°C,0.746,Fc,country
1,2,Afghanistan,7001,January,7271,Temperature change,1962,1962,°C,0.009,Fc,country
2,2,Afghanistan,7001,January,7271,Temperature change,1963,1963,°C,2.695,Fc,country
3,2,Afghanistan,7001,January,7271,Temperature change,1964,1964,°C,-5.277,Fc,country
4,2,Afghanistan,7001,January,7271,Temperature change,1965,1965,°C,1.827,Fc,country
...,...,...,...,...,...,...,...,...,...,...,...,...
537365,5873,OECD,7020,Meteorological year,6078,Standard Deviation,2016,2016,°C,0.261,Fc,special_group
537366,5873,OECD,7020,Meteorological year,6078,Standard Deviation,2017,2017,°C,0.261,Fc,special_group
537367,5873,OECD,7020,Meteorological year,6078,Standard Deviation,2018,2018,°C,0.261,Fc,special_group
537368,5873,OECD,7020,Meteorological year,6078,Standard Deviation,2019,2019,°C,0.261,Fc,special_group


In [82]:
fao_temp_data.months.unique()

array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December',
       'Dec_Jan_Feb', 'Mar_Apr_May', 'Jun_Jul_Aug', 'Sep_Oct_Nov',
       'Meteorological year'], dtype=object)

In [94]:
# Load temperature change data aggregation flags
file_name = 'Environment_Temperature_change_E_Flags'
fao_temp_flags = pd.read_csv(f"{fao_file_path}fao_temperature_change/normalized/{file_name}.csv", encoding='latin-1')
# Replace spaces column names with underscores and make lower case
fao_temp_flags.columns = fao_temp_flags.columns.str.replace(' ','_').str.lower()
fao_temp_flags

Unnamed: 0,flag,description
0,Fc,Calculated data
1,,Not applicable
2,NV,Data not available


In [97]:
world_bank_file_path = "./data/world_bank_data/"
# Load precipitation data
file_name = 'world_bank_precipitation'
precipitation = pd.read_csv(f"{world_bank_file_path}world_bank_precipitation/{file_name}.csv", encoding='latin-1')
# Replace spaces column names with underscores and make lower case
precipitation.columns = precipitation.columns.str.replace(' ','_').str.lower()
precipitation.head(5)

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Africa Eastern and Southern,AFE,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Afghanistan,AFG,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,,,327.0,,,
3,Africa Western and Central,AFW,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Angola,AGO,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,,,1010.0,,,


In [99]:
world_bank_file_path = "./data/world_bank_data/"
# Load precipitation data
file_name = 'droughts_floods_extreme_temperatures'
droughts_floods_extreme_temperatures = pd.read_csv(f"{world_bank_file_path}droughts_floods_extreme_temperatures/{file_name}.csv", encoding='latin-1')
# Replace spaces column names with underscores and make lower case
droughts_floods_extreme_temperatures.columns = droughts_floods_extreme_temperatures.columns.str.replace(' ','_').str.lower()
droughts_floods_extreme_temperatures.head(5)

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,"Droughts, floods, extreme temperatures (% of p...",EN.CLC.MDAT.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Africa Eastern and Southern,AFE,"Droughts, floods, extreme temperatures (% of p...",EN.CLC.MDAT.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Afghanistan,AFG,"Droughts, floods, extreme temperatures (% of p...",EN.CLC.MDAT.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.05929,,,,,,,,,,,
3,Africa Western and Central,AFW,"Droughts, floods, extreme temperatures (% of p...",EN.CLC.MDAT.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Angola,AGO,"Droughts, floods, extreme temperatures (% of p...",EN.CLC.MDAT.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.011765,,,,,,,,,,,


In [100]:
countries

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belgium-Luxembourg',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia (Plurinational State of)',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'China, Hong Kong SAR',
 'China, Macao SAR',
 'China, mainland',
 'China, Taiwan Province of',
 'Colombia',
 'Comoros',
 'Congo',
 'Cook Islands',
 'Costa Rica',
 "Côte d'Ivoire",
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czechia',
 'Czechoslovakia',
 "Democratic People's Republic of Korea",
 'Democratic Republic of the Congo',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia