In [2]:
# Packages
import pandas as pd
from IPython.display import IFrame
import numpy as np
import matplotlib.pyplot as plt
import csv
import seaborn as sns
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, CategoricalColorMapper, Legend, BoxSelectTool, FactorRange, HoverTool, GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.layouts import gridplot, column
from bokeh.palettes import Spectral6, Pastel1, Category20c, Inferno256, Reds256
import geopandas as gpd

### Open csv files and make first data cleaning

In [3]:
population_df = pd.read_csv('Population_E_All_Data_(Normalized).csv', encoding='latin-1')
population_df

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1950,1950,1000 persons,7480.461,X,
1,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1951,1951,1000 persons,7571.537,X,
2,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1952,1952,1000 persons,7667.533,X,
3,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1953,1953,1000 persons,7764.546,X,
4,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1954,1954,1000 persons,7864.285,X,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
169137,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2046,2046,1000 persons,1383207.147,X,
169138,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2047,2047,1000 persons,1418053.439,X,
169139,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2048,2048,1000 persons,1453280.641,X,
169140,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2049,2049,1000 persons,1488876.775,X,


In [4]:
population_df.columns

Index(['Area Code', 'Area Code (M49)', 'Area', 'Item Code', 'Item',
       'Element Code', 'Element', 'Year Code', 'Year', 'Unit', 'Value', 'Flag',
       'Note'],
      dtype='object')

In [5]:
population_df.count()

Area Code          169142
Area Code (M49)    169142
Area               169142
Item Code          169142
Item               169142
Element Code       169142
Element            169142
Year Code          169142
Year               169142
Unit               169142
Value              169142
Flag               169142
Note                    0
dtype: int64

In [6]:
population_df.isnull().sum()

Area Code               0
Area Code (M49)         0
Area                    0
Item Code               0
Item                    0
Element Code            0
Element                 0
Year Code               0
Year                    0
Unit                    0
Value                   0
Flag                    0
Note               169142
dtype: int64

In [7]:
population_df = population_df.drop(columns=['Note'])
population_df

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1950,1950,1000 persons,7480.461,X
1,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1951,1951,1000 persons,7571.537,X
2,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1952,1952,1000 persons,7667.533,X
3,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1953,1953,1000 persons,7764.546,X
4,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1954,1954,1000 persons,7864.285,X
...,...,...,...,...,...,...,...,...,...,...,...,...
169137,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2046,2046,1000 persons,1383207.147,X
169138,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2047,2047,1000 persons,1418053.439,X
169139,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2048,2048,1000 persons,1453280.641,X
169140,5817,'902,Net Food Importing Developing Countries,3010,Population - Est. & Proj.,561,Urban population,2049,2049,1000 persons,1488876.775,X


Looking at the table it's clearly visible that this dataset is about population, without sex distinction; hence, we can eliminate Item Code, Item, Element Code and Element columns. We can also eliminate Flag Description and Flag because they are not useful for our analysis.

In [8]:
population_df = population_df.drop(columns=['Item Code', 'Item', 'Element Code', 'Element', 'Flag'])
population_df

Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Unit,Value
0,2,'004,Afghanistan,1950,1950,1000 persons,7480.461
1,2,'004,Afghanistan,1951,1951,1000 persons,7571.537
2,2,'004,Afghanistan,1952,1952,1000 persons,7667.533
3,2,'004,Afghanistan,1953,1953,1000 persons,7764.546
4,2,'004,Afghanistan,1954,1954,1000 persons,7864.285
...,...,...,...,...,...,...,...
169137,5817,'902,Net Food Importing Developing Countries,2046,2046,1000 persons,1383207.147
169138,5817,'902,Net Food Importing Developing Countries,2047,2047,1000 persons,1418053.439
169139,5817,'902,Net Food Importing Developing Countries,2048,2048,1000 persons,1453280.641
169140,5817,'902,Net Food Importing Developing Countries,2049,2049,1000 persons,1488876.775


From the area codes dataset we can also see that each country has its own unique code, so it's not necessary to have both Area Code and Area Code (M49). We can eliminate Area Code (M49).

In [9]:
population_df = population_df.drop(columns=['Area Code', 'Area Code (M49)'])
population_df

Unnamed: 0,Area,Year Code,Year,Unit,Value
0,Afghanistan,1950,1950,1000 persons,7480.461
1,Afghanistan,1951,1951,1000 persons,7571.537
2,Afghanistan,1952,1952,1000 persons,7667.533
3,Afghanistan,1953,1953,1000 persons,7764.546
4,Afghanistan,1954,1954,1000 persons,7864.285
...,...,...,...,...,...
169137,Net Food Importing Developing Countries,2046,2046,1000 persons,1383207.147
169138,Net Food Importing Developing Countries,2047,2047,1000 persons,1418053.439
169139,Net Food Importing Developing Countries,2048,2048,1000 persons,1453280.641
169140,Net Food Importing Developing Countries,2049,2049,1000 persons,1488876.775


Columns Year and Year Code are exactly the same, so we can eliminate one of them.

In [10]:
population_df = population_df.drop(columns=['Year Code'])
population_df

Unnamed: 0,Area,Year,Unit,Value
0,Afghanistan,1950,1000 persons,7480.461
1,Afghanistan,1951,1000 persons,7571.537
2,Afghanistan,1952,1000 persons,7667.533
3,Afghanistan,1953,1000 persons,7764.546
4,Afghanistan,1954,1000 persons,7864.285
...,...,...,...,...
169137,Net Food Importing Developing Countries,2046,1000 persons,1383207.147
169138,Net Food Importing Developing Countries,2047,1000 persons,1418053.439
169139,Net Food Importing Developing Countries,2048,1000 persons,1453280.641
169140,Net Food Importing Developing Countries,2049,1000 persons,1488876.775


Let's use another way to have data about population

In [11]:
population_df2 = pd.read_csv("Total_population.csv")
population_df2

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,OA,Annual population,4,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1950,1950,1000 persons,7480.461,X,Figure from international organizations,
1,OA,Annual population,4,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1951,1951,1000 persons,7571.537,X,Figure from international organizations,
2,OA,Annual population,4,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1952,1952,1000 persons,7667.533,X,Figure from international organizations,
3,OA,Annual population,4,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1953,1953,1000 persons,7764.546,X,Figure from international organizations,
4,OA,Annual population,4,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1954,1954,1000 persons,7864.285,X,Figure from international organizations,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15538,OA,Annual population,716,Zimbabwe,511,Total Population - Both sexes,3010,Population - Est. & Proj.,2017,2017,1000 persons,14751.101,X,Figure from international organizations,
15539,OA,Annual population,716,Zimbabwe,511,Total Population - Both sexes,3010,Population - Est. & Proj.,2018,2018,1000 persons,15052.184,X,Figure from international organizations,
15540,OA,Annual population,716,Zimbabwe,511,Total Population - Both sexes,3010,Population - Est. & Proj.,2019,2019,1000 persons,15354.608,X,Figure from international organizations,
15541,OA,Annual population,716,Zimbabwe,511,Total Population - Both sexes,3010,Population - Est. & Proj.,2020,2020,1000 persons,15669.666,X,Figure from international organizations,


In [12]:
population_df2.columns

Index(['Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Element Code',
       'Element', 'Item Code', 'Item', 'Year Code', 'Year', 'Unit', 'Value',
       'Flag', 'Flag Description', 'Note'],
      dtype='object')

In [13]:
population_fin = population_df2.drop(columns=['Domain Code', 'Area Code (M49)', 'Element Code', 'Item Code', 'Item', 'Year Code', 'Flag', 'Flag Description', 'Note'])

In [14]:
population_fin

Unnamed: 0,Domain,Area,Element,Year,Unit,Value
0,Annual population,Afghanistan,Total Population - Both sexes,1950,1000 persons,7480.461
1,Annual population,Afghanistan,Total Population - Both sexes,1951,1000 persons,7571.537
2,Annual population,Afghanistan,Total Population - Both sexes,1952,1000 persons,7667.533
3,Annual population,Afghanistan,Total Population - Both sexes,1953,1000 persons,7764.546
4,Annual population,Afghanistan,Total Population - Both sexes,1954,1000 persons,7864.285
...,...,...,...,...,...,...
15538,Annual population,Zimbabwe,Total Population - Both sexes,2017,1000 persons,14751.101
15539,Annual population,Zimbabwe,Total Population - Both sexes,2018,1000 persons,15052.184
15540,Annual population,Zimbabwe,Total Population - Both sexes,2019,1000 persons,15354.608
15541,Annual population,Zimbabwe,Total Population - Both sexes,2020,1000 persons,15669.666


Try to see emissions by country as a share of total for every year

In [15]:
emission_share = pd.read_csv('annual-share-of-co2-emissions.csv')
emission_share

Unnamed: 0,Entity,Code,Year,Share of global annual CO₂ emissions
0,Afghanistan,AFG,1949,0.000279
1,Afghanistan,AFG,1950,0.001404
2,Afghanistan,AFG,1951,0.001436
3,Afghanistan,AFG,1952,0.001417
4,Afghanistan,AFG,1953,0.001598
...,...,...,...,...
29351,Zimbabwe,ZWE,2017,0.026584
29352,Zimbabwe,ZWE,2018,0.032030
29353,Zimbabwe,ZWE,2019,0.029973
29354,Zimbabwe,ZWE,2020,0.030081


In [16]:
food_waste_df = pd.read_csv('Environment_Food_Waste_Disposal_E_All_Data_(Normalized).csv', encoding='latin-1', low_memory=False)
food_waste_df

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,'004,Afghanistan,6988,Domestic wastewater,7225,Emissions (CH4),1990,1990,kilotonnes,15.855902,E
1,2,'004,Afghanistan,6988,Domestic wastewater,7225,Emissions (CH4),1991,1991,kilotonnes,16.988608,E
2,2,'004,Afghanistan,6988,Domestic wastewater,7225,Emissions (CH4),1992,1992,kilotonnes,18.504317,E
3,2,'004,Afghanistan,6988,Domestic wastewater,7225,Emissions (CH4),1993,1993,kilotonnes,20.204656,E
4,2,'004,Afghanistan,6988,Domestic wastewater,7225,Emissions (CH4),1994,1994,kilotonnes,21.813107,E
...,...,...,...,...,...,...,...,...,...,...,...,...
67259,5873,'198,OECD,6991,Waste - agri-food systems,7230,Emissions (N2O),2015,2015,kilotonnes,62.348709,E
67260,5873,'198,OECD,6991,Waste - agri-food systems,7230,Emissions (N2O),2016,2016,kilotonnes,63.962387,E
67261,5873,'198,OECD,6991,Waste - agri-food systems,7230,Emissions (N2O),2017,2017,kilotonnes,66.273217,E
67262,5873,'198,OECD,6991,Waste - agri-food systems,7230,Emissions (N2O),2018,2018,kilotonnes,68.439822,E


In [17]:
food_waste_df.columns

Index(['Area Code', 'Area Code (M49)', 'Area', 'Item Code', 'Item',
       'Element Code', 'Element', 'Year Code', 'Year', 'Unit', 'Value',
       'Flag'],
      dtype='object')

In [18]:
food_waste_df = food_waste_df.drop(columns=['Item Code', 'Element Code', 'Flag', 'Area Code', 'Area Code (M49)', 'Year Code'])
food_waste_df

Unnamed: 0,Area,Item,Element,Year,Unit,Value
0,Afghanistan,Domestic wastewater,Emissions (CH4),1990,kilotonnes,15.855902
1,Afghanistan,Domestic wastewater,Emissions (CH4),1991,kilotonnes,16.988608
2,Afghanistan,Domestic wastewater,Emissions (CH4),1992,kilotonnes,18.504317
3,Afghanistan,Domestic wastewater,Emissions (CH4),1993,kilotonnes,20.204656
4,Afghanistan,Domestic wastewater,Emissions (CH4),1994,kilotonnes,21.813107
...,...,...,...,...,...,...
67259,OECD,Waste - agri-food systems,Emissions (N2O),2015,kilotonnes,62.348709
67260,OECD,Waste - agri-food systems,Emissions (N2O),2016,kilotonnes,63.962387
67261,OECD,Waste - agri-food systems,Emissions (N2O),2017,kilotonnes,66.273217
67262,OECD,Waste - agri-food systems,Emissions (N2O),2018,kilotonnes,68.439822


In [19]:
temperature_df = pd.read_csv('Environment_Temperature_change_E_All_Data_(Normalized).csv', encoding='latin-1', low_memory=False)
temperature_df

Unnamed: 0,ï»¿Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Months Code,Months,Year Code,Year,Unit,Value,Flag,Flag Description
0,ET,Temperature change on land,4,Afghanistan,7271,Temperature change,7020,Meteorological year,1961,1961,Â°C,-0.113,E,Estimated value
1,ET,Temperature change on land,4,Afghanistan,7271,Temperature change,7020,Meteorological year,1962,1962,Â°C,-0.164,E,Estimated value
2,ET,Temperature change on land,4,Afghanistan,7271,Temperature change,7020,Meteorological year,1963,1963,Â°C,0.847,E,Estimated value
3,ET,Temperature change on land,4,Afghanistan,7271,Temperature change,7020,Meteorological year,1964,1964,Â°C,-0.764,E,Estimated value
4,ET,Temperature change on land,4,Afghanistan,7271,Temperature change,7020,Meteorological year,1965,1965,Â°C,-0.244,E,Estimated value
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13998,ET,Temperature change on land,716,Zimbabwe,7271,Temperature change,7020,Meteorological year,2018,2018,Â°C,0.453,E,Estimated value
13999,ET,Temperature change on land,716,Zimbabwe,7271,Temperature change,7020,Meteorological year,2019,2019,Â°C,0.925,E,Estimated value
14000,ET,Temperature change on land,716,Zimbabwe,7271,Temperature change,7020,Meteorological year,2020,2020,Â°C,0.389,E,Estimated value
14001,ET,Temperature change on land,716,Zimbabwe,7271,Temperature change,7020,Meteorological year,2021,2021,Â°C,-0.125,E,Estimated value


In [20]:
temperature_df.columns

Index(['ï»¿Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Element Code',
       'Element', 'Months Code', 'Months', 'Year Code', 'Year', 'Unit',
       'Value', 'Flag', 'Flag Description'],
      dtype='object')

In [21]:
temperature_var_fin = temperature_df.drop(columns=['ï»¿Domain Code','Domain', 'Area Code (M49)', 'Element Code','Element', 'Months Code','Months', 'Year Code', 'Flag', 'Flag Description'])
temperature_var_fin

Unnamed: 0,Area,Year,Unit,Value
0,Afghanistan,1961,Â°C,-0.113
1,Afghanistan,1962,Â°C,-0.164
2,Afghanistan,1963,Â°C,0.847
3,Afghanistan,1964,Â°C,-0.764
4,Afghanistan,1965,Â°C,-0.244
...,...,...,...,...
13998,Zimbabwe,2018,Â°C,0.453
13999,Zimbabwe,2019,Â°C,0.925
14000,Zimbabwe,2020,Â°C,0.389
14001,Zimbabwe,2021,Â°C,-0.125


Try to use a simpler df for employment in agricolture 

In [22]:
employment_agr = pd.read_csv('employent_agricolture.csv')
employment_agr

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Indicator Code,Indicator,Sex Code,Sex,Year Code,Year,Element Code,Element,Source Code,Source,Unit,Value,Flag,Flag Description,Note
0,OEA,Employment Indicators: Agriculture,4,Afghanistan,21155,"Share of employment in agriculture, forestry a...",1,Total,2008,2008,6121,Value,3021,Household income and expenditure survey,%,59.1,X,Figure from international organizations,"Break in series: Methodology revised, Reposito..."
1,OEA,Employment Indicators: Agriculture,4,Afghanistan,21155,"Share of employment in agriculture, forestry a...",1,Total,2012,2012,6121,Value,3021,Household income and expenditure survey,%,38.6,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
2,OEA,Employment Indicators: Agriculture,4,Afghanistan,21155,"Share of employment in agriculture, forestry a...",1,Total,2014,2014,6121,Value,3021,Household income and expenditure survey,%,40.3,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
3,OEA,Employment Indicators: Agriculture,4,Afghanistan,21155,"Share of employment in agriculture, forestry a...",1,Total,2017,2017,6121,Value,3021,Household income and expenditure survey,%,42.8,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
4,OEA,Employment Indicators: Agriculture,4,Afghanistan,21155,"Share of employment in agriculture, forestry a...",2,Male,2008,2008,6121,Value,3021,Household income and expenditure survey,%,50.2,X,Figure from international organizations,"Break in series: Methodology revised, Reposito..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11650,OEA,Employment Indicators: Agriculture,716,Zimbabwe,21155,"Share of employment in agriculture, forestry a...",3,Female,1999,1999,6121,Value,3023,Labour force survey,%,69.5,X,Figure from international organizations,Data reference period: June
11651,OEA,Employment Indicators: Agriculture,716,Zimbabwe,21155,"Share of employment in agriculture, forestry a...",3,Female,2004,2004,6121,Value,3023,Labour force survey,%,71.1,X,Figure from international organizations,Data reference period: June
11652,OEA,Employment Indicators: Agriculture,716,Zimbabwe,21155,"Share of employment in agriculture, forestry a...",3,Female,2011,2011,6121,Value,3023,Labour force survey,%,71.7,X,Figure from international organizations,"Break in series: Methodology revised, Reposito..."
11653,OEA,Employment Indicators: Agriculture,716,Zimbabwe,21155,"Share of employment in agriculture, forestry a...",3,Female,2014,2014,6121,Value,3023,Labour force survey,%,71.6,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...


In [23]:
employment_agr.columns

Index(['Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Indicator Code',
       'Indicator', 'Sex Code', 'Sex', 'Year Code', 'Year', 'Element Code',
       'Element', 'Source Code', 'Source', 'Unit', 'Value', 'Flag',
       'Flag Description', 'Note'],
      dtype='object')

In [24]:
employment_agr2 = employment_agr.drop(columns=['Domain Code', 'Domain', 'Area Code (M49)','Sex Code', 'Indicator Code', 'Year Code', 'Element Code', 'Source Code', 'Flag', 'Note'])
employment_agr2

Unnamed: 0,Area,Indicator,Sex,Year,Element,Source,Unit,Value,Flag Description
0,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2008,Value,Household income and expenditure survey,%,59.1,Figure from international organizations
1,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2012,Value,Household income and expenditure survey,%,38.6,Figure from international organizations
2,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2014,Value,Household income and expenditure survey,%,40.3,Figure from international organizations
3,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2017,Value,Household income and expenditure survey,%,42.8,Figure from international organizations
4,Afghanistan,"Share of employment in agriculture, forestry a...",Male,2008,Value,Household income and expenditure survey,%,50.2,Figure from international organizations
...,...,...,...,...,...,...,...,...,...
11650,Zimbabwe,"Share of employment in agriculture, forestry a...",Female,1999,Value,Labour force survey,%,69.5,Figure from international organizations
11651,Zimbabwe,"Share of employment in agriculture, forestry a...",Female,2004,Value,Labour force survey,%,71.1,Figure from international organizations
11652,Zimbabwe,"Share of employment in agriculture, forestry a...",Female,2011,Value,Labour force survey,%,71.7,Figure from international organizations
11653,Zimbabwe,"Share of employment in agriculture, forestry a...",Female,2014,Value,Labour force survey,%,71.6,Figure from international organizations


In [25]:
employment_agr2 = employment_agr2.loc[employment_agr2["Sex"] == 'Total']
employment_agr2 

Unnamed: 0,Area,Indicator,Sex,Year,Element,Source,Unit,Value,Flag Description
0,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2008,Value,Household income and expenditure survey,%,59.1,Figure from international organizations
1,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2012,Value,Household income and expenditure survey,%,38.6,Figure from international organizations
2,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2014,Value,Household income and expenditure survey,%,40.3,Figure from international organizations
3,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2017,Value,Household income and expenditure survey,%,42.8,Figure from international organizations
12,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2020,Value,Labour force survey,%,44.7,Figure from international organizations
...,...,...,...,...,...,...,...,...,...
11640,Zimbabwe,"Share of employment in agriculture, forestry a...",Total,1999,Value,Labour force survey,%,60.0,Figure from international organizations
11641,Zimbabwe,"Share of employment in agriculture, forestry a...",Total,2004,Value,Labour force survey,%,64.8,Figure from international organizations
11642,Zimbabwe,"Share of employment in agriculture, forestry a...",Total,2011,Value,Labour force survey,%,65.9,Figure from international organizations
11643,Zimbabwe,"Share of employment in agriculture, forestry a...",Total,2014,Value,Labour force survey,%,67.2,Figure from international organizations


In [26]:
production_crops_df = pd.read_csv('Production_Crops_Livestock_E_All_Data_(Normalized).csv', encoding='latin-1', low_memory=False)
production_crops_df

Unnamed: 0,ï»¿Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (CPC),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,QCL,Crops and livestock products,32,Argentina,5510,Production,1199.9,Cereals n.e.c.,1961,1961,tonnes,16000.00,A,Official figure
1,QCL,Crops and livestock products,32,Argentina,5510,Production,1199.9,Cereals n.e.c.,1962,1962,tonnes,14000.00,A,Official figure
2,QCL,Crops and livestock products,32,Argentina,5510,Production,1199.9,Cereals n.e.c.,1963,1963,tonnes,15000.00,A,Official figure
3,QCL,Crops and livestock products,32,Argentina,5510,Production,1199.9,Cereals n.e.c.,1964,1964,tonnes,17000.00,A,Official figure
4,QCL,Crops and livestock products,32,Argentina,5510,Production,1199.9,Cereals n.e.c.,1965,1965,tonnes,20000.00,A,Official figure
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2742,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1199.9,Cereals n.e.c.,2017,2017,tonnes,2303.16,I,Imputed value
2743,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1199.9,Cereals n.e.c.,2018,2018,tonnes,2320.87,I,Imputed value
2744,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1199.9,Cereals n.e.c.,2019,2019,tonnes,2303.11,I,Imputed value
2745,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1199.9,Cereals n.e.c.,2020,2020,tonnes,2309.42,I,Imputed value


In [27]:
production_crops_df.columns

Index(['ï»¿Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Element Code',
       'Element', 'Item Code (CPC)', 'Item', 'Year Code', 'Year', 'Unit',
       'Value', 'Flag', 'Flag Description'],
      dtype='object')

In [28]:
production_cereal = production_crops_df.drop(columns=['ï»¿Domain Code', 'Domain','Area Code (M49)',  'Item Code (CPC)', 'Element Code', 'Year Code', 'Flag', 'Flag Description'])
production_cereal

Unnamed: 0,Area,Element,Item,Year,Unit,Value
0,Argentina,Production,Cereals n.e.c.,1961,tonnes,16000.00
1,Argentina,Production,Cereals n.e.c.,1962,tonnes,14000.00
2,Argentina,Production,Cereals n.e.c.,1963,tonnes,15000.00
3,Argentina,Production,Cereals n.e.c.,1964,tonnes,17000.00
4,Argentina,Production,Cereals n.e.c.,1965,tonnes,20000.00
...,...,...,...,...,...,...
2742,Zimbabwe,Production,Cereals n.e.c.,2017,tonnes,2303.16
2743,Zimbabwe,Production,Cereals n.e.c.,2018,tonnes,2320.87
2744,Zimbabwe,Production,Cereals n.e.c.,2019,tonnes,2303.11
2745,Zimbabwe,Production,Cereals n.e.c.,2020,tonnes,2309.42


In [29]:
gdp = pd.read_csv('GDP_total.csv')
gdp

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,MK,Macro Indicators,4,Afghanistan,6110,Value US$,22008,Gross Domestic Product,1970,1970,millions,1731.435587,X,Figure from international organizations,
1,MK,Macro Indicators,4,Afghanistan,6110,Value US$,22008,Gross Domestic Product,1971,1971,millions,1812.837521,X,Figure from international organizations,
2,MK,Macro Indicators,4,Afghanistan,6110,Value US$,22008,Gross Domestic Product,1972,1972,millions,1647.900178,X,Figure from international organizations,
3,MK,Macro Indicators,4,Afghanistan,6110,Value US$,22008,Gross Domestic Product,1973,1973,millions,1702.716294,X,Figure from international organizations,
4,MK,Macro Indicators,4,Afghanistan,6110,Value US$,22008,Gross Domestic Product,1974,1974,millions,2061.729287,X,Figure from international organizations,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10692,MK,Macro Indicators,716,Zimbabwe,6110,Value US$,22008,Gross Domestic Product,2018,2018,millions,23645.265550,X,Figure from international organizations,
10693,MK,Macro Indicators,716,Zimbabwe,6110,Value US$,22008,Gross Domestic Product,2019,2019,millions,22594.520380,X,Figure from international organizations,
10694,MK,Macro Indicators,716,Zimbabwe,6110,Value US$,22008,Gross Domestic Product,2020,2020,millions,21664.745860,X,Figure from international organizations,
10695,MK,Macro Indicators,716,Zimbabwe,6110,Value US$,22008,Gross Domestic Product,2021,2021,millions,24118.150860,X,Figure from international organizations,


In [30]:
gdp.columns

Index(['Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Element Code',
       'Element', 'Item Code', 'Item', 'Year Code', 'Year', 'Unit', 'Value',
       'Flag', 'Flag Description', 'Note'],
      dtype='object')

In [31]:
gdp = gdp.drop(columns = ['Domain Code', 'Domain', 'Area Code (M49)', 'Element Code' , 'Item Code', 'Year Code', 'Flag', 'Flag Description', 'Note'])
gdp

Unnamed: 0,Area,Element,Item,Year,Unit,Value
0,Afghanistan,Value US$,Gross Domestic Product,1970,millions,1731.435587
1,Afghanistan,Value US$,Gross Domestic Product,1971,millions,1812.837521
2,Afghanistan,Value US$,Gross Domestic Product,1972,millions,1647.900178
3,Afghanistan,Value US$,Gross Domestic Product,1973,millions,1702.716294
4,Afghanistan,Value US$,Gross Domestic Product,1974,millions,2061.729287
...,...,...,...,...,...,...
10692,Zimbabwe,Value US$,Gross Domestic Product,2018,millions,23645.265550
10693,Zimbabwe,Value US$,Gross Domestic Product,2019,millions,22594.520380
10694,Zimbabwe,Value US$,Gross Domestic Product,2020,millions,21664.745860
10695,Zimbabwe,Value US$,Gross Domestic Product,2021,millions,24118.150860


In [32]:
precipitations_df = pd.read_csv('average-precipitation-per-year.csv')
precipitations_df.drop(columns=['Code'])

Unnamed: 0,Entity,Year,Average precipitation in depth (mm per year)
0,Afghanistan,1962,327.0
1,Afghanistan,1967,327.0
2,Afghanistan,1972,327.0
3,Afghanistan,1977,327.0
4,Afghanistan,1982,327.0
...,...,...,...
2017,Zimbabwe,1997,657.0
2018,Zimbabwe,2002,657.0
2019,Zimbabwe,2007,657.0
2020,Zimbabwe,2012,657.0


In [33]:
prices_df = pd.read_csv('prices_data.csv')
prices_df = prices_df.drop(columns=['Domain Code', 'Domain', 'Area Code (M49)', 'Item Code', 'Item', 'Year Code', 'Months Code', 'Months', 'Unit', 'Flag', 'Flag Description', 'Note'])
prices_df = prices_df.groupby(['Area', 'Year']).mean()
prices_df = prices_df.reset_index()
prices_df

Unnamed: 0,Area,Year,Value
0,Afghanistan,2010,75.083788
1,Afghanistan,2011,83.096066
2,Afghanistan,2012,87.028704
3,Afghanistan,2013,93.618813
4,Afghanistan,2014,100.861938
...,...,...,...
1165,Åland Islands,2018,101.115037
1166,Åland Islands,2019,102.928436
1167,Åland Islands,2020,103.585137
1168,Åland Islands,2021,104.784347


###  Delete all years before 2010 (already done for prices) and rename countries columns

In [34]:
temp_lessthan2010 = temperature_var_fin[temperature_var_fin['Year'] < 2010]
temp_lt2010 = temp_lessthan2010.index
temperature_var_fin_ok = temperature_var_fin.drop(temp_lt2010)
temperature_var_fin_ok.rename(columns={'Area': 'Country'}, inplace=True)

In [35]:
cereal_lessthan2010 = production_cereal[production_cereal['Year'] < 2010]
cereal_lt2010 = cereal_lessthan2010.index
production_cereal2 = production_cereal.drop(cereal_lt2010)
production_cereal2.rename(columns={'Area': 'Country'}, inplace=True)

In [36]:
gdp_lessthan2010 = gdp[gdp['Year'] < 2010]
gdp_lt2010 = gdp_lessthan2010.index
gdp2 = gdp.drop(gdp_lt2010)
gdp2.rename(columns={'Area': 'Country'}, inplace=True)

In [37]:
popfin_lessthan2010 = population_fin[population_fin['Year'] < 2010]
popfin_lt2010 = popfin_lessthan2010.index
population_fin2 = population_fin.drop(popfin_lt2010)
population_fin2.rename(columns={'Area': 'Country'}, inplace=True)

In [38]:
emission_lessthan2010 = emission_share[emission_share['Year'] < 2010]
emission_lt2010 = emission_lessthan2010.index
emission_share2 = emission_share.drop(emission_lt2010)
emission_share2.rename(columns={'Entity': 'Country'}, inplace=True)

emission_share2 = emission_share2.replace("Russia", "Russian Federation")
emission_share2['Country'].unique()

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Asia', 'Asia (excl. China and India)', 'Australia', 'Austria',
       'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
       'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan',
       'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire",
       'Croatia', 'Cuba', 'Curacao', 'Cyprus', 'Czechia',
       'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Es

In [39]:
employment_lessthan2010 = employment_agr2[employment_agr2['Year'] < 2010]
employment_lt2010 = employment_lessthan2010.index
employment_agr3 = employment_agr2.drop(employment_lt2010)
employment_agr3.rename(columns={'Area': 'Country'}, inplace=True)

In [40]:
precipitations_lessthan2010 = precipitations_df[precipitations_df['Year'] < 2010]
precipitations_lt2010 = precipitations_lessthan2010.index
precipitations_df2 = precipitations_df.drop(precipitations_lt2010)
precipitations_df2.rename(columns={'Entity': 'Country'}, inplace=True)

In [41]:
prices_df.rename(columns={'Area': 'Country'}, inplace=True)
prices_df

Unnamed: 0,Country,Year,Value
0,Afghanistan,2010,75.083788
1,Afghanistan,2011,83.096066
2,Afghanistan,2012,87.028704
3,Afghanistan,2013,93.618813
4,Afghanistan,2014,100.861938
...,...,...,...
1165,Åland Islands,2018,101.115037
1166,Åland Islands,2019,102.928436
1167,Åland Islands,2020,103.585137
1168,Åland Islands,2021,104.784347


### Import continents dataset

In [42]:
continents = pd.read_csv('Countries-Continents.csv')
continents

Unnamed: 0,Continent,Country
0,Africa,Algeria
1,Africa,Angola
2,Africa,Benin
3,Africa,Botswana
4,Africa,Burkina
...,...,...
189,South America,Paraguay
190,South America,Peru
191,South America,Suriname
192,South America,Uruguay


### Merge datasets with the continent dataset

In [43]:
temperature_var_fin2 = pd.merge(temperature_var_fin_ok, continents, how = 'inner', on = 'Country')
temperature_var_fin2


Unnamed: 0,Country,Year,Unit,Value,Continent
0,Afghanistan,2010,Â°C,1.613,Asia
1,Afghanistan,2011,Â°C,1.397,Asia
2,Afghanistan,2012,Â°C,0.223,Asia
3,Afghanistan,2013,Â°C,1.281,Asia
4,Afghanistan,2014,Â°C,0.456,Asia
...,...,...,...,...,...
2177,Zimbabwe,2018,Â°C,0.453,Africa
2178,Zimbabwe,2019,Â°C,0.925,Africa
2179,Zimbabwe,2020,Â°C,0.389,Africa
2180,Zimbabwe,2021,Â°C,-0.125,Africa


In [44]:
production_cereal3 = pd.merge(production_cereal2, continents, how = 'inner', on = 'Country')
production_cereal3


Unnamed: 0,Country,Element,Item,Year,Unit,Value,Continent
0,Argentina,Production,Cereals n.e.c.,2010,tonnes,18978.00,South America
1,Argentina,Production,Cereals n.e.c.,2011,tonnes,16459.00,South America
2,Argentina,Production,Cereals n.e.c.,2012,tonnes,14750.00,South America
3,Argentina,Production,Cereals n.e.c.,2013,tonnes,19500.00,South America
4,Argentina,Production,Cereals n.e.c.,2014,tonnes,16269.58,South America
...,...,...,...,...,...,...,...
689,Zimbabwe,Production,Cereals n.e.c.,2017,tonnes,2303.16,Africa
690,Zimbabwe,Production,Cereals n.e.c.,2018,tonnes,2320.87,Africa
691,Zimbabwe,Production,Cereals n.e.c.,2019,tonnes,2303.11,Africa
692,Zimbabwe,Production,Cereals n.e.c.,2020,tonnes,2309.42,Africa


In [45]:
gdp3 = pd.merge(gdp2, continents, how = 'inner', on = 'Country')
gdp3

Unnamed: 0,Country,Element,Item,Year,Unit,Value,Continent
0,Afghanistan,Value US$,Gross Domestic Product,2010,millions,14698.88968,Asia
1,Afghanistan,Value US$,Gross Domestic Product,2011,millions,17350.69495,Asia
2,Afghanistan,Value US$,Gross Domestic Product,2012,millions,19136.49934,Asia
3,Afghanistan,Value US$,Gross Domestic Product,2013,millions,19621.80246,Asia
4,Afghanistan,Value US$,Gross Domestic Product,2014,millions,19550.70257,Asia
...,...,...,...,...,...,...,...
2179,Zimbabwe,Value US$,Gross Domestic Product,2018,millions,23645.26555,Africa
2180,Zimbabwe,Value US$,Gross Domestic Product,2019,millions,22594.52038,Africa
2181,Zimbabwe,Value US$,Gross Domestic Product,2020,millions,21664.74586,Africa
2182,Zimbabwe,Value US$,Gross Domestic Product,2021,millions,24118.15086,Africa


In [46]:
population_fin3 = pd.merge(population_fin2, continents, how = 'inner', on = 'Country')
population_fin3

Unnamed: 0,Domain,Country,Element,Year,Unit,Value,Continent
0,Annual population,Afghanistan,Total Population - Both sexes,2010,1000 persons,28189.672,Asia
1,Annual population,Afghanistan,Total Population - Both sexes,2011,1000 persons,29249.157,Asia
2,Annual population,Afghanistan,Total Population - Both sexes,2012,1000 persons,30466.479,Asia
3,Annual population,Afghanistan,Total Population - Both sexes,2013,1000 persons,31541.209,Asia
4,Annual population,Afghanistan,Total Population - Both sexes,2014,1000 persons,32716.210,Asia
...,...,...,...,...,...,...,...
2007,Annual population,Zimbabwe,Total Population - Both sexes,2017,1000 persons,14751.101,Africa
2008,Annual population,Zimbabwe,Total Population - Both sexes,2018,1000 persons,15052.184,Africa
2009,Annual population,Zimbabwe,Total Population - Both sexes,2019,1000 persons,15354.608,Africa
2010,Annual population,Zimbabwe,Total Population - Both sexes,2020,1000 persons,15669.666,Africa


In [47]:
emission_share3 = pd.merge(emission_share2, continents, how = 'inner', on = 'Country')
emission_share3

Unnamed: 0,Country,Code,Year,Share of global annual CO₂ emissions,Continent
0,Afghanistan,AFG,2010,0.025071,Asia
1,Afghanistan,AFG,2011,0.034327,Asia
2,Afghanistan,AFG,2012,0.028667,Asia
3,Afghanistan,AFG,2013,0.026191,Asia
4,Afghanistan,AFG,2014,0.025776,Asia
...,...,...,...,...,...
2143,Zimbabwe,ZWE,2017,0.026584,Africa
2144,Zimbabwe,ZWE,2018,0.032030,Africa
2145,Zimbabwe,ZWE,2019,0.029973,Africa
2146,Zimbabwe,ZWE,2020,0.030081,Africa


In [48]:
employment_agr4 = pd.merge(employment_agr3, continents, how = 'inner', on = 'Country')
employment_agr4

Unnamed: 0,Country,Indicator,Sex,Year,Element,Source,Unit,Value,Flag Description,Continent
0,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2012,Value,Household income and expenditure survey,%,38.6,Figure from international organizations,Asia
1,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2014,Value,Household income and expenditure survey,%,40.3,Figure from international organizations,Asia
2,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2017,Value,Household income and expenditure survey,%,42.8,Figure from international organizations,Asia
3,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2020,Value,Labour force survey,%,44.7,Figure from international organizations,Asia
4,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2021,Value,Labour force survey,%,25.1,Figure from international organizations,Asia
...,...,...,...,...,...,...,...,...,...,...
1065,Zambia,"Share of employment in agriculture, forestry a...",Total,2019,Value,Labour force survey,%,57.4,Figure from international organizations,Africa
1066,Zambia,"Share of employment in agriculture, forestry a...",Total,2020,Value,Labour force survey,%,59.1,Figure from international organizations,Africa
1067,Zimbabwe,"Share of employment in agriculture, forestry a...",Total,2011,Value,Labour force survey,%,65.9,Figure from international organizations,Africa
1068,Zimbabwe,"Share of employment in agriculture, forestry a...",Total,2014,Value,Labour force survey,%,67.2,Figure from international organizations,Africa


In [49]:
precipitations_df3 = pd.merge(precipitations_df2, continents, how = 'inner', on = 'Country')
precipitations_df3

Unnamed: 0,Country,Code,Year,Average precipitation in depth (mm per year),Continent
0,Afghanistan,AFG,2012,327.0,Asia
1,Afghanistan,AFG,2017,327.0,Asia
2,Albania,ALB,2012,1485.0,Europe
3,Albania,ALB,2017,1485.0,Europe
4,Algeria,DZA,2012,89.0,Africa
...,...,...,...,...,...
331,Yemen,YEM,2017,167.0,Asia
332,Zambia,ZMB,2012,1020.0,Africa
333,Zambia,ZMB,2017,1020.0,Africa
334,Zimbabwe,ZWE,2012,657.0,Africa


In [50]:
prices_df2 = pd.merge(prices_df, continents, how = 'inner', on = 'Country')
prices_df2

Unnamed: 0,Country,Year,Value,Continent
0,Afghanistan,2010,75.083788,Asia
1,Afghanistan,2011,83.096066,Asia
2,Afghanistan,2012,87.028704,Asia
3,Afghanistan,2013,93.618813,Asia
4,Afghanistan,2014,100.861938,Asia
...,...,...,...,...
918,Yemen,2018,131.374726,Asia
919,Yemen,2019,138.471777,Asia
920,Yemen,2020,149.003291,Asia
921,Yemen,2021,156.070264,Asia


In [51]:
temperature = pd.read_csv('GlobalLandTemperaturesByCountry.csv')
temperature

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland
...,...,...,...,...
577457,2013-05-01,19.059,1.022,Zimbabwe
577458,2013-06-01,17.613,0.473,Zimbabwe
577459,2013-07-01,17.000,0.453,Zimbabwe
577460,2013-08-01,19.759,0.717,Zimbabwe


In [81]:
temperature2 = pd.merge(temperature, continents, how = 'inner', on = 'Country')
temperature2 

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Continent
0,1838-04-01,13.008,2.586,Afghanistan,Asia
1,1838-05-01,,,Afghanistan,Asia
2,1838-06-01,23.950,2.510,Afghanistan,Asia
3,1838-07-01,26.877,2.883,Afghanistan,Asia
4,1838-08-01,24.938,2.992,Afghanistan,Asia
...,...,...,...,...,...
406424,2013-05-01,19.059,1.022,Zimbabwe,Africa
406425,2013-06-01,17.613,0.473,Zimbabwe,Africa
406426,2013-07-01,17.000,0.453,Zimbabwe,Africa
406427,2013-08-01,19.759,0.717,Zimbabwe,Africa


### Extract only Europe and Asia from all the datasets

In [52]:
temp_eurasia = temperature_var_fin2.loc[temperature_var_fin2['Continent'].isin(['Europe', 'Asia'])]
temp_eurasia

Unnamed: 0,Country,Year,Unit,Value,Continent
0,Afghanistan,2010,Â°C,1.613,Asia
1,Afghanistan,2011,Â°C,1.397,Asia
2,Afghanistan,2012,Â°C,0.223,Asia
3,Afghanistan,2013,Â°C,1.281,Asia
4,Afghanistan,2014,Â°C,0.456,Asia
...,...,...,...,...,...
2151,Yemen,2018,Â°C,,Asia
2152,Yemen,2019,Â°C,,Asia
2153,Yemen,2020,Â°C,,Asia
2154,Yemen,2021,Â°C,,Asia


In [53]:
cereal_eurasia = production_cereal3.loc[production_cereal3['Continent'].isin(['Europe', 'Asia'])]
cereal_eurasia

Unnamed: 0,Country,Element,Item,Year,Unit,Value,Continent
12,Armenia,Production,Cereals n.e.c.,2010,tonnes,4219.0,Europe
13,Armenia,Production,Cereals n.e.c.,2011,tonnes,9507.0,Europe
14,Armenia,Production,Cereals n.e.c.,2012,tonnes,14375.0,Europe
15,Armenia,Production,Cereals n.e.c.,2013,tonnes,16847.0,Europe
16,Armenia,Production,Cereals n.e.c.,2014,tonnes,18268.0,Europe
...,...,...,...,...,...,...,...
677,Yemen,Production,Cereals n.e.c.,2017,tonnes,0.0,Asia
678,Yemen,Production,Cereals n.e.c.,2018,tonnes,0.0,Asia
679,Yemen,Production,Cereals n.e.c.,2019,tonnes,0.0,Asia
680,Yemen,Production,Cereals n.e.c.,2020,tonnes,0.0,Asia


In [54]:
gdp_eurasia = gdp3.loc[gdp3['Continent'].isin(['Europe', 'Asia'])]
gdp_eurasia

Unnamed: 0,Country,Element,Item,Year,Unit,Value,Continent
0,Afghanistan,Value US$,Gross Domestic Product,2010,millions,14698.889680,Asia
1,Afghanistan,Value US$,Gross Domestic Product,2011,millions,17350.694950,Asia
2,Afghanistan,Value US$,Gross Domestic Product,2012,millions,19136.499340,Asia
3,Afghanistan,Value US$,Gross Domestic Product,2013,millions,19621.802460,Asia
4,Afghanistan,Value US$,Gross Domestic Product,2014,millions,19550.702570,Asia
...,...,...,...,...,...,...,...
2153,Yemen,Value US$,Gross Domestic Product,2018,millions,26671.579500,Asia
2154,Yemen,Value US$,Gross Domestic Product,2019,millions,12980.213070,Asia
2155,Yemen,Value US$,Gross Domestic Product,2020,millions,9416.580652,Asia
2156,Yemen,Value US$,Gross Domestic Product,2021,millions,9946.815477,Asia


In [55]:
population_eurasia = population_fin3.loc[population_fin3['Continent'].isin(['Europe', 'Asia'])]
population_eurasia

Unnamed: 0,Domain,Country,Element,Year,Unit,Value,Continent
0,Annual population,Afghanistan,Total Population - Both sexes,2010,1000 persons,28189.672,Asia
1,Annual population,Afghanistan,Total Population - Both sexes,2011,1000 persons,29249.157,Asia
2,Annual population,Afghanistan,Total Population - Both sexes,2012,1000 persons,30466.479,Asia
3,Annual population,Afghanistan,Total Population - Both sexes,2013,1000 persons,31541.209,Asia
4,Annual population,Afghanistan,Total Population - Both sexes,2014,1000 persons,32716.210,Asia
...,...,...,...,...,...,...,...
1983,Annual population,Yemen,Total Population - Both sexes,2017,1000 persons,30034.389,Asia
1984,Annual population,Yemen,Total Population - Both sexes,2018,1000 persons,30790.513,Asia
1985,Annual population,Yemen,Total Population - Both sexes,2019,1000 persons,31546.691,Asia
1986,Annual population,Yemen,Total Population - Both sexes,2020,1000 persons,32284.046,Asia


In [56]:
emission_eurasia = emission_share3.loc[emission_share3['Continent'].isin(['Europe', 'Asia'])]
emission_eurasia['Country'].unique()


array(['Afghanistan', 'Albania', 'Andorra', 'Armenia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium',
       'Bhutan', 'Bosnia and Herzegovina', 'Brunei', 'Bulgaria',
       'Cambodia', 'China', 'Croatia', 'Cyprus', 'Denmark', 'Estonia',
       'Finland', 'France', 'Georgia', 'Germany', 'Greece', 'Hungary',
       'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland',
       'Israel', 'Italy', 'Japan', 'Jordan', 'Kazakhstan', 'Kuwait',
       'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Liechtenstein',
       'Lithuania', 'Luxembourg', 'Malaysia', 'Maldives', 'Malta',
       'Moldova', 'Mongolia', 'Montenegro', 'Nepal', 'Netherlands',
       'Norway', 'Oman', 'Pakistan', 'Philippines', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Russian Federation', 'Saudi Arabia', 'Serbia',
       'Singapore', 'Slovakia', 'Slovenia', 'Spain', 'Sri Lanka',
       'Sweden', 'Switzerland', 'Syria', 'Tajikistan', 'Thailand',
       'Turkey', 'Turkmenistan', 'Ukrai

In [57]:
employment_eurasia = employment_agr4.loc[employment_agr4['Continent'].isin(['Europe', 'Asia'])]
employment_eurasia

Unnamed: 0,Country,Indicator,Sex,Year,Element,Source,Unit,Value,Flag Description,Continent
0,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2012,Value,Household income and expenditure survey,%,38.6,Figure from international organizations,Asia
1,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2014,Value,Household income and expenditure survey,%,40.3,Figure from international organizations,Asia
2,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2017,Value,Household income and expenditure survey,%,42.8,Figure from international organizations,Asia
3,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2020,Value,Labour force survey,%,44.7,Figure from international organizations,Asia
4,Afghanistan,"Share of employment in agriculture, forestry a...",Total,2021,Value,Labour force survey,%,25.1,Figure from international organizations,Asia
...,...,...,...,...,...,...,...,...,...,...
1055,Uzbekistan,"Share of employment in agriculture, forestry a...",Total,2018,Value,Household survey,%,26.6,Figure from international organizations,Asia
1056,Uzbekistan,"Share of employment in agriculture, forestry a...",Total,2019,Value,Household survey,%,26.2,Figure from international organizations,Asia
1057,Uzbekistan,"Share of employment in agriculture, forestry a...",Total,2020,Value,Household survey,%,26.9,Figure from international organizations,Asia
1060,Yemen,"Share of employment in agriculture, forestry a...",Total,2010,Value,Labour force survey,%,24.1,Figure from international organizations,Asia


In [58]:
precipitations_eurasia = precipitations_df3.loc[precipitations_df3['Continent'].isin(['Europe', 'Asia'])]
precipitations_eurasia

Unnamed: 0,Country,Code,Year,Average precipitation in depth (mm per year),Continent
0,Afghanistan,AFG,2012,327.0,Asia
1,Afghanistan,AFG,2017,327.0,Asia
2,Albania,ALB,2012,1485.0,Europe
3,Albania,ALB,2017,1485.0,Europe
12,Armenia,ARM,2012,562.0,Europe
...,...,...,...,...,...
323,Uzbekistan,UZB,2017,206.0,Asia
328,Vietnam,VNM,2012,1821.0,Asia
329,Vietnam,VNM,2017,1821.0,Asia
330,Yemen,YEM,2012,167.0,Asia


In [59]:
prices_eurasia = prices_df2.loc[prices_df2['Continent'].isin(['Europe', 'Asia'])]
prices_eurasia

Unnamed: 0,Country,Year,Value,Continent
0,Afghanistan,2010,75.083788,Asia
1,Afghanistan,2011,83.096066,Asia
2,Afghanistan,2012,87.028704,Asia
3,Afghanistan,2013,93.618813,Asia
4,Afghanistan,2014,100.861938,Asia
...,...,...,...,...
918,Yemen,2018,131.374726,Asia
919,Yemen,2019,138.471777,Asia
920,Yemen,2020,149.003291,Asia
921,Yemen,2021,156.070264,Asia


In [85]:
temperature3 = temperature2.loc[temperature2['Continent'].isin(['Europe', 'Asia'])]
temperature3

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Continent
0,1838-04-01,13.008,2.586,Afghanistan,Asia
1,1838-05-01,,,Afghanistan,Asia
2,1838-06-01,23.950,2.510,Afghanistan,Asia
3,1838-07-01,26.877,2.883,Afghanistan,Asia
4,1838-08-01,24.938,2.992,Afghanistan,Asia
...,...,...,...,...,...
402494,2013-05-01,31.173,0.993,Yemen,Asia
402495,2013-06-01,32.325,2.249,Yemen,Asia
402496,2013-07-01,31.340,0.804,Yemen,Asia
402497,2013-08-01,30.833,2.352,Yemen,Asia


### Research question definition

From the data cleaning part we can see that Europe and Asia are the continents with most available data. For this reason we narrow down our research on these 2 continents and therefore our research question focuses on the research and visualization of interesting insights about crop production in Europe and Asia.

### Save the datasets as csv

In [86]:
temp_eurasia.to_csv('temperature_var.csv')
cereal_eurasia.to_csv('production_cereal.csv')
gdp_eurasia.to_csv('gdp.csv')
population_eurasia.to_csv('population.csv')
emission_eurasia.to_csv('emission_share.csv')
employment_eurasia.to_csv('employement_agr.csv')
precipitations_eurasia.to_csv('precipitations.csv')
prices_eurasia.to_csv('prices.csv')
temperature3.to_csv('temperature_data.csv')

### Make a deeper clean of datasets now that we just have Europe and Asia

First, we check if there are still null values.

In [61]:
temp_eurasia.isnull().sum()

Country       0
Year          0
Unit          0
Value        13
Continent     0
dtype: int64

In [62]:
cereal_eurasia.isnull().sum()

Country      0
Element      0
Item         0
Year         0
Unit         0
Value        0
Continent    0
dtype: int64

In [63]:
gdp_eurasia.isnull().sum()

Country      0
Element      0
Item         0
Year         0
Unit         0
Value        0
Continent    0
dtype: int64

In [64]:
population_eurasia.isnull().sum()

Domain       0
Country      0
Element      0
Year         0
Unit         0
Value        0
Continent    0
dtype: int64

In [65]:
emission_eurasia.isnull().sum()

Country                                 0
Code                                    0
Year                                    0
Share of global annual CO₂ emissions    0
Continent                               0
dtype: int64

In [66]:
employment_eurasia.isnull().sum()

Country             0
Indicator           0
Sex                 0
Year                0
Element             0
Source              0
Unit                0
Value               0
Flag Description    0
Continent           0
dtype: int64

In [67]:
precipitations_eurasia.isnull().sum()

Country                                         0
Code                                            0
Year                                            0
Average precipitation in depth (mm per year)    0
Continent                                       0
dtype: int64

In [68]:
prices_eurasia.isnull().sum()

Country      0
Year         0
Value        0
Continent    0
dtype: int64

As only temp_eurasia is the only dataset with null values, we can further investigate to which country their related to.

In [69]:
temp_eurasia_null = temp_eurasia.loc[temp_eurasia['Value'].isnull()]
yemen = temp_eurasia.loc[temp_eurasia['Country'] == 'Yemen']
temp_eurasia_null.index == yemen.index

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

From this previous command we can see that all the rows that contain Yemen as a country have null values. We can therefore drop these rows.

In [70]:
temp_eurasia2 = temp_eurasia.drop(temp_eurasia_null.index)
temp_eurasia2

Unnamed: 0,Country,Year,Unit,Value,Continent
0,Afghanistan,2010,Â°C,1.613,Asia
1,Afghanistan,2011,Â°C,1.397,Asia
2,Afghanistan,2012,Â°C,0.223,Asia
3,Afghanistan,2013,Â°C,1.281,Asia
4,Afghanistan,2014,Â°C,0.456,Asia
...,...,...,...,...,...
2125,Uzbekistan,2018,Â°C,1.044,Asia
2126,Uzbekistan,2019,Â°C,1.943,Asia
2127,Uzbekistan,2020,Â°C,1.721,Asia
2128,Uzbekistan,2021,Â°C,1.532,Asia


In [71]:
temp_eurasia2['Country'].unique()

array(['Afghanistan', 'Albania', 'Andorra', 'Armenia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium',
       'Bhutan', 'Bosnia and Herzegovina', 'Bulgaria', 'Cambodia',
       'China', 'Croatia', 'Cyprus', 'Denmark', 'Estonia', 'Finland',
       'France', 'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'India', 'Indonesia', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Japan', 'Jordan', 'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Latvia',
       'Lebanon', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malaysia',
       'Maldives', 'Malta', 'Monaco', 'Mongolia', 'Montenegro', 'Nepal',
       'Norway', 'Oman', 'Pakistan', 'Philippines', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Russian Federation', 'San Marino',
       'Saudi Arabia', 'Serbia', 'Singapore', 'Slovakia', 'Slovenia',
       'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Tajikistan',
       'Thailand', 'Turkmenistan', 'Ukraine', 'United Arab Emirates',
       'Uzbekistan'], dty

In [72]:
cereal_eurasia['Country'].unique()

array(['Armenia', 'Austria', 'Belarus', 'Belgium', 'Bhutan',
       'Bosnia and Herzegovina', 'Bulgaria', 'China', 'Croatia',
       'Estonia', 'Finland', 'France', 'Georgia', 'Greece', 'Hungary',
       'Ireland', 'Italy', 'Kazakhstan', 'Latvia', 'Lithuania',
       'Luxembourg', 'Malta', 'Mongolia', 'Oman', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Russian Federation', 'Serbia', 'Slovakia',
       'Slovenia', 'Spain', 'Switzerland', 'Thailand', 'Ukraine',
       'Uzbekistan', 'Yemen'], dtype=object)

In [73]:
gdp_eurasia['Country'].unique()

array(['Afghanistan', 'Albania', 'Andorra', 'Armenia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium',
       'Bhutan', 'Bosnia and Herzegovina', 'Bulgaria', 'Cambodia',
       'China', 'Croatia', 'Cyprus', 'Denmark', 'Estonia', 'Finland',
       'France', 'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'India', 'Indonesia', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Japan', 'Jordan', 'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Latvia',
       'Lebanon', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malaysia',
       'Maldives', 'Malta', 'Monaco', 'Mongolia', 'Montenegro', 'Nepal',
       'Norway', 'Oman', 'Pakistan', 'Philippines', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Russian Federation', 'San Marino',
       'Saudi Arabia', 'Serbia', 'Singapore', 'Slovakia', 'Slovenia',
       'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Tajikistan',
       'Thailand', 'Turkmenistan', 'Ukraine', 'United Arab Emirates',
       'Uzbekistan', 'Yem

In [74]:
population_eurasia['Country'].unique()

array(['Afghanistan', 'Albania', 'Andorra', 'Armenia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium',
       'Bhutan', 'Bosnia and Herzegovina', 'Bulgaria', 'Cambodia',
       'China', 'Croatia', 'Cyprus', 'Denmark', 'Estonia', 'Finland',
       'France', 'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'India', 'Indonesia', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Japan', 'Jordan', 'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Latvia',
       'Lebanon', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malaysia',
       'Maldives', 'Malta', 'Monaco', 'Mongolia', 'Montenegro', 'Nepal',
       'Norway', 'Oman', 'Pakistan', 'Philippines', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Russian Federation', 'San Marino',
       'Saudi Arabia', 'Serbia', 'Singapore', 'Slovakia', 'Slovenia',
       'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Tajikistan',
       'Thailand', 'Turkmenistan', 'Ukraine', 'United Arab Emirates',
       'Uzbekistan', 'Yem

In [75]:
emission_eurasia['Country'].unique()

array(['Afghanistan', 'Albania', 'Andorra', 'Armenia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium',
       'Bhutan', 'Bosnia and Herzegovina', 'Brunei', 'Bulgaria',
       'Cambodia', 'China', 'Croatia', 'Cyprus', 'Denmark', 'Estonia',
       'Finland', 'France', 'Georgia', 'Germany', 'Greece', 'Hungary',
       'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland',
       'Israel', 'Italy', 'Japan', 'Jordan', 'Kazakhstan', 'Kuwait',
       'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Liechtenstein',
       'Lithuania', 'Luxembourg', 'Malaysia', 'Maldives', 'Malta',
       'Moldova', 'Mongolia', 'Montenegro', 'Nepal', 'Netherlands',
       'Norway', 'Oman', 'Pakistan', 'Philippines', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Russian Federation', 'Saudi Arabia', 'Serbia',
       'Singapore', 'Slovakia', 'Slovenia', 'Spain', 'Sri Lanka',
       'Sweden', 'Switzerland', 'Syria', 'Tajikistan', 'Thailand',
       'Turkey', 'Turkmenistan', 'Ukrai

In [76]:
employment_eurasia['Country'].unique()

array(['Afghanistan', 'Albania', 'Armenia', 'Austria', 'Azerbaijan',
       'Bahrain', 'Bangladesh', 'Belarus', 'Belgium', 'Bhutan',
       'Bosnia and Herzegovina', 'Bulgaria', 'Cambodia', 'China',
       'Croatia', 'Cyprus', 'Denmark', 'Estonia', 'Finland', 'France',
       'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland', 'India',
       'Indonesia', 'Ireland', 'Israel', 'Italy', 'Japan', 'Jordan',
       'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Latvia', 'Lebanon',
       'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malaysia', 'Maldives',
       'Malta', 'Mongolia', 'Montenegro', 'Nepal', 'Norway', 'Oman',
       'Pakistan', 'Philippines', 'Poland', 'Portugal', 'Qatar',
       'Romania', 'Russian Federation', 'San Marino', 'Serbia',
       'Slovakia', 'Slovenia', 'Spain', 'Sri Lanka', 'Sweden',
       'Switzerland', 'Tajikistan', 'Thailand', 'Ukraine',
       'United Arab Emirates', 'Uzbekistan', 'Yemen'], dtype=object)

In [77]:
precipitations_eurasia['Country'].unique()

array(['Afghanistan', 'Albania', 'Armenia', 'Austria', 'Azerbaijan',
       'Bahrain', 'Bangladesh', 'Belarus', 'Belgium', 'Bhutan',
       'Bosnia and Herzegovina', 'Brunei', 'Bulgaria', 'Cambodia',
       'China', 'Croatia', 'Cyprus', 'Denmark', 'Estonia', 'Finland',
       'France', 'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Japan', 'Jordan', 'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Laos',
       'Latvia', 'Lebanon', 'Lithuania', 'Luxembourg', 'Malaysia',
       'Maldives', 'Malta', 'Moldova', 'Mongolia', 'Nepal', 'Netherlands',
       'Norway', 'Oman', 'Pakistan', 'Philippines', 'Poland', 'Portugal',
       'Qatar', 'Romania', 'Saudi Arabia', 'Singapore', 'Slovakia',
       'Slovenia', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Syria',
       'Tajikistan', 'Thailand', 'Turkey', 'Turkmenistan', 'Ukraine',
       'United Arab Emirates', 'United Kingdom', 'Uzbekistan', 'Vietnam',
       'Y

In [78]:
prices_eurasia['Country'].unique()

array(['Afghanistan', 'Albania', 'Andorra', 'Armenia', 'Austria',
       'Azerbaijan', 'Bahrain', 'Bangladesh', 'Belarus', 'Belgium',
       'Bhutan', 'Bosnia and Herzegovina', 'Bulgaria', 'Cambodia',
       'Croatia', 'Cyprus', 'Denmark', 'Estonia', 'Finland', 'France',
       'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland', 'India',
       'Indonesia', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Japan',
       'Jordan', 'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Latvia',
       'Lebanon', 'Lithuania', 'Luxembourg', 'Malaysia', 'Maldives',
       'Malta', 'Mongolia', 'Montenegro', 'Nepal', 'Norway', 'Oman',
       'Pakistan', 'Philippines', 'Poland', 'Portugal', 'Qatar',
       'Romania', 'Russian Federation', 'San Marino', 'Saudi Arabia',
       'Serbia', 'Singapore', 'Slovakia', 'Slovenia', 'Spain',
       'Sri Lanka', 'Sweden', 'Switzerland', 'Tajikistan', 'Thailand',
       'Ukraine', 'United Arab Emirates', 'Uzbekistan', 'Yemen'],
      dtype=object)

In [79]:
# when we plot temperature with other datasets, we need to exclude yemen, as we don't have temperature data for yemen

### Plot data from datasets to see trends and patterns

In [80]:
# The datasets we are going to use are
temperature_var_fin #for variation in temperature  per country across years
production_cereal #for production of cereals per country across years
gdp #for gdp of every country and every year, in $
population_fin #for population by country by year
emission_share #for share of total emission by country every year
employment_agr2 #for share of population working in agricolture
precipitations_df # add dataset about precipitations
prices_df  # to see prices
print('ciao')

ciao
