## Dataset Exploration

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

filepath = 'Data/CountriesOnly.csv'

df = pd.read_csv(filepath, encoding = 'unicode_escape')
df = df.drop(['Unnamed: 0'], axis = 1)
df

Unnamed: 0,country,UNRegion,year,record,cropLand,grazingLand,forestLand,fishingGround,builtupLand,carbon,total,GDP,population,Unnamed: 14,countryCode
0,Armenia,Asia,1992,BiocapPerCap,1.611286e-01,1.350234e-01,8.383553e-02,1.371800e-02,3.366853e-02,0.000000e+00,4.273741e-01,949.033,3.449000e+06,,1.0
1,Armenia,Asia,1992,BiocapTotGHA,5.558130e+05,4.657633e+05,2.891907e+05,4.732022e+04,1.161396e+05,0.000000e+00,1.474227e+06,949.033,3.449000e+06,,1.0
2,Armenia,Asia,1992,EFConsPerCap,3.909225e-01,1.891373e-01,1.250000e-06,4.137644e-03,3.366853e-02,1.112225e+00,1.730092e+00,949.033,3.449000e+06,,1.0
3,Armenia,Asia,1992,EFConsTotGHA,1.348487e+06,6.524291e+05,4.327841e+00,1.427280e+04,1.161396e+05,3.836620e+06,5.967954e+06,949.033,3.449000e+06,,1.0
4,Armenia,Asia,1992,EFExportsPerCap,1.124910e-03,2.283036e-03,0.000000e+00,4.383810e-04,0.000000e+00,4.819043e-02,5.203676e-02,949.033,3.449000e+06,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94425,World,World,2018,EFExportsTotGHA,1.359886e+09,3.054277e+08,7.671541e+08,6.480328e+06,0.000000e+00,0.000000e+00,2.438948e+09,,7.631090e+09,,5001.0
94426,World,World,2018,EFImportsPerCap,1.811119e-01,4.847720e-02,1.004372e-01,6.484080e-04,0.000000e+00,0.000000e+00,3.306746e-01,,7.631090e+09,,5001.0
94427,World,World,2018,EFImportsTotGHA,1.382081e+09,3.699339e+08,7.664450e+08,4.948058e+06,0.000000e+00,0.000000e+00,2.523408e+09,,7.631090e+09,,5001.0
94428,World,World,2018,EFProdPerCap,5.134599e-01,1.346414e-01,2.845092e-01,8.953372e-02,6.324995e-02,1.689557e+00,2.774951e+00,,7.631090e+09,,5001.0


In [2]:
# Following countries don't have a UNRegion

print(len(set(df[df.UNRegion.isna()].country)))
len(df[df.UNRegion.isna()].country)
print(set(df[df.UNRegion.isna()].country))

8
{'Saint Vincent and Grenadines', "CÃ´te d'Ivoire", 'Solomon Islands', 'Republic of North Macedonia', 'State of Palestine', 'RÃ©union', 'Republic of Moldova', 'Eswatini'}


In [3]:
df.isna().sum()

country              0
UNRegion           290
year                 0
record               0
cropLand         25040
grazingLand      25040
forestLand       25040
fishingGround    25040
builtupLand      25040
carbon           25040
total                0
GDP              21170
population          10
Unnamed: 14      94430
countryCode       5150
dtype: int64

## Plotly Arab Countries

In [4]:
import numpy as np
import math
import plotly.graph_objs as go
import plotly.offline as py
from plotly import tools
py.init_notebook_mode(connected=True)

arab_countries = ['Egypt','Algeria','Bahrain','Libyan Arab Jamahiriya','Jordan','Iraq','Mauritania','Morocco',
                  'Saudi Arabia','Kuwait','Qatar','Sudan (former)', 'Oman','Tunisia','United Arab Emirates','Yemen',
                  'Lebanon','Syrian Arab Republic','Somalia','Comoros','Djibouti']

colors = ['blue','gray','red','green','pink',
          'steelblue','yellow','magenta','brown',
          'orange','tan','seagreen','olive',
          'turquoise','mintcream','yellowgreen',
          'darkkhaki','coral','chocolate','rosybrown',
          'dodgerblue','heather']

# We have data from the year 1961 to 2018
years=np.sort(df.year.unique())

# Creating a df with data of all Arab countries
arab_df = pd.DataFrame()
for country in arab_countries:
    arab_df = arab_df.append(df[df.country.isin([country])])

In [5]:
# Extracts the rows for a particular country for a particular record type for all 58 years
def extract_country_by_record(df, country_name, record):
    country_foot_print=df[df.country.isin([country_name])]
    country_by_record = country_foot_print [country_foot_print.record.isin([record])]
    return country_by_record

# Returns the following lists - 
# 1. feature_values -> list of population values
# 2. available_countries -> list of countries with population data
# 3. excluded_countries -> list of countries without population data
# feature_value has the value of the column for a particular year of that country
# example: can have value of Lebanon's population for year 2014
def extract_countries_feature_by_year(df,countries_list,feature,year,record="BiocapPerCap"):
    excluded_countries=[]
    feature_values=[]
    available_countries=[]
    # Looping through list of arab countries
    for i in range (0,len(countries_list)):
        country_by_record = extract_country_by_record(df,countries_list[i],record)
        feature_value = country_by_record.loc[lambda df1: country_by_record.year == year][feature].values
        if  feature_value.size==0 or math.isnan(feature_value[0]) :
            excluded_countries.append(countries_list[i])
        else:
            feature_values.append(feature_value[0]) 
            available_countries.append(countries_list[i])
            
#  Uncomment if you need to print the excluded countries in the year
#     if len(excluded_countries) != 0:
#         print("excluded countries in {0} are : ".format(year))
#         for i in excluded_countries:
#             print(i)
    return feature_values, available_countries, excluded_countries 


def print_excluded_countries (excluded_countries,year):
    if len(excluded_countries) != 0:
        print("excluded countries from dataset in {0} are : ".format(year))
        for i in excluded_countries:
            print(i)   
            
# Returns percentage_growth_rate
def calculate_growth_rate(present,past,period):
    #present : present year , past: past year , period: number of years between present and past
    percentage_growth_rate = ((present - past)/(past*period))*100
    return percentage_growth_rate


### Arab countries distrbution according to regions

In [6]:
region=[]
# sub_region=[]

for country in arab_countries:
    region.append(arab_df[arab_df.country.isin([country])]["UNRegion"].unique()[0])
#     sub_region.append(arab_df[arab_df.country.isin([country])]["UN_subregion"].unique()[0])

# There are 11 arab countries in Asia and 10 in Africa
# region_labels holds the values of the regions
# region_values holds values of the count of how many countries are there in that region
region_labels = pd.Series(region).value_counts().index
region_values = pd.Series(region).value_counts().values


trace0  = go.Bar(x= region_labels,
                 y= region_values,
                 marker=dict(color='#f0000a',
                             line=dict(color='rgb(8,48,107)',
                                       width=0.5,)),
                 opacity=0.5,
                name = 'region',
                hoverinfo="x + y")
go_plot = [trace0]
layout = go.Layout(
    title='Arab countries distrbution according to UN regions',)
fig = go.Figure(data=go_plot, layout=layout)

py.iplot(fig)

### Population Analysis

In [7]:
population, available_countries, excluded_countries = extract_countries_feature_by_year(arab_df,arab_countries,'population',2014)

# Manually adding population for Sudan
available_countries.append("Sudan")
population.append(37737900)


population_df = pd.DataFrame({'country':available_countries,'population':population}).sort_values(by='population',ascending=True) 
population_list = list (population_df['population'])
countries = list (population_df['country'])
annotations = []
y_nw = np.array(population_list)

for ydn,  xd in zip(y_nw, countries):
    # labeling the scatter savings
    annotations.append(dict(xref='x', yref='y',
                            y=xd, x=ydn + 5000000,
                            text='{:,} M'.format(np.round(ydn/10**6,2)),
                            font=dict(family='Arial', size=12,
                                      color='rgb(50, 0, 50)'),
                            showarrow=False))
fig  = {
  "data": [
    {
      "values": population_list,
      "labels": countries,
      "hoverinfo":"label+percent",
      "hole": .3,
      "type": "pie",
      'domain': {'x': [.4, 1],
                       'y': [0.2, .8]},
            'hoverinfo':'label+percent',
            'textinfo':'percent'
    },
      {
          "x": population_list,
          "y": countries,
          "type": "bar",
          "orientation" :'h',
          "hoverinfo":"x",
          "marker" : dict(color='rgba(128, 0, 128,0.7)',
                          line=dict(color='rgb(255,0,255)',
                                       width=2)),
          
          "opacity":0.7,
          "name":"Population",
          } 
  ],
  "layout": {
        "title":"Arab countries population 2014",
        'annotations': annotations,
        "yaxis":dict(
            showgrid=False,
            showline=False,
            showticklabels=True,
            tickfont=dict(family='Arial', size=12,color='rgb(50, 0, 50)')),
        "width": 1000,
        "height":700,
    "paper_bgcolor":'rgb(250, 240, 250)',
    "plot_bgcolor":'rgb(250, 240, 250)',
      
}}


py.iplot(fig)

In [8]:
# traces will contain a list of each country's population for that particular year
# example: Egypyt{1961: 234235 ..... 2018: 54657}, Lebanon{1961: 234235 ..... 2018: 54657} etc.
traces = []
annotations = []

# For each arab country, append the info to a trace
for i in range(len(arab_countries)):
    country_by_record = extract_country_by_record(arab_df,arab_countries[i],'BiocapPerCap')
    traces.append(go.Scatter(
            x=country_by_record['year'],
            y=country_by_record['population'],
            mode='lines',
            line=dict(color=colors[i], width=1.5),
            text= arab_countries[i],
            hoverinfo="text + x + y",
            connectgaps=True,
            name =arab_countries[i],
            textfont=dict(family='Arial', size=12),
    ))
    
layout = go.Layout(
    title = "Arab countries population growth",
   xaxis=dict(
            showline=True,
            showgrid=True,
            showticklabels=True,
            linecolor='rgb(150, 150, 150)',
            linewidth=2,
            gridcolor='rgb(90, 90, 90)',
            ticks='outside',
            tickcolor='rgb(80, 80, 80)',
            tickwidth=2,
            ticklen=5,
            tickfont=dict(
            family='Arial',
            size=13,
            color='rgb(180, 180, 180)',
        ),
    ),
    yaxis=dict(
            showgrid=True,
            zeroline=True,
            showline=False,
            gridcolor='rgb(80, 80, 80)',
            showticklabels=True,
            tickcolor='rgb(150, 150, 150)',
            tickwidth=2,
            ticklen=5,
            tickfont=dict(
            family='Arial',
            size=13,
            color='rgb(180, 180, 180)')
    ),
   font=dict(family='Arial', size=12,
            color='rgb(180, 180, 180)'),
            showlegend=True, 
            width = 900,
            height = 700,
            paper_bgcolor='rgba(0, 0, 0,.9)',
            plot_bgcolor='rgba(0, 0, 0,0)'
)
    
fig = go.Figure(data=traces, layout= layout)
py.iplot(fig)

In [9]:
# Population analysis for 2000 and 2010
population_2000,available_countries,excluded_countries_2000=extract_countries_feature_by_year(arab_df,arab_countries,'population',2000)
population_2010,available_countries,excluded_countries_2010=extract_countries_feature_by_year(arab_df,arab_countries,'population',2010)


population_growth_rate = []
# For each country, finding growth rate of population in 10 years from 2000 to 2010
for i in range (0,len(population_2000)):
    growth_rate = np.round(calculate_growth_rate(population_2010[i],population_2000[i],10),2)
    population_growth_rate.append(growth_rate)
    
growth_rate_df = pd.DataFrame({"country":available_countries,"growth rate":population_growth_rate}).sort_values(by="growth rate",ascending=False)
print_excluded_countries(excluded_countries_2000, 2000)  
print_excluded_countries(excluded_countries_2010, 2010)

table = go.Table(header=dict(values=['Country', 'Growth rate']),
        cells=dict(values= [growth_rate_df['country'],growth_rate_df['growth rate'].astype(str)+"%"]))
py.iplot([table])

excluded countries from dataset in 2000 are : 
Saudi Arabia
excluded countries from dataset in 2010 are : 
Saudi Arabia


In [10]:
growth_rate_df =growth_rate_df.sort_values(by="growth rate",ascending=True)
trace0  = go.Bar(x= growth_rate_df["growth rate"],
                 y= growth_rate_df["country"],
                 orientation ='h',
                 marker=dict(color='rgba(255, 255, 0, 1.0)',
                             line=dict(color='rgba(250, 80, 0, 1.0)',
                                       width=4)),
                opacity=0.7,
                hoverinfo="x + y")

annotations = []
y_nw = np.array(growth_rate_df["growth rate"])
for ydn,  xd in zip(y_nw, growth_rate_df["country"]):
    # labeling the scatter savings
    annotations.append(dict(xref='x', yref='y',
                            y=xd, x=ydn+0.9,
                            text='{:,} %'.format(np.round(ydn,2)),
                            font=dict(family='Arial', size=12,
                                      color='rgba(250, 80, 0, 1.0)'),
                            showarrow=False))
layout = go.Layout(
                title='Arab countries annual growth rate of population [2000-2010]',
                margin=dict(
                        l=130,
                        r=20,
                        t=30,
                        b=30,
                    ),
               annotations = annotations,
               xaxis=dict(showgrid=True,
                       gridcolor="rgba(250, 80, 0, .2)",
                       showticklabels=True,
                       tickfont=dict(family='Arial', size=12,color='rgba(250, 80, 0, 1.0)')),
               yaxis=dict(showgrid=False,
                       showline=True,
                       linecolor='rgba(250, 80, 0, 1.0)',
                       showticklabels=True,
                       tickfont=dict(family='Arial', size=12,color='rgba(250, 80, 0, 1.0)')),
               font=dict(family='Arial', size=12,
                       color='rgba(250, 80, 0, 1.0)'),
               width = 1000,
               height = 600,
               paper_bgcolor='rgba(0, 0, 0,1)',
               plot_bgcolor='rgba(0, 0, 0,0)',
             )
# Adding labels
# Creating two subplots


fig = go.Figure(data=[trace0], layout=layout)

py.iplot(fig)

In [11]:
# List with sum of population of all countries for each year
arab_countries_population = []
for year in years:
    sum_population_per_year = np.array(extract_countries_feature_by_year(arab_df,arab_countries,'population',year)[0]).sum()
#     print(year, sum_population_per_year)
    arab_countries_population.append(sum_population_per_year)

In [12]:
arab_population_growth_rate = calculate_growth_rate(arab_countries_population[49],arab_countries_population[24],25)
trace0 = go.Scatter(
    x= years[24:49],
    y= arab_countries_population[24:49],
    hoverinfo = 'name+x+y',
    name='Population',
    mode = "lines",
    line=dict(
        color='rgba(220,220,150,1)',
        width= 3)
    )
layout = go.Layout(
    title = "Arab countries total population growth from 1985 to 2010",
    annotations = [dict(xref = 'x', yref = 'y',
                      x = 1990, y = arab_countries_population[45],
                      text='growth rate = {0} %'.format(np.round(arab_population_growth_rate,2)),
                      font=dict(family='Arial', size=20,
                                color='rgba(200, 150, 0, 1.0)'),
                      showarrow=False)],

    xaxis=dict(
        showline=False,
        showgrid=True,
        showticklabels=True,
        linecolor='rgb(150, 150, 150)',
        linewidth=2,
        gridcolor='rgb(90, 90, 90)',
        ticks='outside',
        tickcolor='rgb(80, 80, 80)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Arial',
            size=13,
            color='rgb(180, 180, 180)',
        ),
    ),
    yaxis=dict(
        showgrid=True,
        zeroline=True,
        showline=False,
        gridcolor='rgb(80, 80, 80)',
        showticklabels=True,
        tickcolor='rgb(180, 180, 180)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Arial',
            size=13,
            color='rgb(180, 180, 180)')
    ),
   font=dict(family='Arial', size=12,
             color='rgb(180, 180, 180)'),
    showlegend=True, 
    width = 900,
    height = 700,
    paper_bgcolor='rgba(0, 0, 0,.95)',
    plot_bgcolor='rgba(0, 0, 0,0)',
)
    
fig = go.Figure(data=[trace0], layout= layout)
py.iplot(fig)

### GDP Analysis

##### Why is it important to know GDP?
Because GDP provides a direct indication of the health and growth of the economy, businesses can use GDP as a guide to their business strategy. Government entities, such as the Fed in the U.S., use the growth rate and other GDP stats as part of their decision process in determining what type of monetary policies to implement.
https://www.investopedia.com/terms/g/gdp.asp#toc-how-to-use-gdp-data

In [13]:
traces = []
annotations = []
for i in range(len(arab_countries)):
    country_by_record = extract_country_by_record(arab_df,arab_countries[i],'BiocapPerCap')
    traces.append(go.Scatter(
        x=country_by_record['year'],
        y=country_by_record['GDP'],
        mode='lines',
        line=dict(color=colors[i], width=1.5),
        text= arab_countries[i]+"<br>"+ country_by_record['GDP'].dropna().apply(lambda x:int(x)).astype(str)+" $",
        hoverinfo="text + x ",
        connectgaps=True,
        name =arab_countries[i],
        textfont=dict(family='Arial', size=12),
    ))

world_by_record = extract_country_by_record(df,'World','BiocapPerCap')
traces.append(go.Scatter(
        x=world_by_record['year'],
        y=world_by_record['GDP'],
        mode='lines',
        line=dict(color="rgb(255,0,0)", width=2.5, dash = 'dash'),
        text= "World"+"<br>"+ world_by_record['GDP'].dropna().apply(lambda x:int(x)).astype(str)+" $",
        hoverinfo="text + x ",
        connectgaps=True,
        name ="World",
        textfont=dict(family='Arial', size=12),
    ))
    
layout = go.Layout(
    title = "Arab countries GDP per capita",
    xaxis=dict(
        showline=True,
        showgrid=True,
        showticklabels=True,
        linecolor='rgb(150, 150, 150)',
        linewidth=2,
        gridcolor='rgb(90, 90, 90)',
        ticks='outside',
        tickcolor='rgb(80, 80, 80)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Arial',
            size=13,
            color='rgb(180, 180, 180)',
        ),
    ),
    yaxis=dict(
        showgrid=True,
        zeroline=True,
        showline=False,
        gridcolor='rgb(80, 80, 80)',
        showticklabels=True,
        tickcolor='rgb(150, 150, 150)',
        tickwidth=2,
        ticklen=5,
        tickfont=dict(
            family='Arial',
            size=13,
            color='rgb(180, 180, 180)')
    ),
   font=dict(family='Arial', size=12,
             color='rgb(180, 180, 180)'),
    showlegend=True, 
    width = 900,
    height = 700,
    paper_bgcolor='rgba(0, 0, 0,.95)',
    plot_bgcolor='rgba(0, 0, 0,0)',
)
    
fig = go.Figure(data=traces, layout= layout)
py.iplot(fig)

In [14]:
## Need ISO alpha-3 code column

# gdp,avilable_countries,excluded_countries = extract_countries_feature_by_year (arab_df,arab_countries,'GDP',2014,record="BiocapPerCap")
# locations = []
# countries = avilable_countries + excluded_countries 
# for c in countries:
#     country_by_record = extract_country_by_record(arab_df,c,"BiocapPerCap")
#     code = country_by_record.loc[lambda df1: country_by_record.year == 2014]['ISO alpha-3 code'].values
#     if  not (code.size==0):
#         locations.append (country_by_record.loc[lambda df1: country_by_record.year == 2014]['ISO alpha-3 code'].values[0])
#     else:
#         locations.append("SDN")    # only sudan dose not have and data after 2014 so it return empety array
    
# for i in range(len(excluded_countries)):
#     gdp.append("NAN")  
# data = [ dict(
#         type = 'choropleth',
#         locations = locations,
#         z = gdp,
#         text = countries  ,
#         colorscale = [[0,"rgb(5, 10, 172)"],[0.35,"rgb(40, 60, 190)"],[0.5,"rgb(70, 100, 245)"],\
#             [0.6,"rgb(90, 120, 245)"],[0.7,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],
#         autocolorscale = False,
#         reversescale = True,
#         marker = dict(
#             line = dict (
#                 color = 'rgb(180,180,180)',
#                 width = 0.5 )
#         ),
#         colorbar = dict(
#             autotick = False,
#             tickprefix = '$',
#             title = 'GDP per capita<br> US$'),
#       ) ]

# layout = dict(
#     title = '2014 Arab countries GDP per capita',
#     geo = dict(
#         showframe = False,
#         showcoastlines = True,
#         projection = dict(
#             type = 'equirectangular'
#         )
#     )
#     )

# fig = dict( data=data, layout=layout )
# py.iplot( fig, validate=False )

In [15]:
GDP,available_countries, excluded_countries=extract_countries_feature_by_year(arab_df,arab_countries,'GDP',2014)
GDP_df = pd.DataFrame({'country':available_countries,'GDP':GDP}).sort_values(by='GDP',ascending=True)
trace0  = go.Bar(x= GDP_df["GDP"],
                 y= GDP_df["country"],
                 orientation ='h',
                 marker=dict(color='rgba(255, 255, 0, 1.0)',
                             line=dict(color='rgba(250, 80, 0, 1.0)',
                                       width=4)),
                 opacity=0.7,
                 name="GDP",
                hoverinfo="name+x + y")

annotations = []
y_nw = np.array(GDP_df["GDP"])
for ydn,  xd in zip(y_nw, GDP_df["country"]):
    # labeling the scatter savings
    annotations.append(dict(xref='x', yref='y',
                            y=xd, x=ydn+3000,
                            text='{:,} $'.format(np.int(ydn)),
                            font=dict(family='Arial', size=12,
                                      color='rgba(250, 80, 0, 1.0)'),
                            showarrow=False))
    
layout = go.Layout(
    title='2014 Arab countries GDP per cabita',
     margin=dict(
        l=130,
        r=20,
        t=30,
        b=30,
    ),
    annotations = annotations,
    xaxis=dict(showgrid=True,
               gridcolor="rgba(250, 80, 0, .2)",
               showticklabels=True,
               tickfont=dict(family='Arial', size=12,color='rgba(250, 80, 0, 1.0)')),
    yaxis=dict(showgrid=False,
               showline=False,
               linecolor='rgba(250, 80, 0, 1.0)',
               showticklabels=True,
               tickfont=dict(family='Arial', size=12,color='rgba(250, 80, 0, 1.0)')),
    font=dict(family='Arial', size=12,
             color='rgba(250, 80, 0, 1.0)'),
    width = 1000,
    height = 600,
    paper_bgcolor='rgba(0, 0, 0,1)',
    plot_bgcolor='rgba(0, 0, 0,0)',
             )
# Adding labels
# Creating two subplots


fig = go.Figure(data=[trace0], layout=layout)

py.iplot(fig)

### Population and GDP

In [16]:
countries = extract_countries_feature_by_year(arab_df,arab_countries,'GDP',2014)[1]
y = extract_countries_feature_by_year(arab_df,countries,'GDP',2014)[0]
x = extract_countries_feature_by_year(arab_df,countries,'population',2014)[0]

colors = np.random.rand(22)
text = []
for i in range (len(countries)):
    text.append(countries[i]+"<br>"+"GDP Percap: {0} K".format(np.round((y[i]/10**3),2))+"<br>"+"population: {0} M".format(np.round((x[i]/10**6),2)))
annotations = []
y_nw = np.array(y)
for ydn,  xd , c in zip(y_nw, x,countries):
    # labeling the scatter savings
    annotations.append(dict(xref='x', yref='y',
                            y=ydn, x=xd,
                            text= c,
                            font=dict(family='Raleway', size=12,
                                      color='rgba(50, 50, 50, 1.0)'),
                           showarrow=False))
# The marker size is proportional to population
trace = go.Scatter(x=x,
                y=y,
                text = text,
                mode='markers',
                hoverinfo = 'text ',
                marker={'size': x,        
                        'color': colors,
                        'opacity': 0.6,
                        'sizemode' : 'area',
                        'sizeref' : 40000,
                        'colorscale': 'Viridis'
                       });
layout = go.Layout(title = " GDP and Population",
                  yaxis=dict(title = "GDP per capita"),
                  xaxis=dict(title = "Population"),
                  height = 700)
fig = go.Figure(data=[trace],layout = layout)
py.iplot(fig)

## Ecological Footprint Analysis
### Correlation between features according to Ecological footprint

In [17]:
arab_consumption_corr=arab_df[arab_df.record.isin(["EFConsPerCap"])].drop('year',axis=1).corr()
cons_heatmap = go.Heatmap(z=arab_consumption_corr.values,x=arab_consumption_corr.index,y=arab_consumption_corr.index)
layout = go.Layout(title = "Correlation between features according to ecological footprint (per capita)")
fig = go.Figure(data=[cons_heatmap], layout=layout)

py.iplot(fig)

### Correlation between features according to biocapcity

In [18]:
biocapcity_corr=arab_df[arab_df.record.isin(["BiocapPerCap"])].drop(['year','carbon'],axis=1).corr()
biocap_heatmap = go.Heatmap(z=biocapcity_corr.values,x=biocapcity_corr.index,y=biocapcity_corr.index)
layout = go.Layout(title = "Correlation between features according to biocapcity")
fig = go.Figure(data=[biocap_heatmap], layout=layout)

py.iplot(fig)

### Countries Biocapacity vs Ecological Footprint

The higher the gab between the biocapacity and the consumption for biocapacity means that the country has the enough natural resources to cover its needs. So, in this section we will analyse the available data for biocapacity and Ecological Footprint Consumption (EFCons) to see if the resources is enough to cover people consumption.

In [19]:
record ={1:['BiocapPerCap','EFConsPerCap','rgba(0,255,0,1)','rgba(255,0,0,1)'],
         2:['BiocapTotGHA','EFConsTotGHA','rgba(0,140,0,1)','rgba(140,0,0,1)'],}

for c in range (0,len(arab_countries)):
    fig = tools.make_subplots(rows=1, cols=2, specs=[[{},{}]], horizontal_spacing=0.1,
                         subplot_titles=["BioCapacity vs Ecological footprint (per capita)","BioCapacity vs Ecological footprint (GHA)"])
    for r in record.keys():
            country_by_record_bio = extract_country_by_record(arab_df,arab_countries[c],record[r][0])
            country_by_record_cons = extract_country_by_record(arab_df,arab_countries[c],record[r][1])
            trace1 = go.Scatter(
            x=country_by_record_bio['year'],
            y=country_by_record_bio['total'],
            mode= 'lines',
            name = record[r][0],
            line=dict(color=record[r][2], width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )
            trace2 = go.Scatter(
            x=country_by_record_cons['year'],
            y=country_by_record_cons['total'],
            mode='lines',
            name = record[r][1],
            line=dict(color=record[r][3], width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )
            data= [trace1,trace2]

            fig.append_trace(trace1, 1, r)
            fig.append_trace(trace2, 1, r)
    fig['layout'].update(height=450, width=1000,
                     title=arab_countries[c])
    py.iplot(fig)


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



For all Arab countries as one big country

In [20]:
Arab_BiocapTotal = []
Arab_EFConsTotal = []
Arab_BiocapPerCap = []
Arab_EFConsPerCap = []
world_BiocapTotal = []
world_EFConsTotal = []
mean_BiocapPerCap = []
mean_EFConsPerCap = []
for year in years :
    sum_BiocapTotal_value = np.array(extract_countries_feature_by_year(arab_df,arab_countries,'total',year,record= 'BiocapTotGHA')[0]).sum()
    sum_EFConsTotal_value = np.array(extract_countries_feature_by_year(arab_df,arab_countries,'total',year,record='EFConsTotGHA')[0]).sum()
    sum_population_per_year = np.array(extract_countries_feature_by_year(arab_df,arab_countries,'population',year)[0]).sum()
    world_BiocapTotal.append(np.array(extract_countries_feature_by_year(df,['World'],'total',year,record= 'BiocapTotGHA')[0]))
    world_EFConsTotal.append(np.array(extract_countries_feature_by_year(df,['World'],'total',year,record= 'EFConsTotGHA')[0]))
    Arab_BiocapTotal.append(sum_BiocapTotal_value)
    Arab_EFConsTotal.append(sum_EFConsTotal_value)
    Arab_BiocapPerCap.append(sum_BiocapTotal_value/sum_population_per_year)
    Arab_EFConsPerCap.append(sum_EFConsTotal_value/sum_population_per_year)
    mean_BiocapPerCap.append(np.array(extract_countries_feature_by_year(arab_df,arab_countries,'total',year)[0]).mean())
    mean_EFConsPerCap.append(np.array(extract_countries_feature_by_year(arab_df,arab_countries,'total',year,record='EFConsPerCap')[0]).mean())

In [21]:
fig = tools.make_subplots(rows=2, cols=2, specs=[[{},{}],[{'colspan': 2}, None]], horizontal_spacing=0.1, vertical_spacing = 0.1,
                         subplot_titles=["Per capita","Mean (per capita)","Total (GHA)"])

arab_biocapPerCap_plt = go.Scatter(
            x=years[19:],
            y=Arab_BiocapPerCap[19:],
            mode= 'lines',
            name = "Biocapcity",
            line=dict(color="green", width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )
arab_EFperCap_plt = go.Scatter(
            x=years[19:],
            y=Arab_EFConsPerCap[19:],
            mode='lines',
            name = "Ecological footprint",
            line=dict(color="red", width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )

fig.append_trace(arab_biocapPerCap_plt, 1, 1)
fig.append_trace(arab_EFperCap_plt, 1, 1)

arab_meanBiocapPerCap_plt = go.Scatter(
            x=years[19:],
            y=mean_BiocapPerCap[19:],
            mode= 'lines',
            showlegend = False,
            line=dict(color="green", width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )
arab_meanEFperCap_plt = go.Scatter(
            x=years[19:],
            y=mean_EFConsPerCap[19:],
            mode='lines',
            showlegend = False,
            line=dict(color="red", width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )

fig.append_trace(arab_meanBiocapPerCap_plt, 1, 2)
fig.append_trace(arab_meanEFperCap_plt, 1, 2)

arab_totalBiocap_plt = go.Scatter(
            x=years[19:],
            y=Arab_BiocapTotal[19:],
            mode= 'lines',
            showlegend = False,
            line=dict(color="green", width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )
arab_totalEF_plt = go.Scatter(
            x=years[19:],
            y=Arab_EFConsTotal[19:],
            mode='lines',
            showlegend = False,
            line=dict(color="red", width=1.5),
            hoverinfo="y + x ",
            textfont=dict(family='Arial', size=12),
        )

fig.append_trace(arab_totalBiocap_plt, 2, 1)
fig.append_trace(arab_totalEF_plt, 2, 1)

            
fig['layout'].update(height=900, width=1000,
                     title= "Arab World BioCapacity vs Ecological footprint")
py.iplot(fig)

### Analysing the Deficit/Reserve values of Ecological footprint

The deficit happens if the needs of the country exeeds its natural resources ability to renew itself. It means that they need extra resources to cover their needs. In contrary, the reserve happens if the needs could be covered by the available natural resources of the country or region.

In [22]:
difference  = []
countries_list = []
deficit_or_reserve = []

# Foreach arab country, find if it is deficit or reserve
for country in arab_countries:
    BiocapPerCap=np.array(extract_countries_feature_by_year(arab_df,[country],'total',2014)[0])
    EFConsPerCap=np.array(extract_countries_feature_by_year(arab_df,[country],'total',2014,record="EFConsPerCap")[0])
    difference_value = BiocapPerCap - EFConsPerCap
    if difference_value < 0 :
        deficit_or_reserve.append ("deficit")
        difference.append(np.abs(difference_value[0]))
    if difference_value > 0 :
        deficit_or_reserve.append("reserve")
        difference.append(difference_value[0])
    if difference_value.size==0:
        deficit_or_reserve.append("nan")
        difference.append(np.NAN)
    countries_list.append(country)
    
defict_reserve_df = pd.DataFrame({"country":countries_list,"deficit/reserve":deficit_or_reserve,"value":difference}).dropna().sort_values(by="value",ascending=False)

In [23]:
trace0 = go.Bar(
    y=defict_reserve_df[defict_reserve_df['deficit/reserve'].isin(['deficit'])]['country'],
    x=defict_reserve_df[defict_reserve_df['deficit/reserve'].isin(['deficit'])]['value'],
    orientation ='h',
    name='Deficit',
    marker=dict(
        color='rgb(180,0,0)'
    )
)
trace1 = go.Bar(
    y=defict_reserve_df[defict_reserve_df['deficit/reserve'].isin(['reserve'])]['country'],
    x=defict_reserve_df[defict_reserve_df['deficit/reserve'].isin(['reserve'])]['value'],
    orientation ='h',
    name='Reserve',
    marker=dict(
        color='rgb(0,180,0)',
    )
)
data = [trace0, trace1]
layout = go.Layout(title = "Arab countries Ecological footprint [Deficit/Reserve] (per capita) 2014",
                   yaxis = dict(showline = False,
                               zeroline = False),
                   width=900,height=500,
                   margin=dict(
                        l=140,
                        r=20,
                        t=30,
                        b=30)
                   )
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [24]:
defict_reserve_df

Unnamed: 0,country,deficit/reserve,value
10,Qatar,deficit,14.476581
14,United Arab Emirates,deficit,9.195708
2,Bahrain,deficit,8.176368
9,Kuwait,deficit,7.038156
8,Saudi Arabia,deficit,5.589141
12,Oman,deficit,4.774003
3,Libyan Arab Jamahiriya,deficit,3.645192
16,Lebanon,deficit,3.043276
20,Djibouti,deficit,2.184452
6,Mauritania,reserve,2.047657


### Comparing the GDP with Deficit

In [25]:
def extract_defict_value (df, country_name):
    # If country is deficit
    c = df[df.country.isin([country_name])]
    if c['deficit/reserve'].values.size != 0 :
        if (c['deficit/reserve'].values[0]=='deficit'):
            return c['value'].values[0]
         
deficit_value = []
for c in countries :
    if extract_defict_value(defict_reserve_df, c) != None:
        deficit_value.append(extract_defict_value(defict_reserve_df,c))
    else:
        continue

In [26]:
deficit_value

[1.5106796200000001,
 1.9072246539999997,
 8.176368091,
 1.926813832,
 1.7187622129999998,
 0.9791936779999999,
 5.589141066,
 7.0381559330000005,
 14.476581175,
 4.7740032889999995,
 1.383392359,
 9.195707987,
 0.5786527430000001,
 3.043275665,
 0.511552453,
 2.184451832]

In [27]:
countries = extract_countries_feature_by_year(arab_df,arab_countries,'GDP',2014)[1]
countries.remove('Mauritania')     # remove Mauritania as it has no deficit 
x = extract_countries_feature_by_year(arab_df,countries,'GDP',2014)[0]
y = deficit_value
colors = np.random.rand(100)
sz = (np.array(y)*10000)
text = []
for i in range (len(countries)):
    text.append(countries[i]+"<br>"+"GDP: {0} K".format(np.round((x[i]/10**3),2))+"<br>"+"Deficit: {0}".format(np.round((y[i]),2)))

annotations = []
y_nw = np.array(y)
for ydn,  xd , c in zip(y_nw, x,countries):
    # labeling the scatter savings
    annotations.append(dict(xref='x', yref='y',
                            y=ydn, x=xd,
                            text= c,
                            font=dict(family='Raleway', size=11,
                                      color='rgba(50, 50, 50, 1.0)'),
                            showarrow=False))
trace = go.Scatter(x=x,
                y=y,
                text = text,
                mode='markers',
                hoverinfo = 'text ',
                marker={'size': sz,
                        'color': colors,
                        'opacity': 0.5,
                        'sizemode' : 'area',
                        'sizeref' : 80,
                        'colorscale': 'Viridis'
                       });
layout = go.Layout(title= " Ecological deficit and GDP",
                  yaxis=dict(title = "Ecological deficit (per capita)"),
                  xaxis=dict(title = "GDP per capita"),
                  annotations = annotations,
                  height = 700,
                  width = 1500)
fig = go.Figure(data=[trace],layout = layout)
py.iplot(fig)

The plot shows that the richest countries has the highest deficit and the poorest one has the lowest deficit. It indictes that the higher the income in the country the higher the consumption of its residents as they have the ability to buy more. The contrast happens with the poorest countries.

### Calculate the Earth Overshoot Day

Earth Overshoot Day (EOD), is the calculated illustrative calendar date on which humanity’s resource consumption for the year exceeds Earth’s capacity to regenerate those resources that year. Earth Overshoot Day is calculated by dividing the world biocapacity (the amount of natural resources generated by Earth that year) by the ecological footprint.

EarthOvershootDay=(Biocapacity/EcologicalFootprint)x365

In [28]:
import datetime
arab_eod_dates = []
eod_dates_world=[]

def calc_earth_overshot_day(biocap,ecofootp):
    eod = (np.array(biocap) / np.array(ecofootp))*365
    return eod

eod_arab = calc_earth_overshot_day(Arab_BiocapTotal,Arab_EFConsTotal)
eod_world = calc_earth_overshot_day(world_BiocapTotal,world_EFConsTotal)
eod_month_arab = []
eod_month_world = []

for i in range (0,len(eod_arab)):
    if eod_arab[i]>365:
        arab_eod_dates.append("no EOD")
        eod_month_arab.append("no EOD")
    if eod_world[i]>365:
        eod_dates_world.append("no EOD")
        eod_month_world.append("no EOD")
    if eod_arab[i] < 365:
        date_arab = datetime.datetime(years[i],1,1) + datetime.timedelta(days=eod_arab[i])
        eod_month_arab.append(date_arab.strftime('%b'))
        arab_eod_dates.append(date_arab.strftime('%b-%d'))
    if eod_world[i] < 365:
        date_world = datetime.datetime(years[i],1,1) + datetime.timedelta(days=int(eod_world[i]))
        eod_month_world.append(date_world.strftime('%b'))
        eod_dates_world.append(date_world.strftime('%b-%d'))
        #[19:] represents the year that the EOD begins to appear 

print(len(years))
print(len(arab_eod_dates))
print(len(eod_dates_world))
eod_df = pd.DataFrame({"year":years[19:],"Arab Earth Overshoot Day":arab_eod_dates[19:],"World Earth Overshoot Day":eod_dates_world[19:]})
eod_df

58
58
58


Unnamed: 0,year,Arab Earth Overshoot Day,World Earth Overshoot Day
0,1980,Dec-13,Nov-03
1,1981,Nov-07,Nov-11
2,1982,Oct-20,Nov-15
3,1983,Oct-01,Nov-14
4,1984,Sep-17,Nov-05
5,1985,Sep-17,Nov-04
6,1986,Sep-20,Oct-30
7,1987,Sep-14,Oct-23
8,1988,Sep-11,Oct-14
9,1989,Sep-01,Oct-12


In [29]:
EOD_arab_plt = go.Scatter(
            x=eod_df['year'],
            y= eod_month_arab[19:],
            mode= 'lines',
            name = "Arab EOD",
            text = eod_df["Arab Earth Overshoot Day"],
            line=dict(color="green", width=1.5),
            hoverinfo="name + text + x ",
            textfont=dict(family='Arial', size=12),
        )
EOD_world_plt = go.Scatter(
            x=eod_df['year'],
            y= eod_month_world[19:],
            mode= 'lines',
            name = "World EOD",
            text = eod_df["World Earth Overshoot Day"],
            line=dict(color="red", width=1.5),
            hoverinfo="name + text + x ",
            textfont=dict(family='Arial', size=12),
        )
layout = go.Layout(title= "Earth Overshoot Day",
                  yaxis=dict(title = "Month"),
                  xaxis=dict(title = "Year"),
                  height = 500,
                  width = 800)

fig = go.Figure(data=[EOD_arab_plt,EOD_world_plt],layout=layout)
py.iplot(fig)

### Carbon Footprint

The carbon footprint is the required amount of forest land to absorb the emitted carbon. the number reflects the amount of emitted carbon for every country.

In [30]:
Arab_carbon,available_countries,excluded_countries=extract_countries_feature_by_year(arab_df,arab_countries,'carbon',2014,record="EFConsPerCap")
carbon_df = pd.DataFrame({'country':available_countries,'carbon':Arab_carbon}).sort_values(by='carbon',ascending=False)

In [31]:
avail_countris = extract_countries_feature_by_year(arab_df,carbon_df['country'],'GDP',2014)[1]
y = avail_countris
x = extract_countries_feature_by_year(arab_df,avail_countris,'GDP',2014)[0]
size = extract_countries_feature_by_year(arab_df,avail_countris,'carbon',2014,record="EFConsPerCap")[0]
colors = size
text = []
for i in range (len(avail_countris)):
    text.append(y[i]+"<br>"+"GDP Percap: {0} K".format(np.round((x[i]/10**3),2))+"<br>"+"EFcarbon: {0} ".format(np.round((np.array(size)[i]),2)))

trace = go.Scatter(x=x,
                y=y,
                text = text,
                mode='markers',
                hoverinfo = 'text ',
                name = "EFCarbon",
                showlegend = False,
                marker={'size': size,        
                        'color': colors,
                        'opacity': 0.6,
                        'sizemode' : 'area',
                        'sizeref' : 0.005,
                        'colorscale': 'Portland',
                         'showscale' : True,
                         'cmax' : np.max(size),
                         'cmin' : np.min(size),
                         'colorbar' : dict( y= 0.52,
                                            len= .8,
                                            x = 1,
                                            title = "EF Carbon",
                                            titlefont = dict(size=15))
                       },
                  
                  );
layout = go.Layout(
                  title = "Ecological footprint of Carbon and GDP (per capita) [2014]",
                  xaxis=dict(title = "GDP (per capita)",
                             titlefont = dict (family = "Arial"),
                             zeroline=False),
                  yaxis=dict(
                        zeroline=True,
                        showticklabels=True,
                        tickfont=dict(family='Arial', size=12)),
                  margin=dict(
                        l=140,
                        r=20,
                        t=40,
                        b=45,),
                  width = 1000,
                  height = 700)
fig = go.Figure(data=[trace],layout = layout)
py.iplot(fig)

The plot shows the strong relation between the GDP and the carbon EF. The countries with high GDP have a high carbon EF. it indicates that they have a high carbon emission so, they need more forests to neutralize this emission.

## Conclusion

The analysis shows that:

- There are strong gabs between the Arab countries especially in the GDP.
- There is strong relation between the GDP and Ecological footprint. The high the GDP, the high the Ecological footprint.
- The high deficit in the Ecological footprint happens with the countries with high GDP.
- In arab countries the high populated countries have low GDP per capita.
- The Carbon Ecological footprint increased proportionally with the GDP.