# AFRICA FOOD SHORTAGE PROBLEMS
Africa food shortage problem using appropriate visuals from data on Africa population, food production and available supply in Africa countries obtained from FAO's resources.

## IMPORTING DEPENDENCIES

In [16]:
# Installing panda profiling requirements for exploratory data analysis
! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip

# Installing plotly, dash, networkx, pivux for visualization
! pip install plotly==5.6.0
! pip install networkx[default]
! pip install pyvis




'! pip install dash\n! pip install networkx[default]\n! pip install pyvis'

## IMPORTING NECESSARY LIBRARIES

In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from pandas_profiling import ProfileReport
from plotly.offline import init_notebook_mode
from pyvis.network import Network
from scipy.stats import pearsonr, chi2_contingency


# Setting visualization styles
init_notebook_mode(connected=True)

## IMPORTING RAW DATASET

In [2]:
# African Food Production
afp = pd.read_csv('African_Food_Production.csv')

# African Food Available Supply
afs = pd.read_csv('African_Food_Supply.csv')

# Total World Population
world_population = pd.read_excel('WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx')

## EXPLORATORY DATA ANALYSIS

In [5]:
# Using panda profile to get an overview of the data
profile = ProfileReport(afp, title="African Food Production", html={'style': {'full_width': True}}, sort=None)
profile.to_notebook_iframe()

Summarize dataset: 100%|██████████| 21/21 [00:23<00:00,  1.11s/it, Completed]                    
Generate report structure: 100%|██████████| 1/1 [00:06<00:00,  6.95s/it]
Render HTML: 100%|██████████| 1/1 [00:03<00:00,  3.34s/it]


In [106]:
# Using panda profile to get an overview of the data
profile = ProfileReport(afs, title="African Food Availablilty/Supply", html={'style': {'full_width': True}}, sort=None)
profile.to_notebook_iframe()

Summarize dataset: 100%|██████████| 20/20 [00:23<00:00,  1.17s/it, Completed]                    
Generate report structure: 100%|██████████| 1/1 [00:06<00:00,  6.91s/it]
Render HTML: 100%|██████████| 1/1 [00:03<00:00,  3.35s/it]


## DATA CLEANING AND PREPARATION

In [3]:
# Changing the header row to the appropriate row
world_population.columns = world_population.loc[15]

# Filter the needed columns
world_population_filtered = world_population[['Region, subregion, country or area *', '2004', '2005', '2006', '2007','2008', '2009', '2010', '2011', '2012', '2013']]

world_population_filtered

15,"Region, subregion, country or area *",2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,,,,,,,,,,,
1,,,,,,,,,,,
2,,,,,,,,,,,
3,,,,,,,,,,,
4,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
300,Bermuda,66.156,66.26,66.24,66.126,65.94,65.675,65.388,65.076,64.737,64.381
301,Canada,31815.49,32164.313,32536.994,32930.795,33337.638,33746.093,34147.566,34539.156,34922.031,35296.535
302,Greenland,56.868,56.949,56.968,56.928,56.836,56.729,56.638,56.555,56.477,56.412
303,Saint Pierre and Miquelon,6.118,6.113,6.16,6.22,6.286,6.343,6.361,6.323,6.251,6.168


In [4]:
# Standardizing all the countries name to match
produce = afp.replace({"Cote d'Ivoire": "Ivory Coast"})
supply = afs.replace({"Cote d'Ivoire": "Ivory Coast"})
world_population_filtered = world_population_filtered.replace({'Eswatini': 'Swaziland', "Côte d'Ivoire": "Ivory Coast"})

In [107]:
# Selecting the African Countries from the Countries in world population data
african_population = pd.DataFrame()
for african_country in produce.Country.unique():
    for country in world_population_filtered['Region, subregion, country or area *'].unique():
        if country == african_country:
            african_population = african_population.append(world_population_filtered.loc[world_population_filtered['Region, subregion, country or area *'] == african_country], ignore_index=True)

# Renaming the Region, subregion, country or area * column to Country
african_population = african_population.rename(columns = {'Region, subregion, country or area *': 'Country'})

# Removing the index name
african_population.keys().name = None

# Resetting the index of the DataFrame to 
african_population = african_population.reset_index(drop=True)

# Using panda profile to get an overview of the data
profile = ProfileReport(african_population, title="African Population", html={'style': {'full_width': True}}, sort=None)
profile.to_notebook_iframe()

Summarize dataset: 100%|██████████| 124/124 [01:24<00:00,  1.47it/s, Completed]                   
Generate report structure: 100%|██████████| 1/1 [00:16<00:00, 16.83s/it]
Render HTML: 100%|██████████| 1/1 [00:07<00:00,  7.19s/it]


In [6]:
# Reshaping the African population dataframe for ease of analysis
african_population = pd.melt(african_population, id_vars='Country', value_vars=['2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013'])
african_population = african_population.rename(columns = {'variable': 'Year', 'value': 'Value'})
african_population = african_population.sort_values(by=['Country', 'Year']).reset_index(drop=True)

# Converting the Value column from object to numeric type
african_population.Value = african_population.Value.apply(pd.to_numeric)

african_population

Unnamed: 0,Country,Year,Value
0,Algeria,2004,32692.153
1,Algeria,2005,33149.720
2,Algeria,2006,33641.007
3,Algeria,2007,34166.976
4,Algeria,2008,34730.604
...,...,...,...
445,Zimbabwe,2009,12526.964
446,Zimbabwe,2010,12697.728
447,Zimbabwe,2011,12894.323
448,Zimbabwe,2012,13115.149


In [8]:
# Sort the dataset by Country and Year
african_population = african_population.sort_values(by=['Country', 'Year']).reset_index(drop=True)
produce = produce.sort_values(by=['Country', 'Year']).reset_index(drop=True)
supply = supply.sort_values(by=['Country', 'Year']).reset_index(drop=True)

## AFRICA FOOD SUPPLY, PRODUCTION AND POPULATION ACROSS COUNTRIES BETWEEN 2004 TO 2013

In [7]:
# The annual food production of african countries
country_production_yearly = produce.groupby(['Country','Year'])['Value'].sum().reset_index()
country_production_yearly

Unnamed: 0,Country,Year,Value
0,Algeria,2004,15536
1,Algeria,2005,15667
2,Algeria,2006,16417
3,Algeria,2007,14763
4,Algeria,2008,13841
...,...,...,...
445,Zimbabwe,2009,5754
446,Zimbabwe,2010,6777
447,Zimbabwe,2011,7551
448,Zimbabwe,2012,8173


In [8]:
# The annual food supply of african countries
country_supply_yearly = supply.groupby(['Country','Year'])['Value'].sum().reset_index()
country_supply_yearly

Unnamed: 0,Country,Year,Value
0,Algeria,2004,2987
1,Algeria,2005,2958
2,Algeria,2006,3047
3,Algeria,2007,3041
4,Algeria,2008,3048
...,...,...,...
445,Zimbabwe,2009,2147
446,Zimbabwe,2010,2168
447,Zimbabwe,2011,2200
448,Zimbabwe,2012,2197


In [9]:
# PLotting boxplots to visualise outliers
fig = go.Figure()
fig.add_trace(go.Box(
    x = country_production_yearly.Year,
    y = country_production_yearly.Value,
    name='Mean',
    marker_color='darkblue',
    boxmean=True # represent mean
))
fig.update_layout(
    title={
        'text': "African Annual Food Production",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

In [10]:
# PLotting boxplots to visualise outliers
fig = go.Figure()
fig.add_trace(go.Box(
    x = country_supply_yearly.Year,
    y = country_supply_yearly.Value,
    name='Mean',
    marker_color='darkblue',
    boxmean=True # represent mean
))
fig.update_layout(
    title={
        'text': "African Annual Food Supply",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

In [17]:
# Analysing for outliers in african food production
Q1 = np.percentile(country_production_yearly['Value'], 25,
                   interpolation = 'midpoint')

Q3 = np.percentile(country_production_yearly['Value'], 75,
                   interpolation = 'midpoint')
IQR = Q3 - Q1
food_production_outlier = country_production_yearly.loc[country_production_yearly['Value'] > (Q3+1.5*IQR)].sort_values(by=['Year', 'Value']).reset_index(drop=True)
food_production_outlier

Unnamed: 0,Country,Year,Value
0,South Africa,2004,54949
1,Egypt,2004,75989
2,Nigeria,2004,149857
3,South Africa,2005,59577
4,Egypt,2005,80422
5,Nigeria,2005,158149
6,South Africa,2006,54024
7,Egypt,2006,83191
8,Nigeria,2006,168987
9,South Africa,2007,53795


In [15]:
# Analysing for outliers in african food production
Q1 = np.percentile(country_supply_yearly['Value'], 25,
                   interpolation = 'midpoint')

Q3 = np.percentile(country_supply_yearly['Value'], 75,
                   interpolation = 'midpoint')
IQR = Q3 - Q1
food_supply_outlier = country_supply_yearly.loc[country_supply_yearly['Value'] > (Q3+1.5*IQR)].sort_values(by=['Year', 'Value']).reset_index(drop=True)
food_supply_outlier

Unnamed: 0,Country,Year,Value
0,Egypt,2007,3445
1,Egypt,2008,3490
2,Egypt,2010,3507
3,Egypt,2011,3549
4,Egypt,2012,3561
5,Egypt,2013,3522


From the boxplot and analysis for outliers, it is observed that Nigeria, Egypt and South Africa produced food in all years above the upper limit of the annual food production distribution, and Egypt had large amounts of food available for consumption in 2007, 2008, and 2010 through 2013 above the upper limit of the annual food consumption. These outliers are significant for the food production and consumption statistics and therefore are retained. 

In [16]:
# Total Africa Annual Population
total_population = african_population.groupby('Year')['Value'].sum().reset_index()

# Total Africa Food Production
total_production = produce.groupby('Year')['Value'].sum().reset_index()

# Total Food Available in Supply in Africa
total_supply = supply.groupby('Year')['Value'].sum().reset_index()

In [17]:
# Plotting a bar graph to show the trend of population with production and supply
fig = go.Figure()
fig.add_trace(go.Bar(
    x=total_population.Year, y=total_population.Value,
    name='Total Population',
    marker_color="indianred"
))
fig.add_trace(go.Bar(
    x=total_production.Year, y=total_production.Value,
    name='Total Production',
    marker_color="teal"
))
fig.add_trace(go.Bar(
    x=total_supply.Year, y=total_supply.Value,
    name='Total Supply',
    marker_color="lightsalmon"
))
fig.update_layout(
    title={
        'text': "Trends in Annual Population, Food Production and Supply in Africa",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    barmode='group')
fig.show()

In [12]:
# Mean Africa Annual Population
mean_population = african_population.groupby('Year')['Value'].mean().reset_index()
mean_population

Unnamed: 0,Year,Value
0,2004,17890.901911
1,2005,18322.957867
2,2006,18769.002956
3,2007,19229.327111
4,2008,19705.237467
5,2009,20198.106778
6,2010,20708.776133
7,2011,21237.715978
8,2012,21784.384756
9,2013,22347.240978


In [13]:
# Mean Africa Food Production
mean_production = produce.groupby('Year')['Value'].mean().reset_index()
mean_production

Unnamed: 0,Year,Value
0,2004,286.767301
1,2005,298.986592
2,2006,310.814014
3,2007,305.21583
4,2008,318.686851
5,2009,323.040657
6,2010,340.166955
7,2011,351.303633
8,2012,364.831816
9,2013,378.227568


In [14]:
# Mean Food Available in Supply in Africa
mean_supply = supply.groupby('Year')['Value'].mean().reset_index()
mean_supply

Unnamed: 0,Year,Value
0,2004,2394.222222
1,2005,2409.288889
2,2006,2430.8
3,2007,2447.755556
4,2008,2460.755556
5,2009,2482.222222
6,2010,2497.4
7,2011,2515.422222
8,2012,2527.644444
9,2013,2532.244444


In [15]:
print("The mean African Annual Propulation is {}".format(mean_population.Value.mean()))
print("The mean African Annual Food Production is {}".format(mean_production.Value.mean()))
print("The mean African Annual Food Supply is {}".format(mean_supply.Value.mean()))

The mean African Annual Propulation is 20019.365193333335
The mean African Annual Food Production is 327.80412183920464
The mean African Annual Food Supply is 2469.775555555556


In [19]:
# Comparing the mean of annual production and annual supply
production_vs_supply = mean_production.compare(mean_supply, keep_equal=True, keep_shape=True)
production_vs_supply

Unnamed: 0_level_0,Year,Year,Value,Value
Unnamed: 0_level_1,self,other,self,other
0,2004,2004,286.767301,2394.222222
1,2005,2005,298.986592,2409.288889
2,2006,2006,310.814014,2430.8
3,2007,2007,305.21583,2447.755556
4,2008,2008,318.686851,2460.755556
5,2009,2009,323.040657,2482.222222
6,2010,2010,340.166955,2497.4
7,2011,2011,351.303633,2515.422222
8,2012,2012,364.831816,2527.644444
9,2013,2013,378.227568,2532.244444


In [20]:
# Calculating the percentage change in production and supply between years and ther cummulative change from 2004 - 2013
production_vs_supply['self_change'] = production_vs_supply.Value.self.pct_change().mul(100).round(2)
production_vs_supply['other_change'] = production_vs_supply.Value.other.pct_change().mul(100).round(2)
production_vs_supply['self_cumchange'] = production_vs_supply.self_change.cumsum()
production_vs_supply['other_cumchange'] = production_vs_supply.other_change.cumsum()
production_vs_supply = production_vs_supply.fillna(0)
production_vs_supply


Unnamed: 0_level_0,Year,Year,Value,Value,self_change,other_change,self_cumchange,other_cumchange
Unnamed: 0_level_1,self,other,self,other,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2004,2004,286.767301,2394.222222,0.0,0.0,0.0,0.0
1,2005,2005,298.986592,2409.288889,4.26,0.63,4.26,0.63
2,2006,2006,310.814014,2430.8,3.96,0.89,8.22,1.52
3,2007,2007,305.21583,2447.755556,-1.8,0.7,6.42,2.22
4,2008,2008,318.686851,2460.755556,4.41,0.53,10.83,2.75
5,2009,2009,323.040657,2482.222222,1.37,0.87,12.2,3.62
6,2010,2010,340.166955,2497.4,5.3,0.61,17.5,4.23
7,2011,2011,351.303633,2515.422222,3.27,0.72,20.77,4.95
8,2012,2012,364.831816,2527.644444,3.85,0.49,24.62,5.44
9,2013,2013,378.227568,2532.244444,3.67,0.18,28.29,5.62


In [21]:
# Comparing the mean of annual population,  and annual supply
population_vs_supply = mean_population.compare(mean_supply)
population_vs_supply

Unnamed: 0_level_0,Year,Year,Value,Value
Unnamed: 0_level_1,self,other,self,other
0,2004,2004,17890.901911,2394.222222
1,2005,2005,18322.957867,2409.288889
2,2006,2006,18769.002956,2430.8
3,2007,2007,19229.327111,2447.755556
4,2008,2008,19705.237467,2460.755556
5,2009,2009,20198.106778,2482.222222
6,2010,2010,20708.776133,2497.4
7,2011,2011,21237.715978,2515.422222
8,2012,2012,21784.384756,2527.644444
9,2013,2013,22347.240978,2532.244444


In [22]:
# Calculating the percentage change in population size and production between years and ther cummulative change from 2004 - 2013
population_vs_supply['self_change'] = population_vs_supply.Value.self.pct_change().mul(100).round(2)
population_vs_supply['other_change'] = population_vs_supply.Value.other.pct_change().mul(100).round(2)
population_vs_supply['self_cumchange'] = population_vs_supply.self_change.cumsum()
population_vs_supply['other_cumchange'] = population_vs_supply.other_change.cumsum()
population_vs_supply = population_vs_supply.fillna(0)
population_vs_supply

Unnamed: 0_level_0,Year,Year,Value,Value,self_change,other_change,self_cumchange,other_cumchange
Unnamed: 0_level_1,self,other,self,other,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2004,2004,17890.901911,2394.222222,0.0,0.0,0.0,0.0
1,2005,2005,18322.957867,2409.288889,2.41,0.63,2.41,0.63
2,2006,2006,18769.002956,2430.8,2.43,0.89,4.84,1.52
3,2007,2007,19229.327111,2447.755556,2.45,0.7,7.29,2.22
4,2008,2008,19705.237467,2460.755556,2.47,0.53,9.76,2.75
5,2009,2009,20198.106778,2482.222222,2.5,0.87,12.26,3.62
6,2010,2010,20708.776133,2497.4,2.53,0.61,14.79,4.23
7,2011,2011,21237.715978,2515.422222,2.55,0.72,17.34,4.95
8,2012,2012,21784.384756,2527.644444,2.57,0.49,19.91,5.44
9,2013,2013,22347.240978,2532.244444,2.58,0.18,22.49,5.62


In [23]:
# Comparing the mean of annual population, and annual production
population_vs_production  = mean_population.compare(mean_production)
population_vs_production

Unnamed: 0_level_0,Year,Year,Value,Value
Unnamed: 0_level_1,self,other,self,other
0,2004,2004,17890.901911,286.767301
1,2005,2005,18322.957867,298.986592
2,2006,2006,18769.002956,310.814014
3,2007,2007,19229.327111,305.21583
4,2008,2008,19705.237467,318.686851
5,2009,2009,20198.106778,323.040657
6,2010,2010,20708.776133,340.166955
7,2011,2011,21237.715978,351.303633
8,2012,2012,21784.384756,364.831816
9,2013,2013,22347.240978,378.227568


In [24]:
# Calculating the percentage change in population size and supply between years and ther cummulative change from 2004 - 2013
population_vs_production['self_change'] = population_vs_production.Value.self.pct_change().mul(100).round(2)
population_vs_production['other_change'] = population_vs_production.Value.other.pct_change().mul(100).round(2)
population_vs_production['self_cumchange'] = population_vs_production.self_change.cumsum()
population_vs_production['other_cumchange'] = population_vs_production.other_change.cumsum()
population_vs_production = population_vs_production.fillna(0)
population_vs_production


Unnamed: 0_level_0,Year,Year,Value,Value,self_change,other_change,self_cumchange,other_cumchange
Unnamed: 0_level_1,self,other,self,other,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2004,2004,17890.901911,286.767301,0.0,0.0,0.0,0.0
1,2005,2005,18322.957867,298.986592,2.41,4.26,2.41,4.26
2,2006,2006,18769.002956,310.814014,2.43,3.96,4.84,8.22
3,2007,2007,19229.327111,305.21583,2.45,-1.8,7.29,6.42
4,2008,2008,19705.237467,318.686851,2.47,4.41,9.76,10.83
5,2009,2009,20198.106778,323.040657,2.5,1.37,12.26,12.2
6,2010,2010,20708.776133,340.166955,2.53,5.3,14.79,17.5
7,2011,2011,21237.715978,351.303633,2.55,3.27,17.34,20.77
8,2012,2012,21784.384756,364.831816,2.57,3.85,19.91,24.62
9,2013,2013,22347.240978,378.227568,2.58,3.67,22.49,28.29


In [53]:
# Plotting a bar graph to show the percentage change of population, production and supply
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=population_vs_production.Year.self, y=population_vs_production.self_change,
    name='Change in Population',
    marker_color="indianred"
))
fig.add_trace(go.Bar(
    x=population_vs_production.Year.self, y=population_vs_production.other_change,
    name='Change in Production',
    marker_color="teal"
))
fig.add_trace(go.Bar(
    x=population_vs_supply.Year.self, y=population_vs_supply.other_change,
    name='Change in Supply',
    marker_color="lightsalmon"
))
fig.update_layout(
    title={
        'text': "Percentage Changes in Annual Population, Food Production and Supply in Africa ",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    barmode='group')
fig.show()

In [62]:
# Plotting a line graph to show the percentage change of population, production and supply
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=population_vs_production.Year.self, y=population_vs_production.self_change,
    name='Change in Population',
    marker_color="indianred",
    mode = 'lines'
))
fig.add_trace(go.Scatter(
    x=population_vs_production.Year.self, y=population_vs_production.other_change,
    name='Change in Production',
    marker_color="teal",
    mode = 'lines'
))
fig.add_trace(go.Scatter(
    x=population_vs_supply.Year.self, y=population_vs_supply.other_change,
    name='Change in Supply',
    marker_color="lightsalmon",
    mode = 'lines'
))

fig.update_layout(
    title={
        'text': "Percentage Changes in Annual Population, Food Production and Supply in Africa ",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [63]:
# Plotting a line graph to show the percentage change of population, production and supply
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=population_vs_production.Year.self, y=population_vs_production.self_cumchange,
    name='Change in Population',
    marker_color="indianred",
    mode = 'lines'
))
fig.add_trace(go.Scatter(
    x=population_vs_production.Year.self, y=population_vs_production.other_cumchange,
    name='Change in Production',
    marker_color="teal",
    mode = 'lines'
))
fig.add_trace(go.Scatter(
    x=population_vs_supply.Year.self, y=population_vs_supply.other_cumchange,
    name='Change in Supply',
    marker_color="lightsalmon",
    mode = 'lines'
))

fig.update_layout(
    title={
        'text': "Percentage Changes in Annual Population, Food Production and Supply in Africa ",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [51]:
# Plotting a bar graph to show thecumulative percentage change in population, production and supply
fig = go.Figure()
fig.add_trace(go.Bar(
    x=population_vs_production.Year.self, y=population_vs_production.self_cumchange,
    name='Change in Population',
    marker_color="indianred"
))
fig.add_trace(go.Bar(
    x=population_vs_production.Year.self, y=population_vs_production.other_cumchange,
    name='Change in Production',
    marker_color="teal"
))
fig.add_trace(go.Bar(
    x=population_vs_supply.Year.self, y=population_vs_supply.other_cumchange,
    name='Change in Supply',
    marker_color="lightsalmon"
))
fig.update_layout(
    title={
        'text': "Percentage Cumulative Change in Annual Population, Food Production and Supply in Africa from 2004",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    barmode='group')
fig.show()

In [23]:
# Test the null hypothesis that production has no relation with supply
stat, p, dof, expected = chi2_contingency(production_vs_supply.Value)

statistical_significance = 0.05
print("test has a value of {} with p value is {}".format(stat, p))
if p <= statistical_significance:
    print('Dependent (reject null hypothesis)')
else:
    print('Independent (null hypothesis holds true)')
corr, cp = pearsonr(production_vs_supply.Value.self, production_vs_supply.Value.other)
print("Correlation is {} with a p value of {}".format(corr, cp))

test has a value of 13.466196172284494 with p value is 0.14262083017651836
Independent (null hypothesis holds true)
Correlation is 0.9640025214458485 with a p value of 7.033660470792866e-06


In [24]:
# Test the null hypothesis that population has no relation with supply
stat, p, dof, expected = chi2_contingency(population_vs_supply.Value)

statistical_significance = 0.05
print("test has a value of {} with p value is {}".format(stat, p))
if p <= statistical_significance:
    print('Dependent (reject null hypothesis)')
    
else:
    print('Independent (null hypothesis holds true)')
corr, cp = pearsonr(population_vs_supply.Value.self, population_vs_supply.Value.other)
print("Correlation is {} with a p value of {}".format(corr, cp))

test has a value of 60.065710922820244 with p value is 1.3021509701528895e-09
Dependent (reject null hypothesis)
Correlation is 0.9914648955139599 with a p value of 2.2980440757798363e-08


In [25]:
# Test the null hypothesis that production has no relation with production
stat, p, dof, expected = chi2_contingency(population_vs_production.Value)

statistical_significance = 0.05
print("test has a value of {} with p value is {}".format(stat, p))
if p <= statistical_significance:
    print('Dependent (reject null hypothesis)')
else:
    print('Independent (null hypothesis holds true)')
corr, cp = pearsonr(population_vs_production.Value.self, population_vs_production.Value.other)
print("Correlation is {} with a p value of {}".format(corr, cp))

test has a value of 1.3143583992757133 with p value is 0.9983039781682498
Independent (null hypothesis holds true)
Correlation is 0.9853025295546025 with a p value of 2.005709809062478e-07


In [26]:
# Plotting a scattar plot to visualise the correction of food production and supply
fig = px.scatter(x=production_vs_supply.Value.self, y=production_vs_supply.Value.other, trendline="ols", trendline_scope="overall", color=population_vs_supply.Year.self)
fig.update_layout(title="Correlation of Annual Food Production and Supply in Africa", xaxis_title="Food Production", yaxis_title="Food Supply", legend_title="Year")
fig.show()

In [27]:
# Plotting a scattar plot to visualise the correction of food production and supply
fig = px.scatter(x=population_vs_supply.Value.self, y=population_vs_supply.Value.other, trendline="ols", trendline_scope="overall", color=population_vs_supply.Year.self)
fig.update_layout(title="Correlation of Total Africa Population and Food Supply", xaxis_title="Total Africa Population ", yaxis_title="Food Supply", legend_title="Year")
fig.show()

In [28]:
# Plotting a scattar plot to visualise the correlation of africa  population and food production
fig = px.scatter(x=population_vs_production.Value.self, y=population_vs_production.Value.other, trendline="ols", trendline_scope="overall", color=population_vs_production.Year.self)
fig.update_layout(title="Correlation of Total Africa Population and Food Production", xaxis_title="Africa Population", yaxis_title="Food Production", legend_title="Year")
fig.show()

In [234]:
# Plotting a Line Plot to show the distribution of population in African 
fig = px.line(african_population, x="Year", y="Value", title='African Population by Country', color="Country")
fig.show()

In [236]:
# Plotting a Line Plot to show the distribution of food production in African 
fig = px.line(country_production_yearly, x="Year", y="Value", title='African Food Production by Country', color="Country")
fig.show()

In [238]:
# Plotting a Line Plot to show the distribution of food supply in African 
fig = px.line(country_supply_yearly, x="Year", y="Value", title='African Food Supply by Country', color="Country")
fig.show()

In [30]:
# The mean food production of african countries
country_production_mean = produce.groupby(['Country'])['Value'].mean().reset_index()
country_production_mean

Unnamed: 0,Country,Value
0,Algeria,325.258621
1,Angola,362.035185
2,Benin,149.274576
3,Botswana,14.141176
4,Burkina Faso,232.85814
5,Cabo Verde,5.024242
6,Cameroon,304.107692
7,Central African Republic,55.38
8,Chad,115.584615
9,Congo,51.582


In [29]:
# The mean food production of african countries
country_supply_mean = supply.groupby(['Country'])['Value'].mean().reset_index()
country_supply_mean

Unnamed: 0,Country,Value
0,Algeria,3111.8
1,Angola,2255.6
2,Benin,2537.8
3,Botswana,2226.3
4,Burkina Faso,2607.2
5,Cabo Verde,2551.4
6,Cameroon,2460.3
7,Central African Republic,2071.9
8,Chad,2051.1
9,Congo,2153.0


In [27]:
# The mean food production of african countries
country_population_mean = african_population.groupby(['Country'])['Value'].mean().reset_index()
country_population_mean

Unnamed: 0,Country,Value
0,Algeria,35187.7265
1,Angola,22215.7536
2,Benin,8843.9468
3,Botswana,1924.9724
4,Burkina Faso,14969.5705
5,Cabo Verde,483.9887
6,Cameroon,19579.9481
7,Central African Republic,4261.5093
8,Chad,11416.8576
9,Congo,4071.293


In [31]:
# Comparing the mean of annual country poroduction, and annual supply
country_production_vs_supply = country_production_mean.compare(country_supply_mean, keep_equal=True, keep_shape=True)
country_production_vs_supply

Unnamed: 0_level_0,Country,Country,Value,Value
Unnamed: 0_level_1,self,other,self,other
0,Algeria,Algeria,325.258621,3111.8
1,Angola,Angola,362.035185,2255.6
2,Benin,Benin,149.274576,2537.8
3,Botswana,Botswana,14.141176,2226.3
4,Burkina Faso,Burkina Faso,232.85814,2607.2
5,Cabo Verde,Cabo Verde,5.024242,2551.4
6,Cameroon,Cameroon,304.107692,2460.3
7,Central African Republic,Central African Republic,55.38,2071.9
8,Chad,Chad,115.584615,2051.1
9,Congo,Congo,51.582,2153.0


In [32]:
# Comparing the mean of annual country population, and annual supply
country_population_vs_supply = country_population_mean.compare(country_supply_mean, keep_equal=True, keep_shape=True)
country_population_vs_supply

Unnamed: 0_level_0,Country,Country,Value,Value
Unnamed: 0_level_1,self,other,self,other
0,Algeria,Algeria,35187.7265,3111.8
1,Angola,Angola,22215.7536,2255.6
2,Benin,Benin,8843.9468,2537.8
3,Botswana,Botswana,1924.9724,2226.3
4,Burkina Faso,Burkina Faso,14969.5705,2607.2
5,Cabo Verde,Cabo Verde,483.9887,2551.4
6,Cameroon,Cameroon,19579.9481,2460.3
7,Central African Republic,Central African Republic,4261.5093,2071.9
8,Chad,Chad,11416.8576,2051.1
9,Congo,Congo,4071.293,2153.0


In [33]:
# Comparing the mean of annual country population,  and annual production
country_population_vs_production = country_population_mean.compare(country_production_mean,  keep_equal=True, keep_shape=True)
country_population_vs_production

Unnamed: 0_level_0,Country,Country,Value,Value
Unnamed: 0_level_1,self,other,self,other
0,Algeria,Algeria,35187.7265,325.258621
1,Angola,Angola,22215.7536,362.035185
2,Benin,Benin,8843.9468,149.274576
3,Botswana,Botswana,1924.9724,14.141176
4,Burkina Faso,Burkina Faso,14969.5705,232.85814
5,Cabo Verde,Cabo Verde,483.9887,5.024242
6,Cameroon,Cameroon,19579.9481,304.107692
7,Central African Republic,Central African Republic,4261.5093,55.38
8,Chad,Chad,11416.8576,115.584615
9,Congo,Congo,4071.293,51.582


In [72]:
# Test the null hypothesis that country production has no relation with supply
stat, p, dof, expected = chi2_contingency(country_production_vs_supply.Value)

statistical_significance = 0.05
print("test has a value of {} with p value is {}".format(stat, p))
if p <= statistical_significance:
    print('Dependent (reject null hypothesis)')
else:
    print('Independent (null hypothesis holds true)')
corr, cp = pearsonr(production_vs_supply.Value.self, production_vs_supply.Value.other)
print("Correlation is {} with a p value of {}".format(corr, cp))

test has a value of 16259.084326856908 with p value is 0.0
Dependent (reject null hypothesis)
Correlation is 0.2744650739235617 with a p value of 0.06806553238226742


In [73]:
# Test the null hypothesis that country population has no relation with supply
stat, p, dof, expected = chi2_contingency(country_population_vs_supply.Value)

statistical_significance = 0.05
print("test has a value of {} with p value is {}".format(stat, p))
if p <= statistical_significance:
    print('Dependent (reject null hypothesis)')
else:
    print('Independent (null hypothesis holds true)')
corr, cp = pearsonr(production_vs_supply.Value.self, production_vs_supply.Value.other)
print("Correlation is {} with a p value of {}".format(corr, cp))

test has a value of 193143.0925134271 with p value is 0.0
Dependent (reject null hypothesis)
Correlation is 0.2084172302146829 with a p value of 0.1694714722760874


In [74]:
# Test the null hypothesis that country population has no relation with production
stat, p, dof, expected = chi2_contingency(country_population_vs_production.Value)

statistical_significance = 0.05
print("test has a value of {} with p value is {}".format(stat, p))
if p <= statistical_significance:
    print('Dependent (reject null hypothesis)')
else:
    print('Independent (null hypothesis holds true)')
corr, cp = pearsonr(production_vs_supply.Value.self, production_vs_supply.Value.other)
print("Correlation is {} with a p value of {}".format(corr, cp))

test has a value of 2653.000439654067 with p value is 0.0
Dependent (reject null hypothesis)
Correlation is 0.9466059065377124 with a p value of 9.246580722944023e-23


In [158]:
# Plotting a scattar plot to visualise the correlation of food production and supply
fig = px.scatter(x=country_production_vs_supply.Value.self, y=country_production_vs_supply.Value.other, trendline="ols", trendline_scope="overall", color=country_production_vs_supply.Country.self)
fig.update_layout(title="Correlation of Food Production and Supply in African Countries", xaxis_title="Food Production", yaxis_title="Food Supply", legend_title="Country")
fig.show()

In [155]:
# Plotting a scattar plot to visualise the correlation of africa country population and food supply
fig = px.scatter(x=country_population_vs_supply.Value.self, y=country_population_vs_supply.Value.other, trendline="ols", trendline_scope="overall", color=country_population_vs_supply.Country.self)
fig.update_layout(title="Correlation of African Countries Population and Food Supply", xaxis_title="Africa Country Population ", yaxis_title="Food Supply", legend_title="Country")
fig.show()

In [157]:
# Plotting a scattar plot to visualise the correlation of africa country population and food production
fig = px.scatter(x=country_population_vs_production.Value.self, y=country_population_vs_production.Value.other, trendline="ols", trendline_scope="overall", color=country_population_vs_production.Country.self)
fig.update_layout(title="Correlation of African Countries Population and Food Production", xaxis_title="Africa Country Population", yaxis_title="Food Production", legend_title="Country")
fig.show()

In [34]:
# Calculating the percentage change in production in each country between years and ther cummulative change from 2004 - 2013
annual_change = []
annual_cumchange = []
for country in country_production_yearly.Country.unique():
    annual_change.extend(country_production_yearly.loc[country_production_yearly.Country == country].Value.pct_change().mul(100).round(2))
country_production_yearly['annual_change'] = annual_change
for country in country_production_yearly.Country.unique():
    annual_cumchange.extend(country_production_yearly.loc[country_production_yearly.Country == country]['annual_change'].cumsum())
country_production_yearly['annual_cumchange'] = annual_cumchange
country_production_yearly = country_production_yearly.fillna(0)
country_production_yearly

Unnamed: 0,Country,Year,Value,annual_change,annual_cumchange
0,Algeria,2004,15536,0.00,0.00
1,Algeria,2005,15667,0.84,0.84
2,Algeria,2006,16417,4.79,5.63
3,Algeria,2007,14763,-10.07,-4.44
4,Algeria,2008,13841,-6.25,-10.69
...,...,...,...,...,...
445,Zimbabwe,2009,5754,-2.09,-38.80
446,Zimbabwe,2010,6777,17.78,-21.02
447,Zimbabwe,2011,7551,11.42,-9.60
448,Zimbabwe,2012,8173,8.24,-1.36


In [35]:
# Calculating the percentage change in supply in each country between years and ther cummulative change from 2004 - 2013
annual_change = []
annual_cumchange = []
for country in country_supply_yearly.Country.unique():
    annual_change.extend(country_supply_yearly.loc[country_supply_yearly.Country == country].Value.pct_change().mul(100).round(2))
country_supply_yearly['annual_change'] = annual_change
for country in country_supply_yearly.Country.unique():
    annual_cumchange.extend(country_supply_yearly.loc[country_supply_yearly.Country == country]['annual_change'].cumsum())
country_supply_yearly['annual_cumchange'] = annual_cumchange
country_supply_yearly = country_supply_yearly.fillna(0)
country_supply_yearly

Unnamed: 0,Country,Year,Value,annual_change,annual_cumchange
0,Algeria,2004,2987,0.00,0.00
1,Algeria,2005,2958,-0.97,-0.97
2,Algeria,2006,3047,3.01,2.04
3,Algeria,2007,3041,-0.20,1.84
4,Algeria,2008,3048,0.23,2.07
...,...,...,...,...,...
445,Zimbabwe,2009,2147,2.53,5.05
446,Zimbabwe,2010,2168,0.98,6.03
447,Zimbabwe,2011,2200,1.48,7.51
448,Zimbabwe,2012,2197,-0.14,7.37


In [36]:
# Calculating the percentage change in population in each country between years and ther cummulative change from 2004 - 2013
annual_change = []
annual_cumchange = []
for country in african_population.Country.unique():
    annual_change.extend(african_population.loc[african_population.Country == country].Value.pct_change().mul(100).round(2))
african_population['annual_change'] = annual_change
for country in african_population.Country.unique():
    annual_cumchange.extend(african_population.loc[african_population.Country == country]['annual_change'].cumsum())
african_population['annual_cumchange'] = annual_cumchange
african_population = african_population.fillna(0)
african_population

Unnamed: 0,Country,Year,Value,annual_change,annual_cumchange
0,Algeria,2004,32692.153,0.00,0.00
1,Algeria,2005,33149.720,1.40,1.40
2,Algeria,2006,33641.007,1.48,2.88
3,Algeria,2007,34166.976,1.56,4.44
4,Algeria,2008,34730.604,1.65,6.09
...,...,...,...,...,...
445,Zimbabwe,2009,12526.964,1.19,4.15
446,Zimbabwe,2010,12697.728,1.36,5.51
447,Zimbabwe,2011,12894.323,1.55,7.06
448,Zimbabwe,2012,13115.149,1.71,8.77


In [48]:
# Plotting a bar graph to show thecumulative percentage change in population, production and supply
fig = go.Figure()
fig.add_trace(go.Bar(
    y=african_population.loc[african_population.Year == '2013'].Country, x=african_population.loc[african_population.Year == '2013'].annual_cumchange,
    name='Change in Population', orientation='h',
    marker_color="indianred"
))
fig.add_trace(go.Bar(
    y=country_production_yearly.loc[country_production_yearly.Year == 2013].Country, x=country_production_yearly.loc[country_production_yearly.Year == 2013].annual_cumchange,
    name='Change in Production',  orientation='h',
    marker_color="teal"
))
fig.add_trace(go.Bar(
    y=country_supply_yearly.loc[country_supply_yearly.Year == 2013].Country, x=country_supply_yearly.loc[country_supply_yearly.Year == 2013].annual_cumchange,
    name='Change in Supply',  orientation='h',
    marker_color="lightsalmon"
))
fig.update_layout(autosize=False, width=1000, height=1500,
    title={
        'text': "Percentage Cumulative Change in Annual Population, Food Production and Supply in Africa from 2004",
        'y':0.95,
        'x':0.5,
        'xanchor': 'auto',
        'yanchor': 'top'},
    barmode='group')
fig.show()

## FOOD ITEMS PRODUCED IN AFRICA

In [None]:
# Total produced food by type
total_food_type = produce.groupby('Item')['Value'].sum().reset_index()
total_food_type

Unnamed: 0,Item,Value
0,"Alcohol, Non-Food",3652
1,Apples and products,21706
2,"Aquatic Animals, Others",13
3,Aquatic Plants,1378
4,Bananas,153785
...,...,...
89,Tomatoes and products,172192
90,"Vegetables, Other",410403
91,Wheat and products,229875
92,Wine,11219


In [None]:
# Top 50 food item produced in Africa
total_food_type.nlargest(50, ['Value'])

Unnamed: 0,Item,Value
12,Cassava and products,1118948
82,Sugar cane,887008
40,Maize and products,574948
93,Yams,511523
90,"Vegetables, Other",410403
43,Milk - Excluding Butter,380380
76,Sorghum and products,241080
91,Wheat and products,229875
9,"Beverages, Fermented",227037
66,Potatoes and products,221092


In [186]:
# Plotting a pie chart to show the top 20 food types produced in Africa
fig = px.pie(total_food_type.nlargest(20, ['Value']), values="Value", names="Item", title='Top 20 Food Items Produced in Africa')
fig.update_traces(textposition='inside', textinfo='percent')
fig.show()

In [None]:
# Calculating quantity of various food items produced yearly
item_produced_yearly = produce.groupby(['Item', 'Year'])['Value'].sum().reset_index()
item_produced_yearly 

Unnamed: 0,Item,Year,Value
0,"Alcohol, Non-Food",2004,296
1,"Alcohol, Non-Food",2005,332
2,"Alcohol, Non-Food",2006,338
3,"Alcohol, Non-Food",2007,370
4,"Alcohol, Non-Food",2008,477
...,...,...,...
935,Yams,2009,45518
936,Yams,2010,51253
937,Yams,2011,54819
938,Yams,2012,59266


In [None]:
fig = go.Figure()
fig.add_trace(go.Box(
    x = item_produced_yearly.Year,
    y = item_produced_yearly.Value,
    name='Mean',
    marker_color='darkblue',
    boxmean=True # represent mean
))
fig.update_layout(
    title={
        'text': "African Annual Food Production",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

In [None]:
# Analysing for outliers in african food production to show the food items outside the upper fence
Q1 = np.percentile(item_produced_yearly['Value'], 25,
                   interpolation = 'midpoint')

Q3 = np.percentile(item_produced_yearly['Value'], 75,
                   interpolation = 'midpoint')
IQR = Q3 - Q1
food_item_outlier = item_produced_yearly.loc[item_produced_yearly['Value'] > (Q3+1.5*IQR)].sort_values(by=['Year', 'Value']).reset_index(drop=True)
food_item_outlier.Item.unique()

array(['Plantains', 'Sorghum and products', 'Beverages, Fermented',
       'Wheat and products', 'Milk - Excluding Butter',
       'Vegetables, Other', 'Yams', 'Maize and products', 'Sugar cane',
       'Cassava and products', 'Potatoes and products',
       'Millet and products', 'Tomatoes and products',
       'Rice (Milled Equivalent)', 'Sweet potatoes', 'Bananas'],
      dtype=object)

In [None]:
# Calculating the percentage change in item quantity in each country between years and ther cummulative change from 2004 - 2013
annual_change = []
annual_cumchange = []
for item in item_produced_yearly.Item.unique():
    annual_change.extend(item_produced_yearly.loc[item_produced_yearly.Item == item].Value.pct_change().mul(100).round(2))
item_produced_yearly['annual_change'] = annual_change
for item in item_produced_yearly.Item.unique():
    annual_cumchange.extend(item_produced_yearly.loc[item_produced_yearly.Item == item]['annual_change'].cumsum())
item_produced_yearly['annual_cumchange'] = annual_cumchange
item_produced_yearly = item_produced_yearly.fillna(0)
item_produced_yearly

Unnamed: 0,Item,Year,Value,annual_change,annual_cumchange
0,"Alcohol, Non-Food",2004,296,0.00,0.00
1,"Alcohol, Non-Food",2005,332,12.16,12.16
2,"Alcohol, Non-Food",2006,338,1.81,13.97
3,"Alcohol, Non-Food",2007,370,9.47,23.44
4,"Alcohol, Non-Food",2008,477,28.92,52.36
...,...,...,...,...,...
935,Yams,2009,45518,-10.66,3.22
936,Yams,2010,51253,12.60,15.82
937,Yams,2011,54819,6.96,22.78
938,Yams,2012,59266,8.11,30.89


In [187]:
# Plotting a bar graph to show the cumulative percentage change in various food items produced between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    x=item_produced_yearly.loc[item_produced_yearly.Year == 2013].Item, y=item_produced_yearly.loc[item_produced_yearly.Year == 2013].annual_cumchange,
    name='Change in Production',
    marker_color="teal"
))
fig.update_layout(
    title={
        'text': "Percentage Cumulative Change in Production of Various Food Items in Africa from 2004 - 2013",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [73]:
# Calculating items produced in each country
item_by_country = produce.groupby(['Country','Item'])['Value'].sum().reset_index()
item_by_country = pd.DataFrame(item_by_country)
item_by_country

Unnamed: 0,Country,Item,Value
0,Algeria,Apples and products,3003
1,Algeria,Bananas,0
2,Algeria,Barley and products,12770
3,Algeria,Beans,11
4,Algeria,Beer,1358
...,...,...,...
2307,Zimbabwe,Tea (including mate),197
2308,Zimbabwe,Tomatoes and products,204
2309,Zimbabwe,"Vegetables, Other",1749
2310,Zimbabwe,Wheat and products,996


In [74]:
# Filtering out the countries that do not produce certain food items
item_by_country = item_by_country[item_by_country['Value'] > 0].reset_index(drop=True)
item_by_country

Unnamed: 0,Country,Item,Value
0,Algeria,Apples and products,3003
1,Algeria,Barley and products,12770
2,Algeria,Beans,11
3,Algeria,Beer,1358
4,Algeria,Bovine Meat,1273
...,...,...,...
2106,Zimbabwe,Tea (including mate),197
2107,Zimbabwe,Tomatoes and products,204
2108,Zimbabwe,"Vegetables, Other",1749
2109,Zimbabwe,Wheat and products,996


In [75]:
# Numbers of Countries producing a specific item
items_from_country = produce.groupby('Item')['Country'].nunique().reset_index()
items_from_country = items_from_country.sort_values(by=['Country', 'Item']).reset_index(drop=True)
items_from_country

Unnamed: 0,Item,Country
0,Cloves,3
1,Rye and products,3
2,Olive Oil,4
3,Olives (including preserved),4
4,Rape and Mustard Oil,4
...,...,...
89,"Fruits, Other",45
90,Milk - Excluding Butter,45
91,Mutton & Goat Meat,45
92,"Offals, Edible",45


In [66]:
# Plotting a bar graph to show the cumulative percentage change in various food items produced between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_from_country.Item, x=items_from_country.Country,
    name='Number', orientation='h',
    marker_color="chocolate"
))
fig.update_layout(autosize=False, width=1000, height=1500,
    title={
        'text': "Number of Countries producing Food Items",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [163]:
# Numbers of Countries producing a specific item
items_to_country = produce.groupby(['Item', 'Country'])['Value'].mean().reset_index()
items_to_country = items_to_country.sort_values(by=['Item', 'Country']).reset_index(drop=True)
items_to_country = items_to_country.loc[items_to_country.Value > 0]
items_to_country

Unnamed: 0,Item,Country,Value
0,"Alcohol, Non-Food",Egypt,20.5
1,"Alcohol, Non-Food",Kenya,14.2
2,"Alcohol, Non-Food",Mali,2.0
3,"Alcohol, Non-Food",Mauritius,5.2
4,"Alcohol, Non-Food",Morocco,6.5
...,...,...,...
2307,Yams,Rwanda,23.8
2308,Yams,Sao Tome and Principe,2.0
2309,Yams,Sudan,150.2
2310,Yams,Togo,677.5


In [166]:
# Plotting a bar graph to show the country producing in Plantain between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Plantains'].Value, x=items_to_country.loc[items_to_country.Item == 'Plantains'].Country,
    name='Plantain Producing Country',
    marker_color="purple"
))
fig.update_layout(
    title={
        'text': "Plantain Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [167]:
# Plotting a bar graph to show the country producing in Sorghum and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Sorghum and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Sorghum and products'].Country,
    name='Sorghum and products Producing Country',
    marker_color="red"
))
fig.update_layout(
    title={
        'text': "Sorghum and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [168]:
# Plotting a bar graph to show the country producing in Beverages, Fermented between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Beverages, Fermented'].Value, x=items_to_country.loc[items_to_country.Item == 'Beverages, Fermented'].Country,
    name='Beverages, Fermented Producing Country',
    marker_color="darksalmon"
))
fig.update_layout(
    title={
        'text': "Beverages, Fermented Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [169]:
# Plotting a bar graph to show the  country producing in Wheat and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Wheat and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Wheat and products'].Country,
    name='Wheat and products Producing Country',
    marker_color="darkorange"
))
fig.update_layout(
    title={
        'text': "Wheat and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [170]:
# Plotting a bar graph to show the country producing in Milk - Excluding Butter between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Milk - Excluding Butter'].Value, x=items_to_country.loc[items_to_country.Item == 'Milk - Excluding Butter'].Country,
    name='Milk - Excluding Butter Producing Country',
    marker_color="cyan"
))
fig.update_layout(
    title={
        'text': "Milk - Excluding Butter Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [171]:
# Plotting a bar graph to show the country producing in Vegetables, Other between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Vegetables, Other'].Value, x=items_to_country.loc[items_to_country.Item == 'Vegetables, Other'].Country,
    name='Vegetables, Other Producing Country',
    marker_color="skyblue"
))
fig.update_layout(
    title={
        'text': "Vegetables, Other Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [172]:
# Plotting a bar graph to show the country producing in Yams between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Yams'].Value, x=items_to_country.loc[items_to_country.Item == 'Yams'].Country,
    name='Yams Producing Country',
    marker_color="darkblue"
))
fig.update_layout(
    title={
        'text': "Yams Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [173]:
# Plotting a bar graph to show the country producing in Maize and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Maize and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Maize and products'].Country,
    name='Maize and products Producing Country',
    marker_color="darkgreen"
))
fig.update_layout(
    title={
        'text': "Maize and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [174]:
# Plotting a bar graph to show the country producing in Sugar cane between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Sugar cane'].Value, x=items_to_country.loc[items_to_country.Item == 'Sugar cane'].Country,
    name='Sugar cane Producing Country',
    marker_color="green"
))
fig.update_layout(
    title={
        'text': "Sugar cane Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [175]:
# Plotting a bar graph to show the country producing in Cassava and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Cassava and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Cassava and products'].Country,
    name='Cassava and products Producing Country',
    marker_color="yellow"
))
fig.update_layout(
    title={
        'text': "Cassava and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [176]:
# Plotting a bar graph to show the country producing in Potatoes and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Potatoes and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Potatoes and products'].Country,
    name='Potatoes and products Producing Country',
    marker_color="firebrick"
))
fig.update_layout(
    title={
        'text': "Potatoes and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [177]:
# Plotting a bar graph to show the country producing in Millet and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Millet and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Millet and products'].Country,
    name='Millet and products Producing Country',
    marker_color="magenta"
))
fig.update_layout(
    title={
        'text': "Millet and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [178]:
# Plotting a bar graph to show the country producing in Tomatoes and products between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Tomatoes and products'].Value, x=items_to_country.loc[items_to_country.Item == 'Tomatoes and products'].Country,
    name='Tomatoes and products Producing Country',
    marker_color="darkred"
))
fig.update_layout(
    title={
        'text': "Tomatoes and products Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [179]:
# Plotting a bar graph to show the country producing in Rice (Milled Equivalent) between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Rice (Milled Equivalent)'].Value, x=items_to_country.loc[items_to_country.Item == 'Rice (Milled Equivalent)'].Country,
    name='Rice (Milled Equivalent) Producing Country',
    marker_color="burlywood"
))
fig.update_layout(
    title={
        'text': "Rice (Milled Equivalent) Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [180]:
# Plotting a bar graph to show the country producing in Sweet potatoes between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Sweet potatoes'].Value, x=items_to_country.loc[items_to_country.Item == 'Sweet potatoes'].Country,
    name='Sweet potatoes Producing Country',
    marker_color="cadetblue"
))
fig.update_layout(
    title={
        'text': "Sweet potatoes Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [181]:
# Plotting a bar graph to show the country producing in Bananas between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Bananas'].Value, x=items_to_country.loc[items_to_country.Item == 'Bananas'].Country,
    name='Bananas Producing Country',
    marker_color="brown"
))
fig.update_layout(
    title={
        'text': "Bananas Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [182]:
# Plotting a bar graph to show the country producing in Oats between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=items_to_country.loc[items_to_country.Item == 'Oats'].Value, x=items_to_country.loc[items_to_country.Item == 'Oats'].Country,
    name='Oats Producing Country',
    marker_color="lime"
))
fig.update_layout(
    title={
        'text': "Oats Producing Country",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [76]:
# Relation between each country based on food items
def get_relations(df):
    sources = df.Country.unique()
    targets = df.Country
    relations = set()

    for source in sources:
        targets = list(filter((source).__ne__, targets)) # to remove duplicates of source and target
        for target in targets:
            items = list(set(df.loc[df.Country == source].Item).intersection(df.loc[df.Country == target].Item))
            for item in items:
                relations.add((source, target, item))
    return(relations)

country_food_relations = get_relations(item_by_country)
country_food_relations

{('Guinea-Bissau', 'Namibia', 'Offals, Edible'),
 ('Congo', 'Nigeria', 'Maize and products'),
 ('Ethiopia', 'Namibia', 'Offals, Edible'),
 ('Madagascar', 'Zimbabwe', 'Bovine Meat'),
 ('Benin', 'Guinea-Bissau', 'Cereals, Other'),
 ('Mali', 'Rwanda', 'Beer'),
 ('Benin', 'Gambia', 'Nuts and products'),
 ('Mozambique', 'Sudan', 'Sweet potatoes'),
 ('Angola', 'Cabo Verde', 'Marine Fish, Other'),
 ('Angola', 'Cameroon', 'Palmkernel Oil'),
 ('South Africa', 'Zimbabwe', 'Fruits, Other'),
 ('Central African Republic', 'Madagascar', 'Rice (Milled Equivalent)'),
 ('Mauritania', 'Sierra Leone', 'Pelagic Fish'),
 ('Egypt', 'South Africa', 'Pigmeat'),
 ('Madagascar', 'Zimbabwe', 'Tomatoes and products'),
 ('Guinea', 'Liberia', 'Milk - Excluding Butter'),
 ('Tunisia', 'Zambia', 'Spices, Other'),
 ('Cameroon', 'Zimbabwe', 'Pulses, Other and products'),
 ('Algeria', 'Malawi', 'Wheat and products'),
 ('Angola', 'Mozambique', 'Eggs'),
 ('Ivory Coast', 'Senegal', 'Pulses, Other and products'),
 ('Ethiopia

In [78]:
# Using the set of relations to create a panda Dataframe sorting by related food items 
country_food_relations = pd.DataFrame(country_food_relations, columns = ['Source', 'Target', 'Relations'])
country_food_relations = country_food_relations.sort_values(by=['Relations', 'Source', 'Target']).reset_index(drop=True)
country_food_relations

Unnamed: 0,Source,Target,Relations
0,Egypt,Kenya,"Alcohol, Non-Food"
1,Egypt,Mali,"Alcohol, Non-Food"
2,Egypt,Mauritius,"Alcohol, Non-Food"
3,Egypt,Morocco,"Alcohol, Non-Food"
4,Egypt,South Africa,"Alcohol, Non-Food"
...,...,...,...
30273,Sao Tome and Principe,Togo,Yams
30274,Sao Tome and Principe,United Republic of Tanzania,Yams
30275,Sudan,Togo,Yams
30276,Sudan,United Republic of Tanzania,Yams


In [None]:
# A plot to show the network between country on the basis of similarity in food items produced
got_net = Network(notebook=False, directed=False,height='750px', width='100%', bgcolor='#222222', font_color='white')

# set the physics layout of the network
got_net.barnes_hut()

sources = country_food_relations['Source']
targets = country_food_relations['Target']
weights = country_food_relations['Relations']

edge_data = zip(sources, targets, weights)

for e in edge_data:
    src = e[0]
    dst = e[1]
    w = e[2]

    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])
    node['value'] = len(neighbor_map[node['id']])

got_net.show('food.html')

In [111]:
# Mean quantity of food produced in different country
item_producedincountry_yearly = produce.groupby(['Country', 'Item'])['Value'].mean().reset_index()
item_producedincountry_yearly = item_producedincountry_yearly.loc[item_producedincountry_yearly.Value > 0]
item_producedincountry_yearly

Unnamed: 0,Country,Item,Value
0,Algeria,Apples and products,300.3
2,Algeria,Barley and products,1277.0
3,Algeria,Beans,1.1
4,Algeria,Beer,135.8
6,Algeria,Bovine Meat,127.3
...,...,...,...
2307,Zimbabwe,Tea (including mate),19.7
2308,Zimbabwe,Tomatoes and products,20.4
2309,Zimbabwe,"Vegetables, Other",174.9
2310,Zimbabwe,Wheat and products,99.6


In [185]:
# Plotting a bar graph to show the mean quantity of various food items produced in Egypt between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=item_producedincountry_yearly.loc[item_producedincountry_yearly.Country == 'Egypt'].Value, x=item_producedincountry_yearly.loc[item_producedincountry_yearly.Country == 'Egypt'].Item,
    name='Eygpt Food Items',
    marker_color="purple"
))
fig.update_layout(
    title={
        'text': "Food Items produced in Egypt",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [184]:
# Plotting a bar graph to show the mean quantity of various food items produced in Nigeria between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=item_producedincountry_yearly.loc[item_producedincountry_yearly.Country == 'Nigeria'].Value, x=item_producedincountry_yearly.loc[item_producedincountry_yearly.Country == 'Nigeria'].Item,
    name='Nigeria Food Items',
    marker_color="purple"
))
fig.update_layout(
    title={
        'text': "Food Items produced in Nigeria",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [183]:
# Plotting a bar graph to show the mean quantity of various food items produced in South Africa between 2004 - 2013
fig = go.Figure()
fig.add_trace(go.Bar(
    y=item_producedincountry_yearly.loc[item_producedincountry_yearly.Country == 'South Africa'].Value, x=item_producedincountry_yearly.loc[item_producedincountry_yearly.Country == 'South Africa'].Item,
    name='Eygpt Food Items',
    marker_color="purple"
))
fig.update_layout(
    title={
        'text': "Food Items produced in South Africa",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()