In [1]:
#importing the necessary libraries for data exploration and visualization

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
filename = 'food_data.csv'
food_data = pd.read_csv(filename)

In [3]:
food_data.head() #displays the first five rows

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y2014,Y2015,Y2016,Y2017,Y2018
0,4,Algeria,2501,Population,511,Total Population - Both sexes,1000 persons,38924.0,39728.0,40551.0,41389.0,42228.0
1,4,Algeria,2501,Population,5301,Domestic supply quantity,1000 tonnes,0.0,0.0,0.0,0.0,0.0
2,4,Algeria,2901,Grand Total,664,Food supply (kcal/capita/day),kcal/capita/day,3377.0,3379.0,3372.0,3341.0,3322.0
3,4,Algeria,2901,Grand Total,674,Protein supply quantity (g/capita/day),g/capita/day,94.9,94.35,94.72,92.82,91.83
4,4,Algeria,2901,Grand Total,684,Fat supply quantity (g/capita/day),g/capita/day,80.06,79.36,77.4,80.19,77.28


In [4]:
food_data.shape #checks the number of rows and columns

(60943, 12)

In [5]:
food_data.describe(include='all') #shows the statistical analysis of the data

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y2014,Y2015,Y2016,Y2017,Y2018
count,60943.0,60943,60943.0,60943,60943.0,60943,60943,59354.0,59395.0,59408.0,59437.0,59507.0
unique,,49,,119,,18,5,,,,,
top,,Kenya,,Milk - Excluding Butter,,Domestic supply quantity,1000 tonnes,,,,,
freq,,1560,,1262,,5295,40933,,,,,
mean,134.265576,,2687.176706,,3814.856456,,,134.196282,135.235966,136.555222,140.917765,143.758381
std,72.605709,,146.055739,,2212.007033,,,1567.663696,1603.403984,1640.007194,1671.862359,1710.782658
min,4.0,,2501.0,,511.0,,,-1796.0,-3161.0,-3225.0,-1582.0,-3396.0
25%,74.0,,2562.0,,684.0,,,0.0,0.0,0.0,0.0,0.0
50%,136.0,,2630.0,,5142.0,,,0.09,0.08,0.08,0.1,0.07
75%,195.0,,2775.0,,5511.0,,,8.34,8.46,8.43,9.0,9.0


In [6]:
# checking the datatypes to see if any correction needs to be made
food_data.dtypes

Area Code         int64
Area             object
Item Code         int64
Item             object
Element Code      int64
Element          object
Unit             object
Y2014           float64
Y2015           float64
Y2016           float64
Y2017           float64
Y2018           float64
dtype: object

In [7]:
#checking for null values
food_data.isnull().sum()

Area Code          0
Area               0
Item Code          0
Item               0
Element Code       0
Element            0
Unit               0
Y2014           1589
Y2015           1548
Y2016           1535
Y2017           1506
Y2018           1436
dtype: int64

In [8]:
#check for duplicates

food_data.duplicated().any()

False

In [9]:
food_data.corr(method='pearson')

Unnamed: 0,Area Code,Item Code,Element Code,Y2014,Y2015,Y2016,Y2017,Y2018
Area Code,1.0,-0.005159,-0.000209,0.006164,0.005472,0.005247,0.005006,0.005665
Item Code,-0.005159,1.0,-0.024683,0.021722,0.020857,0.020109,0.021494,0.021314
Element Code,-0.000209,-0.024683,1.0,0.024457,0.023889,0.023444,0.024254,0.024279
Y2014,0.006164,0.021722,0.024457,1.0,0.994647,0.996081,0.99523,0.994872
Y2015,0.005472,0.020857,0.023889,0.994647,1.0,0.995739,0.988048,0.988208
Y2016,0.005247,0.020109,0.023444,0.996081,0.995739,1.0,0.992785,0.992757
Y2017,0.005006,0.021494,0.024254,0.99523,0.988048,0.992785,1.0,0.998103
Y2018,0.005665,0.021314,0.024279,0.994872,0.988208,0.992757,0.998103,1.0


In [10]:
food_data_item = food_data[food_data['Item'] == 'Wine']

In [11]:
food_data_item

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y2014,Y2015,Y2016,Y2017,Y2018
916,4,Algeria,2655,Wine,5511,Production,1000 tonnes,52.00,10.0,7.0,62.00,54.00
917,4,Algeria,2655,Wine,5611,Import Quantity,1000 tonnes,7.00,8.0,11.0,9.00,9.00
918,4,Algeria,2655,Wine,5072,Stock Variation,1000 tonnes,0.00,0.0,0.0,0.00,0.00
919,4,Algeria,2655,Wine,5911,Export Quantity,1000 tonnes,0.00,0.0,0.0,0.00,0.00
920,4,Algeria,2655,Wine,5301,Domestic supply quantity,1000 tonnes,59.00,18.0,18.0,71.00,63.00
...,...,...,...,...,...,...,...,...,...,...,...,...
60551,181,Zimbabwe,2655,Wine,5142,Food,1000 tonnes,4.00,4.0,4.0,4.00,4.00
60552,181,Zimbabwe,2655,Wine,645,Food supply quantity (kg/capita/yr),kg,0.31,0.3,0.3,0.26,0.31
60553,181,Zimbabwe,2655,Wine,664,Food supply (kcal/capita/day),kcal/capita/day,1.00,1.0,1.0,0.00,1.00
60554,181,Zimbabwe,2655,Wine,674,Protein supply quantity (g/capita/day),g/capita/day,0.00,0.0,0.0,0.00,0.00


In [12]:
food_data_wine = food_data_item.loc[:, ['Y2015','Y2018']].sum()

In [13]:
food_data_wine

Y2015    4251.81
Y2018    4039.32
dtype: float64

In [14]:
food_data_element = food_data[food_data['Element'] == 'Processing']

In [15]:
food_data_element2 = food_data_element.loc[:, ['Y2017','Y2018']].sum()
food_data_element2

Y2017    292836.0
Y2018    308429.0
dtype: float64

In [16]:
y = [(2, 4), (7, 8), (1, 5, 9)]

In [17]:
y[1][1]

8

In [18]:
S = [['him', 'sell'], [90, 28, 43]]

S[0][1][1]

'e'

In [19]:
Ist = [[35, 'Portugal', 94], [33, 'Argentina', 93], [30 , 'Brazil', 92]]

col = ['Age','Nationality','Overall']

In [20]:
pd.DataFrame(Ist, columns=col, index=[1,2,3])

Unnamed: 0,Age,Nationality,Overall
1,35,Portugal,94
2,33,Argentina,93
3,30,Brazil,92


In [21]:
food_data_area = food_data.loc[:, ['Y2017', 'Area']]
food_data_area

Unnamed: 0,Y2017,Area
0,41389.00,Algeria
1,0.00,Algeria
2,3341.00,Algeria
3,92.82,Algeria
4,80.19,Algeria
...,...,...
60938,19.00,Zimbabwe
60939,1.33,Zimbabwe
60940,1.00,Zimbabwe
60941,0.04,Zimbabwe


In [22]:
food_data_area.groupby('Area')['Area'].value_counts()

Area                         Area                       
Algeria                      Algeria                        1313
Angola                       Angola                         1424
Benin                        Benin                          1285
Botswana                     Botswana                       1378
Burkina Faso                 Burkina Faso                   1409
Cabo Verde                   Cabo Verde                     1260
Cameroon                     Cameroon                       1460
Central African Republic     Central African Republic       1344
Chad                         Chad                           1343
Comoros                      Comoros                          45
Congo                        Congo                          1413
Côte d'Ivoire                Côte d'Ivoire                  1467
Djibouti                     Djibouti                       1095
Egypt                        Egypt                          1488
Eswatini                     Eswa

In [23]:
food_data_protein = food_data[food_data['Element'] == 'Protein supply quantity (g/capita/day)']
food_data_protein

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,Y2014,Y2015,Y2016,Y2017,Y2018
3,4,Algeria,2901,Grand Total,674,Protein supply quantity (g/capita/day),g/capita/day,94.90,94.35,94.72,92.82,91.83
6,4,Algeria,2903,Vegetal Products,674,Protein supply quantity (g/capita/day),g/capita/day,67.14,67.38,67.37,66.11,67.10
9,4,Algeria,2941,Animal Products,674,Protein supply quantity (g/capita/day),g/capita/day,27.76,26.97,27.35,26.71,24.73
25,4,Algeria,2905,Cereals - Excluding Beer,674,Protein supply quantity (g/capita/day),g/capita/day,49.43,49.54,50.01,48.86,49.36
41,4,Algeria,2511,Wheat and products,674,Protein supply quantity (g/capita/day),g/capita/day,43.20,43.33,43.85,43.18,42.85
...,...,...,...,...,...,...,...,...,...,...,...,...
60903,181,Zimbabwe,2769,"Aquatic Animals, Others",674,Protein supply quantity (g/capita/day),g/capita/day,0.00,0.00,0.00,0.00,0.00
60911,181,Zimbabwe,2775,Aquatic Plants,674,Protein supply quantity (g/capita/day),g/capita/day,0.00,0.00,0.00,0.00,0.00
60921,181,Zimbabwe,2928,Miscellaneous,674,Protein supply quantity (g/capita/day),g/capita/day,0.10,0.11,0.08,0.05,0.04
60931,181,Zimbabwe,2680,Infant food,674,Protein supply quantity (g/capita/day),g/capita/day,0.00,0.00,0.00,0.00,0.00


In [24]:
food_data_protein_madagascar = food_data_protein[food_data_protein['Area'] == 'Madagascar']
food_data_protein_madagascar.sum()

Area Code                                                   14319
Area            MadagascarMadagascarMadagascarMadagascarMadaga...
Item Code                                                  299405
Item            Grand TotalVegetal ProductsAnimal ProductsCere...
Element Code                                                74814
Element         Protein supply quantity (g/capita/day)Protein ...
Unit            g/capita/dayg/capita/dayg/capita/dayg/capita/d...
Y2014                                                      177.79
Y2015                                                      173.05
Y2016                                                       173.4
Y2017                                                      175.75
Y2018                                                      176.64
dtype: object