In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, engine

from config import local_mysql_password, local_mysql_user

# Raise the DataFrame display limits so wide/long FAO tables are not truncated too early
pd.options.display.max_rows = 500
pd.options.display.max_columns = 200

In [121]:
# Load the crop yield data (values only, no flag columns)
file_name = 'Production_Crops_Livestock_E_All_Data_NOFLAG'
fao_crop_yield_data = pd.read_csv(f"./data/fao_crop_data/unnormalized/{file_name}.csv", encoding='latin-1')
# Year columns carry a leading 'Y'; strip it only when a digit follows so that
# no other column name can be altered by accident.
fao_crop_yield_data.columns = fao_crop_yield_data.columns.str.replace(r'^Y(?=\d)', '', regex=True)

# Columns whose name is a plain 4-digit year hold the measured values.
year_cols = fao_crop_yield_data.columns[fao_crop_yield_data.columns.str.match(r'^\d{4}$')].tolist()

# Remove larger regions to leave only countries
regions = [
    'World', 'Africa', 'Eastern Africa', 'Middle Africa', 'Northern Africa', 'Southern Africa', 'Western Africa', 'Americas',
    'Northern America', 'Central America', 'Caribbean', 'South America', 'Asia', 'Central Asia', 'Eastern Asia',
    'Southern Asia', 'South-eastern Asia', 'Western Asia', 'Europe', 'Eastern Europe', 'Northern Europe', 'Southern Europe',
    'Western Europe', 'Oceania', 'Australia and New Zealand', 'Melanesia', 'Micronesia', 'Polynesia'
    ]
special_groups = [
    'European Union (27)', 'Least Developed Countries', 'Land Locked Developing Countries',
    'Small Island Developing States', 'Low Income Food Deficit Countries', 'Net Food Importing Developing Countries'
    ]


def _country_rows_for_element(data, element, excluded_areas, value_cols):
    """Return the rows of `data` for one Element, restricted to individual countries.

    Parameters
    ----------
    data : pd.DataFrame
        Table with 'Element' and 'Area' columns plus the columns in `value_cols`.
    element : str
        Value of the 'Element' column to keep (e.g. 'Yield').
    excluded_areas : list of str
        'Area' names (regions / country groups) to drop.
    value_cols : list of str
        Columns in which 0 is replaced by NaN.

    Returns
    -------
    pd.DataFrame
        An independent copy; `data` itself is not modified.
    """
    keep = (data['Element'] == element) & ~data['Area'].isin(excluded_areas)
    rows = data.loc[keep].copy()
    # Replace 0's with NaN's to prevent incorrect projections from weather data.
    # Only the value columns are touched, so identifier/code columns can never be blanked.
    rows[value_cols] = rows[value_cols].replace(0.0, np.nan)
    return rows


# Split the data by type
area_harvested = _country_rows_for_element(fao_crop_yield_data, 'Area harvested', regions + special_groups, year_cols)
crop_yield = _country_rows_for_element(fao_crop_yield_data, 'Yield', regions + special_groups, year_cols)
crop_production = _country_rows_for_element(fao_crop_yield_data, 'Production', regions + special_groups, year_cols)

# Other Element values present in the table but not used here:
# 'Prod Popultn', 'Stocks', 'Laying', 'Producing Animals/Slaughtered',
# 'Yield/Carcass Weight', 'Milk Animals'

fao_crop_yield_data.head(5)

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,ha,,,,,,,,,,,,,,,0.0,5900.0,6000.0,6000.0,6000.0,5800.0,5800.0,5800.0,5700.0,5700.0,5600.0,5500.0,5500.0,5400.0,5400.0,6037.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,7000.0,9000.0,5500.0,5700.0,12000.0,11768.0,12000.0,12000.0,12000.0,11029.0,11210.0,13469.0,13490.0,14114.0,13703.0,14676.0,19481.0,19793.0,20053.0,29203.0,22134.0
1,2,Afghanistan,221,"Almonds, with shell",5419,Yield,hg/ha,,,,,,,,,,,,,,,,16610.0,15000.0,20000.0,17500.0,17069.0,13793.0,18966.0,17018.0,18421.0,16071.0,18182.0,16364.0,16667.0,16296.0,15736.0,16364.0,18000.0,16364.0,16364.0,16364.0,16364.0,16364.0,16364.0,20000.0,17143.0,16667.0,21407.0,24561.0,12250.0,13282.0,16667.0,26234.0,35000.0,39154.0,49955.0,45000.0,45960.0,29910.0,19996.0,16521.0,16859.0,13788.0,17161.0,13083.0,17759.0
2,2,Afghanistan,221,"Almonds, with shell",5510,Production,tonnes,,,,,,,,,,,,,,,0.0,9800.0,9000.0,12000.0,10500.0,9900.0,8000.0,11000.0,9700.0,10500.0,9000.0,10000.0,9000.0,9000.0,8800.0,9500.0,9000.0,9900.0,9000.0,9000.0,9000.0,9000.0,9000.0,9000.0,11000.0,12000.0,15000.0,11774.0,14000.0,14700.0,15630.0,20000.0,31481.0,42000.0,43183.0,56000.0,60611.0,62000.0,42215.0,27400.0,24246.0,32843.0,27291.0,34413.0,38205.0,39307.0
3,2,Afghanistan,711,"Anise, badian, fennel, coriander",5312,Area harvested,ha,,,,,,,,,,,,,,,,,,,,,,,,,,700.0,700.0,300.0,1100.0,1300.0,1882.0,700.0,2270.0,2871.0,3000.0,7000.0,4000.0,12000.0,11311.0,4000.0,1600.0,3300.0,6800.0,15000.0,16000.0,17432.0,28000.0,15000.0,17748.0,17000.0,19500.0,18500.0,18500.0,30000.0,25000.0,24500.0,26500.0,25333.0,25444.0,25759.0
4,2,Afghanistan,711,"Anise, badian, fennel, coriander",5419,Yield,hg/ha,,,,,,,,,,,,,,,,,,,,,,,,,,7143.0,7143.0,6667.0,7273.0,7692.0,7072.0,7143.0,6855.0,6761.0,6667.0,6000.0,6250.0,5917.0,6189.0,6250.0,6250.0,6061.0,6029.0,6000.0,6250.0,6222.0,6071.0,6000.0,6202.0,6000.0,6414.0,6757.0,6757.0,7167.0,7200.0,7075.0,7149.0,7142.0,7123.0,7138.0


In [122]:
# Display the area_harvested frame to check the 'Area harvested' rows after filtering and zero replacement
area_harvested

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,ha,,,,,,,,,,,,,,,,5900.0,6000.0,6000.0,6000.0,5800.0,5800.0,5800.0,5700.0,5700.0,5600.0,5500.0,5500.0,5400.0,5400.0,6037.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,5500.0,7000.0,9000.0,5500.0,5700.0,12000.0,11768.0,12000.0,12000.0,12000.0,11029.0,11210.0,13469.0,13490.0,14114.0,13703.0,14676.0,19481.0,19793.0,20053.0,29203.0,22134.0
3,2,Afghanistan,711,"Anise, badian, fennel, coriander",5312,Area harvested,ha,,,,,,,,,,,,,,,,,,,,,,,,,,700.0,700.0,300.0,1100.0,1300.0,1882.0,700.0,2270.0,2871.0,3000.0,7000.0,4000.0,12000.0,11311.0,4000.0,1600.0,3300.0,6800.0,15000.0,16000.0,17432.0,28000.0,15000.0,17748.0,17000.0,19500.0,18500.0,18500.0,30000.0,25000.0,24500.0,26500.0,25333.0,25444.0,25759.0
6,2,Afghanistan,515,Apples,5312,Area harvested,ha,2220.0,2220.0,2220.0,2350.0,2480.0,2740.0,3060.0,2990.0,3040.0,3060.0,3060.0,3150.0,3210.0,3270.0,3270.0,3270.0,3280.0,3250.0,3150.0,3150.0,3080.0,2960.0,2850.0,2730.0,2660.0,2590.0,2230.0,2330.0,2340.0,2365.0,2346.0,2308.0,2300.0,2350.0,2631.0,3135.0,3748.0,4410.0,4930.0,5598.0,5482.0,5045.0,3935.0,2311.0,2751.0,7000.0,7000.0,7000.0,8550.0,8550.0,8863.0,9148.0,10341.0,12247.0,13038.0,19365.0,26847.0,28381.0,27559.0,25643.0
9,2,Afghanistan,526,Apricots,5312,Area harvested,ha,4820.0,4820.0,4820.0,5100.0,5370.0,5930.0,6620.0,6480.0,6590.0,6630.0,6630.0,6810.0,6950.0,7070.0,7080.0,7080.0,7100.0,7050.0,6810.0,6820.0,6680.0,6410.0,6170.0,5920.0,5750.0,5610.0,4830.0,5050.0,5060.0,5115.0,4876.0,4951.0,5100.0,5119.0,5150.0,5325.0,5407.0,5505.0,5612.0,5754.0,5200.0,6272.0,7007.0,5200.0,7223.0,8030.0,8000.0,8000.0,8170.0,8320.0,8320.0,8350.0,9005.0,9005.0,9116.0,8595.0,18067.0,18510.0,17719.0,17481.0
13,2,Afghanistan,44,Barley,5312,Area harvested,ha,350000.0,350000.0,350000.0,350000.0,350000.0,350000.0,316000.0,317000.0,317000.0,315000.0,315000.0,320000.0,320000.0,340200.0,320000.0,310000.0,310000.0,310000.0,304000.0,306000.0,278000.0,257000.0,242000.0,234000.0,221000.0,213000.0,213000.0,212000.0,211000.0,205000.0,204000.0,250000.0,250000.0,250000.0,225000.0,220000.0,200000.0,200000.0,180000.0,124000.0,87000.0,236000.0,270000.0,315000.0,240000.0,236000.0,236000.0,236000.0,267000.0,212000.0,190000.0,280000.0,278000.0,342472.0,282000.0,219208.0,68179.0,84147.0,84070.0,86099.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59797,181,Zimbabwe,1726,"Pulses, Total",5312,Area harvested,ha,32731.0,32595.0,31954.0,31174.0,36436.0,42000.0,42000.0,42000.0,42000.0,40200.0,42200.0,42300.0,43300.0,45300.0,45400.0,46400.0,47500.0,42500.0,42600.0,42600.0,36864.0,65225.0,66200.0,66400.0,67600.0,68800.0,70000.0,71200.0,71800.0,72730.0,73258.0,64745.0,66235.0,77264.0,62959.0,21012.0,21738.0,25311.0,19817.0,20638.0,20889.0,20436.0,21095.0,74362.0,53343.0,72529.0,80105.0,71259.0,68694.0,85058.0,59531.0,121952.0,146226.0,115044.0,126583.0,111444.0,112562.0,112610.0,104852.0,110032.0
59800,181,Zimbabwe,1720,"Roots and Tubers, Total",5312,Area harvested,ha,16189.0,16436.0,16501.0,16012.0,16008.0,16405.0,16230.0,16500.0,16750.0,16850.0,17500.0,18200.0,18400.0,18900.0,18117.0,18711.0,18386.0,19081.0,19900.0,20277.0,19821.0,20414.0,21000.0,21750.0,22150.0,22450.0,23530.0,24600.0,25700.0,26584.0,27741.0,32323.0,35723.0,37479.0,40676.0,40738.0,42432.0,41722.0,44519.0,44743.0,46717.0,47936.0,49687.0,51374.0,52662.0,53405.0,54625.0,55325.0,57703.0,59269.0,60788.0,62228.0,63069.0,64906.0,64226.0,64177.0,65280.0,65210.0,65824.0,67680.0
59808,181,Zimbabwe,1723,Sugar Crops Primary,5312,Area harvested,ha,3800.0,6220.0,10708.0,12185.0,13844.0,22400.0,13500.0,13000.0,16000.0,19501.0,16315.0,19486.0,21883.0,21190.0,22769.0,25328.0,26419.0,24677.0,24518.0,24515.0,34146.0,31547.0,33033.0,33048.0,31644.0,33000.0,31339.0,31853.0,33699.0,32379.0,31506.0,14000.0,9000.0,31887.0,34805.0,38207.0,43000.0,50000.0,43000.0,44000.0,40958.0,38791.0,45000.0,44000.0,44788.0,43480.0,35580.0,35320.0,36174.0,40663.0,42828.0,53486.0,46605.0,43121.0,43094.0,43500.0,41000.0,45000.0,46000.0,46879.0
59811,181,Zimbabwe,1729,"Treenuts, Total",5312,Area harvested,ha,,,,,,,,,,,,,,,,,,,70.0,100.0,130.0,130.0,130.0,200.0,250.0,250.0,250.0,430.0,490.0,800.0,800.0,660.0,552.0,660.0,496.0,670.0,671.0,800.0,900.0,1000.0,1300.0,1450.0,1600.0,1950.0,1969.0,2000.0,1869.0,2000.0,2500.0,3200.0,4000.0,4000.0,4000.0,4024.0,4106.0,3700.0,3494.0,3389.0,3321.0,3391.0


In [120]:
# Import data aggregation codes per year (this file holds both values and flags)
file_name = 'Production_Crops_Livestock_E_All_Data'
fao_crop_yield_data_yield_and_flags = pd.read_csv(f"./data/fao_crop_data/unnormalized/{file_name}.csv", encoding='latin-1')
# Strip the leading 'Y' of year-based columns only (a 'Y' directly followed by a digit).
fao_crop_yield_data_yield_and_flags.columns = fao_crop_yield_data_yield_and_flags.columns.str.replace(
    r'^Y(?=\d)', '', regex=True
)

# Extract flag columns: exactly a 4-digit year followed by 'F', so an unrelated
# column that merely contains the letter 'F' is never picked up.
all_cols = fao_crop_yield_data_yield_and_flags.columns
flag_cols = all_cols[all_cols.str.match(r'^\d{4}F$')].to_list()
# The first seven columns are the identifier columns (Area Code ... Unit).
cols_to_keep = all_cols[:7].tolist() + flag_cols
# Rename only the flag columns ('1961F' -> '1961'); identifier names are left untouched.
fao_crop_yield_data_flags = (
    fao_crop_yield_data_yield_and_flags[cols_to_keep]
    .rename(columns={col: col[:-1] for col in flag_cols})
)

# Replace missing/unclear values, in the flag columns only so that a missing
# identifier (e.g. Unit) is not turned into the flag 'O'.
# NOTE(review): a blank flag becomes 'O' even for years whose value is itself
# missing (compare the first rows of the value and flag tables) - confirm this is intended.
flag_year_cols = [col[:-1] for col in flag_cols]
fao_crop_yield_data_flags[flag_year_cols] = (
    fao_crop_yield_data_flags[flag_year_cols]
    .fillna('O')
    .replace('*', 'U')
)

fao_crop_yield_data_flags.head(5)

Unnamed: 0,Area Code,Area,Item Code,Item,Element Code,Element,Unit,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,2,Afghanistan,221,"Almonds, with shell",5312,Area harvested,ha,O,O,O,O,O,O,O,O,O,O,O,O,O,O,F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,Im,O,O,O,O,O,O,O,O,O,F,F,O,O,U,Im,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O
1,2,Afghanistan,221,"Almonds, with shell",5419,Yield,hg/ha,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc
2,2,Afghanistan,221,"Almonds, with shell",5510,Production,tonnes,O,O,O,O,O,O,O,O,O,O,O,O,O,O,F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,O,O,O,O,O,O,O,O,F,F,F,Im,U,U,U,O,O,O,U,O,O,O,O,O,O,O,O,O,O,O
3,2,Afghanistan,711,"Anise, badian, fennel, coriander",5312,Area harvested,ha,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,M,F,F,F,F,F,Im,F,Im,Im,F,F,F,F,Im,F,F,F,F,F,F,Im,F,F,Im,F,Im,F,F,F,F,Im,Im,Im,Im,Im
4,2,Afghanistan,711,"Anise, badian, fennel, coriander",5419,Yield,hg/ha,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,O,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc,Fc


In [115]:
# Load the lookup table describing each data aggregation (flag) code,
# mapping '<blank>' to 'O' and '*' to 'U'
file_name = 'Production_Crops_Livestock_E_Flags'
fao_data_description = (
    pd.read_csv(f"./data/fao_crop_data/unnormalized/{file_name}.csv", encoding='latin-1')
    .replace('<blank>', 'O')
    .replace('*', 'U')
)
fao_data_description

Unnamed: 0,Flag,Description
0,U,Unofficial figure
1,O,Official data
2,A,Aggregate; may include official; semi-official...
3,F,FAO estimate
4,Fc,Calculated data
5,Im,FAO data based on imputation methodology
6,M,Data not available


In [116]:
# Load the product item code lookup table and preview its first rows
file_name = 'Production_Crops_Livestock_E_ItemCodes'
fao_item_codes = pd.read_csv(
    f"./data/fao_crop_data/unnormalized/{file_name}.csv",
    encoding='latin-1',
)
fao_item_codes.head(5)

Unnamed: 0,Item Code,CPC Code,Item
0,101,'01195,Canary seed
1,1016,'02123,Goats
2,1017,'21116,Meat; goat
3,1018,'21156,Offals; edible; goats
4,1019,'21515,Fat; goats


Unnamed: 0,Item Code,CPC Code,Item
4,1019,'21515,Fat; goats


array(['Area harvested', 'Yield', 'Production', 'Stocks', 'Laying',
       'Producing Animals/Slaughtered', 'Yield/Carcass Weight',
       'Milk Animals', 'Prod Popultn'], dtype=object)