This notebook wraps up customized functions into one loop to automate ARIMA model optimization for all item-by-country combinations. 

In [1]:
import datetime
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# For ARIMA model
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf

### Customized functions to do ARIMA modeling ###
import functions.crop_by_country_arima_analyses as ca

In [2]:
# Load the processed item-by-country table; the first CSV column becomes the row index
csv_path = '../data/processed/items_by_country.csv'
items_by_country = pd.read_csv(csv_path, index_col=0)
print(items_by_country.shape)

# Collect the year column labels into a plain list.
# The slice skips the first five columns and the last column, keeping only the year columns.
# This list is later passed as the `years` argument of the ARIMA pipeline.
year_columns = items_by_country.columns[5:-1]
year = list(year_columns) # Select year 1986-2017

(223049, 38)


In [3]:
# Preview the first rows to check the column layout of the loaded table
items_by_country.head()

Unnamed: 0,Reporter Countries,Item,Element,Unit,Item Code,Y1986,Y1987,Y1988,Y1989,Y1990,...,Y2009,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,NoneZero
0,Afghanistan,Almonds shelled,Export Quantity,tonnes,5313,0,0,0,0,0,...,4763,1308,2261,0,0,2714,2086,1778,2756,7
1,Afghanistan,Almonds shelled,Export Value,1000 US$,5313,0,0,0,0,0,...,35476,15894,20270,0,0,16454,12793,10934,19677,7
2,Afghanistan,Almonds shelled,Import Quantity,tonnes,1617,0,0,0,0,0,...,0,0,0,0,0,168,181,846,103,4
3,Afghanistan,Almonds shelled,Import Value,1000 US$,1617,0,0,0,0,0,...,0,0,0,0,0,1117,1377,4988,759,4
4,Afghanistan,"Almonds, with shell",Export Quantity,tonnes,3315,0,0,0,0,0,...,11066,779,1016,0,0,1856,1660,1545,875,7


In [67]:
# List the distinct values found in the 'Unit' column
items_by_country['Unit'].unique()

array(['tonnes', '1000 US$', 'Head', '1000 Head', 'No'], dtype=object)

In [4]:
# Distinct item names as a Python list; the modeling loop iterates over these
item_list = items_by_country['Item'].unique().tolist()

In [28]:
# Start an empty dictionary to store country names for each item (keyed by item name).
# defaultdict(list) supplies an empty list for any key accessed before it is set.
# Note: re-running this cell rebinds the name to a new, empty dictionary.
all_item_countries_dict = defaultdict(list) 

In [122]:
# Iterate through items and run the ARIMA pipeline for each one.
# `ca.arima_pipeline` is called once per item; the country names it returns are
# saved under that item in `all_item_countries_dict`.
# NOTE(review): the pipeline presumably optimizes the ARIMA parameters per country and
# outputs model parameters and predicted values itself; confirm in
# functions/crop_by_country_arima_analyses.py.
# The dictionary is re-pickled after every successful item, so an interrupted run
# keeps the results gathered so far.

# Assign variables
data = items_by_country.copy()
element = 'Export Quantity' # or Export/Import Value
years = year
output_path = "../data/processed/all_item_countries_dict.pkl"

for item in item_list:
    try:
        country_names = ca.arima_pipeline(data, item, element, years)
    except Exception as inst: # catch exception instance
        # Best effort: report the failing item and carry on with the next one
        print('An exception occurred for item {}'.format(item))
        print(inst.args) # arguments stored in .args
        continue
    # Stored as a one-element list wrapping the returned names, replacing any
    # earlier entry for this item
    all_item_countries_dict[item] = [country_names]
    # Checkpoint the dictionary; `with` closes the file even if pickling raises
    with open(output_path, "wb") as f:
        pickle.dump(all_item_countries_dict, f) # save dict
    print(datetime.datetime.now(),'\n')

In [54]:
# Show which items have been recorded so far:
# the populated dictionary after running the above loop for some items
all_item_countries_dict.keys()

dict_keys(['Asparagus', 'Avocados', 'Bacon and ham', 'Bananas', 'Barley', 'Beans, dry', 'Beans, green', 'Beer of barley', 'Beeswax', 'Beet pulp', 'Animals, live, non-food', 'Anise, badian, fennel, coriander', 'Apples', 'Apricots', 'Apricots, dry', 'Areca nuts', 'Artichokes', 'Germ, maize', 'Gooseberries', 'Grain, mixed', 'Hops', 'Juice, lemon, concentrated', 'Karite nuts (sheanuts)', 'Kola nuts', 'Meat, horse', 'Mules', 'Offals, liver duck', 'Offals, liver geese', 'Oil, safflower', 'Onions, shallots, green', 'Pyrethrum, extraction', 'Rabbits and hares', 'Skins, goat, wet salted', 'Skins, sheep, wet salted', 'Starch, cassava', 'Triticale', 'Waxes vegetable', 'Whey, condensed', 'Camelids, other', 'Feed minerals', 'Feed supplements', 'Sugar crops nes', 'Offals, edible, goats', 'Rodents, other', 'Hay (unspecified)', 'Hides, horse, dry salted', 'Manila fibre (abaca)', 'Peppermint', 'Cheese, sheep milk', 'Milk, whole fresh sheep', 'Fat, camels', 'Fonio', 'Bambara beans', 'Bulgur', 'Hay (clov

In [55]:
# Inspect the entry stored for a single item
all_item_countries_dict['Avocados']

[['Argentina',
  'Australia',
  'Austria',
  'Belgium',
  'Brazil',
  'Canada',
  'Chile',
  'China, Hong Kong SAR',
  'Colombia',
  'Costa Rica',
  'Czechia',
  'Denmark',
  'Dominica',
  'Finland',
  'France',
  'Germany',
  'Greece',
  'Guatemala',
  'Guyana',
  'Honduras',
  'Indonesia',
  'Ireland',
  'Italy',
  'Luxembourg',
  'Mauritius',
  'Mexico',
  'Morocco',
  'Netherlands',
  'New Zealand',
  'Nicaragua',
  'Norway',
  'Peru',
  'Philippines',
  'Portugal',
  'Singapore',
  'Slovenia',
  'South Africa',
  'Spain',
  'Sweden',
  'Switzerland',
  'Turkey',
  'Uganda',
  'United Kingdom',
  'United States of America',
  'Venezuela (Bolivarian Republic of)',
  'Zimbabwe']]

**Summary**  
Now that the modeling process has been streamlined, we can move on to building a dashboard that lets users visualize the results and run simulations easily. See the code here: [dash_export.py](https://github.com/BrachyS/crop-trends/blob/master/dashboard/dash_export.py) 