In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from collections import defaultdict

# For ARIMA model
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

# Customized functions to do ARIMA modeling
import functions.crop_by_country_arima_analyses as ca

In [2]:
# Load the processed item-by-country table; the first CSV column is used as the index.
items_by_country = pd.read_csv('../data/processed/items_by_country.csv', index_col=0)
print(items_by_country.shape)

# Collect the year column labels into a list (not a column).
# The slice is positional: it skips the first five columns and the last one,
# which in this file leaves Y1986..Y2017. The list is passed to the ARIMA
# pipeline further down when the data frame is reshaped.
year = items_by_country.columns.tolist()[5:-1]

(223049, 38)


In [3]:
# Preview the first rows to check the column layout (id columns, yearly columns, NoneZero).
items_by_country.head()

Unnamed: 0,Reporter Countries,Item,Element,Unit,Item Code,Y1986,Y1987,Y1988,Y1989,Y1990,...,Y2009,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,NoneZero
0,Afghanistan,Almonds shelled,Export Quantity,tonnes,5313,0,0,0,0,0,...,4763,1308,2261,0,0,2714,2086,1778,2756,7
1,Afghanistan,Almonds shelled,Export Value,1000 US$,5313,0,0,0,0,0,...,35476,15894,20270,0,0,16454,12793,10934,19677,7
2,Afghanistan,Almonds shelled,Import Quantity,tonnes,1617,0,0,0,0,0,...,0,0,0,0,0,168,181,846,103,4
3,Afghanistan,Almonds shelled,Import Value,1000 US$,1617,0,0,0,0,0,...,0,0,0,0,0,1117,1377,4988,759,4
4,Afghanistan,"Almonds, with shell",Export Quantity,tonnes,3315,0,0,0,0,0,...,11066,779,1016,0,0,1856,1660,1545,875,7


In [4]:
# Distinct values of the Item column, as a plain Python list.
item_list = items_by_country['Item'].unique().tolist()

In [5]:
# Number of distinct items available for modeling.
len(item_list)

424

In [6]:
# Show the first 50 item names.
item_list[:50]

['Almonds shelled',
 'Almonds, with shell',
 'Animals live nes',
 'Animals, live, non-food',
 'Anise, badian, fennel, coriander',
 'Apples',
 'Apricots',
 'Apricots, dry',
 'Areca nuts',
 'Artichokes',
 'Asparagus',
 'Avocados',
 'Bacon and ham',
 'Bananas',
 'Barley',
 'Beans, dry',
 'Beans, green',
 'Beer of barley',
 'Beeswax',
 'Beet pulp',
 'Beverages, distilled alcoholic',
 'Beverages, fermented rice',
 'Beverages, non alcoholic',
 'Bran, maize',
 'Bran, sorghum',
 'Bran, wheat',
 'Brazil nuts, shelled',
 'Bread',
 'Broad beans, horse beans, dry',
 'Buckwheat',
 'Buffaloes',
 'Butter, cow milk',
 'Buttermilk, curdled, acidified milk',
 'Cabbages and other brassicas',
 'Cake, copra',
 'Cake, cottonseed',
 'Cake, groundnuts',
 'Cake, linseed',
 'Cake, rapeseed',
 'Cake, soybeans',
 'Cake, sunflower',
 'Camels',
 'Carrots and turnips',
 'Cashew nuts, shelled',
 'Cashew nuts, with shell',
 'Cassava dried',
 'Cattle',
 'Cauliflowers and broccoli',
 'Cereal preparations nes',
 'Cereals

In [28]:
# Accumulator for every processed item: item name -> list of country-name results.
# NOTE(review): re-running this cell discards everything accumulated so far.
all_item_countries_dict = defaultdict(list)

In [15]:
# Run the ARIMA pipeline for one batch of items and record, per item, the
# country names returned by ca.arima_pipeline.
#
# Batches are processed one slice of item_list at a time. Per the original
# author's notes, item_list[3:10] was stored in `item_countries` and
# item_list[10:20] in `arima_item_countries_1` during earlier runs.
# NOTE(review): those two names are not assigned in this cell.

# Inputs handed to the pipeline
data = items_by_country.copy()  # work on a copy so the loaded frame stays untouched
element = 'Export Quantity' # or Export/Import Value
years = year

for item in item_list[10:20]:
    # Fresh per-item dict so a failed item never reaches the accumulator
    item_countries_dict = defaultdict(list)
    try:
        country_names = ca.arima_pipeline(data, item, element, years)
        # Stored as a single nested entry: {item: [country_names]}
        item_countries_dict[item].append(country_names)
    except Exception as inst: # best-effort: report the failure and move on to the next item
        # Fixed: the original line was missing the '.' before format (SyntaxError)
        print('An exception occurred for item {}.'.format(item))
        print(inst.args) # arguments stored in .args
        continue
    all_item_countries_dict.update(item_countries_dict)
    # Timestamp printed only after a successful item, as a progress marker
    print(datetime.datetime.now(),'\n')

44 country names selected for those with >50% non-zero data
There were 13 non-stationary countries being removed and
 result in 31 stationary countries
Data for Asparagus processed
Argentina (2, 0, 2) MSE=1.73E+03 MAPE=2.95E-01
Belgium (2, 0, 0) MSE=2.26E+04 MAPE=5.70E-02
Canada (1, 0, 0) MSE=8.84E+03 MAPE=5.27E-02
Chile (1, 0, 1) MSE=2.87E+04 MAPE=1.60E+01
China, mainland (6, 0, 0) MSE=1.07E+04 MAPE=5.93E-01
Colombia (3, 0, 1) MSE=2.29E+03 MAPE=4.92E+00
Costa Rica (3, 0, 1) MSE=3.52E+00 MAPE=5.87E-01
Czechia (1, 0, 1) MSE=1.07E+03 MAPE=3.52E-01
Denmark (1, 0, 0) MSE=1.05E+03 MAPE=2.78E-01
France (1, 0, 2) MSE=3.65E+04 MAPE=4.03E-02
Germany (5, 0, 0) MSE=9.24E+04 MAPE=6.92E-02
Guatemala (4, 0, 0) MSE=8.60E+02 MAPE=1.14E-01
Indonesia (1, 0, 0) MSE=2.93E+01 MAPE=1.66E+00
Ireland (1, 0, 0) MSE=8.74E+02 MAPE=6.68E-01
Italy (1, 0, 0) MSE=4.99E+05 MAPE=1.33E-01
Luxembourg (6, 0, 1) MSE=1.27E+01 MAPE=7.64E-02
Malaysia (1, 0, 2) MSE=1.13E+02 MAPE=5.56E-01
Mexico (3, 0, 0) MSE=5.67E+07 MAPE=7.1

Madagascar (1, 0, 0) MSE=9.14E+02 MAPE=4.23E-01
Malaysia (4, 0, 0) MSE=7.23E+06 MAPE=1.10E-01
Mexico (2, 0, 1) MSE=1.90E+09 MAPE=1.38E-01
Netherlands (2, 0, 0) MSE=1.15E+09 MAPE=1.11E-01
New Zealand (1, 0, 0) MSE=2.72E+02 MAPE=5.19E-01
Nicaragua (5, 0, 0) MSE=1.17E+08 MAPE=2.02E-01
Oman (0, 0, 2) MSE=2.12E+05 MAPE=1.36E+02
Pakistan (1, 0, 0) MSE=2.21E+08 MAPE=2.20E-01
Panama (1, 0, 2) MSE=2.79E+09 MAPE=1.80E-01
Paraguay (1, 0, 0) MSE=4.95E+07 MAPE=2.95E-01
Peru (1, 0, 0) MSE=1.21E+09 MAPE=4.68E-01
Philippines (3, 0, 0) MSE=6.06E+10 MAPE=8.86E-02
Poland (1, 0, 0) MSE=3.65E+07 MAPE=1.65E-01
Portugal (3, 0, 0) MSE=1.30E+07 MAPE=2.00E-01
Republic of Korea (2, 0, 0) MSE=1.25E+04 MAPE=5.86E-01
Romania (1, 0, 0) MSE=8.43E+05 MAPE=3.06E+00
Rwanda (0, 0, 1) MSE=6.88E+00 MAPE=5.84E-01
Saint Lucia (3, 0, 1) MSE=2.22E+07 MAPE=1.22E+03
Saudi Arabia (1, 0, 0) MSE=9.23E+06 MAPE=4.03E-01
Slovakia (1, 0, 0) MSE=9.42E+06 MAPE=2.34E-01
Slovenia (1, 0, 0) MSE=5.62E+07 MAPE=2.32E-01
South Africa (1, 0, 0) 

2020-04-18 13:24:08.632709 

78 country names selected for those with >50% non-zero data
There were 12 non-stationary countries being removed and
 result in 66 stationary countries
Data for Beans, green processed
Albania (1, 0, 0) MSE=5.38E+02 MAPE=5.54E+00
Australia (5, 0, 0) MSE=2.20E+04 MAPE=8.38E-02
Austria (1, 0, 0) MSE=1.36E+03 MAPE=7.66E-02
Belgium (1, 0, 0) MSE=3.01E+06 MAPE=1.13E-01
Brazil (1, 0, 1) MSE=7.16E+01 MAPE=3.58E+00
Bulgaria (1, 0, 0) MSE=3.62E+04 MAPE=1.60E+00
Burkina Faso (2, 0, 0) MSE=1.02E+04 MAPE=3.60E-01
Canada (4, 0, 0) MSE=5.66E+05 MAPE=2.03E-01
China, Taiwan Province of (1, 0, 1) MSE=5.36E+04 MAPE=2.41E+01
China, mainland (1, 0, 0) MSE=5.70E+06 MAPE=2.98E-01
Colombia (1, 0, 2) MSE=4.70E+02 MAPE=2.27E-01
Costa Rica (0, 0, 2) MSE=2.37E+03 MAPE=6.07E-01
Denmark (1, 0, 0) MSE=2.89E+03 MAPE=8.04E-01
Ecuador (5, 0, 0) MSE=3.46E+01 MAPE=1.42E+00
Egypt (1, 0, 0) MSE=6.68E+07 MAPE=4.81E+00
Ethiopia (1, 0, 0) MSE=8.97E+05 MAPE=2.23E-01
Fiji (1, 0, 0) MSE=2.57E+03 MAPE

Austria (3, 0, 2) MSE=1.08E+00 MAPE=2.78E-01
Belgium (1, 0, 2) MSE=3.74E+03 MAPE=1.97E-01
Bulgaria (1, 0, 0) MSE=3.28E+02 MAPE=3.19E-01
Canada (1, 0, 1) MSE=5.36E+02 MAPE=3.57E-02
Central African Republic (1, 0, 0) MSE=2.46E+02 MAPE=2.06E-01
Chile (1, 0, 0) MSE=2.16E+02 MAPE=7.53E-01
China, Hong Kong SAR (1, 0, 1) MSE=6.69E+01 MAPE=2.27E-01
China, Taiwan Province of (0, 0, 2) MSE=1.49E+02 MAPE=2.33E-01
China, mainland (6, 0, 0) MSE=2.12E+05 MAPE=5.01E-02
Czechia (3, 0, 0) MSE=4.45E+01 MAPE=1.03E+00
Denmark (1, 0, 0) MSE=8.38E+01 MAPE=2.92E-01
Egypt (1, 0, 0) MSE=9.32E+03 MAPE=7.07E-01
Ethiopia (4, 0, 0) MSE=2.29E+03 MAPE=1.03E-01
France (1, 0, 2) MSE=2.20E+03 MAPE=5.91E-02
Germany (1, 0, 0) MSE=5.08E+03 MAPE=4.93E-02
Greece (1, 0, 0) MSE=1.11E+03 MAPE=3.76E-01
Guatemala (4, 0, 0) MSE=1.01E+00 MAPE=2.89E-01
India (1, 0, 0) MSE=2.92E+02 MAPE=1.55E-01
Italy (4, 0, 2) MSE=1.09E+02 MAPE=1.24E-01
Japan (4, 0, 0) MSE=2.71E+01 MAPE=4.05E-02
Kenya (2, 0, 0) MSE=5.92E+02 MAPE=1.51E+00
Lithuania 

In [11]:
# NOTE(review): `item_countries` is not assigned in any visible cell; this relies on
# kernel state from an earlier run (presumably the item_list[3:10] batch) — confirm.
item_countries.keys()

dict_keys(['Animals, live, non-food', 'Anise, badian, fennel, coriander', 'Apples', 'Apricots', 'Apricots, dry', 'Areca nuts', 'Artichokes'])

In [16]:
# Items in the [10:20] slice, i.e. the batch the ARIMA loop iterates over.
item_list[10:20]

['Asparagus',
 'Avocados',
 'Bacon and ham',
 'Bananas',
 'Barley',
 'Beans, dry',
 'Beans, green',
 'Beer of barley',
 'Beeswax',
 'Beet pulp']

In [18]:
# Inspect the stored result for one item.
# NOTE(review): `arima_item_countries_1` is not assigned in any visible cell; it relies on
# kernel state from an earlier run — confirm where it is created.
arima_item_countries_1['Beeswax']

[['Austria',
  'Belgium',
  'Bulgaria',
  'Canada',
  'Central African Republic',
  'Chile',
  'China, Hong Kong SAR',
  'China, Taiwan Province of',
  'China, mainland',
  'Czechia',
  'Denmark',
  'Egypt',
  'Ethiopia',
  'France',
  'Germany',
  'Greece',
  'Guatemala',
  'India',
  'Italy',
  'Japan',
  'Kenya',
  'Lithuania',
  'Madagascar',
  'Malaysia',
  'Mexico',
  'Netherlands',
  'New Zealand',
  'Norway',
  'Poland',
  'Portugal',
  'Republic of Korea',
  'Senegal',
  'Singapore',
  'Spain',
  'Sweden',
  'Switzerland',
  'Thailand',
  'United Kingdom',
  'United States of America']]

In [23]:
# NOTE(review): despite the names, these are aliases, not copies. Plain assignment binds
# a second name to the same dict object, so mutating dict1_copy/dict2_copy also mutates
# item_countries/arima_item_countries_1.
dict2_copy = arima_item_countries_1
dict1_copy = item_countries

In [24]:
# Items present in dict2_copy before the merge.
dict2_copy.keys()

dict_keys(['Asparagus', 'Avocados', 'Bacon and ham', 'Bananas', 'Barley', 'Beans, dry', 'Beans, green', 'Beer of barley', 'Beeswax', 'Beet pulp'])

In [25]:
# Items present in dict1_copy before the merge.
dict1_copy.keys()

dict_keys(['Animals, live, non-food', 'Anise, badian, fennel, coriander', 'Apples', 'Apricots', 'Apricots, dry', 'Areca nuts', 'Artichokes'])

In [26]:
# Merge dict2_copy's entries into dict1_copy in place (keys already present are overwritten).
dict1_copy.update(dict2_copy)

In [27]:
# Check the key set after the merge.
dict1_copy.keys()

dict_keys(['Animals, live, non-food', 'Anise, badian, fennel, coriander', 'Apples', 'Apricots', 'Apricots, dry', 'Areca nuts', 'Artichokes', 'Asparagus', 'Avocados', 'Bacon and ham', 'Bananas', 'Barley', 'Beans, dry', 'Beans, green', 'Beer of barley', 'Beeswax', 'Beet pulp'])

In [32]:
# Fold the entries of item_countries into the overall accumulator and list the items it holds.
# NOTE(review): `item_countries` is not assigned in any visible cell — confirm its origin,
# otherwise this cell fails on a fresh kernel.
all_item_countries_dict.update(item_countries)
print(all_item_countries_dict.keys())

dict_keys(['Asparagus', 'Avocados', 'Bacon and ham', 'Bananas', 'Barley', 'Beans, dry', 'Beans, green', 'Beer of barley', 'Beeswax', 'Beet pulp', 'Animals, live, non-food', 'Anise, badian, fennel, coriander', 'Apples', 'Apricots', 'Apricots, dry', 'Areca nuts', 'Artichokes'])


In [35]:
# Save dict
import pickle

# Persist the accumulated item -> countries mapping.
# The context manager closes the file even if pickle.dump raises, which the
# previous explicit open()/close() pair did not guarantee.
with open("../data/processed/all_item_countries_dict.pkl", "wb") as f:
    pickle.dump(all_item_countries_dict, f)