# Econmic Model 1: Predict Retail with 9 features
## Random Forest Decision Tree
This model employs a Random Forest Regressor Decision Tree algorithm to predict retail coffe price bassed on economic metrics such as  supply, trade, inventories, consumption, energy costs, and commodity pricing data. Data was obtained from International Coffee Organization (The ICO Historic Data). 

### Scoring Metrics:
- Training Data Score: 0.9597823614739235
- Testing Data Score: 0.8959434042614792
- MSE:  0.12347079250000448
- RMSE:  0.06173539625000224

In [1]:
# Dependencies
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn import tree
import os
import matplotlib.pyplot as plt

In [2]:
# Machine learning libraries
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [32]:
# import processed data
path = "Trends/"
file = "finalFeatures.csv"
path_file = path + file
df = pd.read_csv(path_file)
#df

In [4]:
df.shape

(29, 11)

In [33]:
# Optimize data
# Best combination was 9 features

df = df.drop(columns =['Coffe Imports (In 1K 60-kg bags) Calendar Year 137c',
                  ])
#df

In [34]:
df.shape

(29, 10)

In [35]:
# Create dataframe for running decision tree
# This was done to upload existing code with similar variables
# Main Data set: data_df

data_df = df.drop("retail", axis=1)
target = df["retail"]
Years = df["Year"]


feature_names = data_df.columns
data_df.head()

Unnamed: 0,Year,Brent ave closing USD,Coffe Export (In 1K 60-kg bags) crop year,Coffe Produciton (In 1K 60-kg bags) crop year,Domestic Consumption (In 1K 60-kg bags),ICO composite (USD $/lb),Inventories (In 1K 60-kg bags),re-exports (In 1K 60-kg bags) Calendar Year 35c,WTI ave closing price USD
0,1990,23.76,73887,93230,19509,0.72,15619,8769,24.53
1,1991,20.04,79768,101269,20228,0.67,13200,9770,21.54
2,1992,19.32,76957,98523,20951,0.53,16843,10691,20.58
3,1993,17.01,74130,91762,21098,0.62,14218,11041,18.43
4,1994,15.86,65962,93314,21360,1.34,12667,12833,17.2


In [36]:
# needed for ploting final graph
# Years

In [9]:
# Create dataframe for running decision tree
# This was done to upload existing code with similar variables
# Main Data set: data_df

x = df.drop("retail", axis=1)
y = df["retail"]
target = y
data_df = x
print(x.shape)
print(y.shape)

(29, 9)
(29,)


### RandomForestRegressor

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [11]:
#from sklearn.ensemble import RandomForestRegressor
Econ_model = RandomForestRegressor(n_estimators=200)
Econ_model = Econ_model.fit(X_train, y_train)
Econ_model.score(X_test, y_test)

0.907418659296

In [12]:
print('Test Acc: %.3f' % Econ_model.score(X_test, y_test))

Test Acc: 0.907


In [26]:
print(f"Training Data Score: {Econ_model.score(X_train, y_train)}")
print(f"Testing Data Score: {Econ_model.score(X_test, y_test)}")

Training Data Score: 0.9576906545597044
Testing Data Score: 0.907418659296


In [31]:
# Generates feature impact data 
impact_data = sorted(zip(Econ_model.feature_importances_.round(decimals=4), feature_names), reverse=True)
impact_data_df = pd.DataFrame(impact_data)
impact_data_df = impact_data_df.rename(columns = {0:"Impact", 1:"Feature"})
impact_data_df = impact_data_df[["Feature", "Impact"]]
impact_data_df

Unnamed: 0,Feature,Impact
0,ICO composite (USD $/lb),0.5821
1,Brent ave closing USD,0.0762
2,Coffe Export (In 1K 60-kg bags) crop year,0.0571
3,Domestic Consumption (In 1K 60-kg bags),0.0546
4,WTI ave closing price USD,0.0535
5,Coffe Produciton (In 1K 60-kg bags) crop year,0.0525
6,Year,0.05
7,Inventories (In 1K 60-kg bags),0.0396
8,re-exports (In 1K 60-kg bags) Calendar Year 35c,0.0344


In [None]:
# Model stats and needed for ploting first graph

In [None]:
ypred = Econ_model.predict(X_test)

mse = mean_squared_error(y_test, ypred)
print("MSE: ", mse)
print("RMSE: ", mse*(1/2.0)) 

# Taken from refrence
# Target MSE:  0.130713987032462
# Target RMSE:  0.065356993516231 

In [None]:
print(f"train set shape: {y_test.shape}")
print(f"Test set shape:  {ypred.shape}")

In [None]:
x_ax = range(len(y_test))
plt.plot(x_ax, y_test, linewidth=2, label="original", color ="green")
plt.plot(x_ax, ypred, linewidth=3.0, label="predicted", color = "red")
plt.title("y-test and y-predicted data")
plt.xlabel('X-axis (# Retail data points)')
plt.ylabel('Y-axis (Retail values: USD/lb)')
plt.legend(loc='best',fancybox=True, shadow=True)
plt.grid(True)
plt.show() 

In [None]:
 # save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'z1_Economic_model_9_inputs.sav'
joblib.dump(Econ_model, filename)

In [None]:
# Set up for desired graph

In [None]:
y_pred_bs = Econ_model.predict(data_df)

In [None]:
x_ax = Years
plt.plot(x_ax, target, linewidth=2, label="Original", color ="green")
plt.plot(x_ax, y_pred_bs, linewidth=3.0, label="Predicted", color = "red")
plt.title("Predicted Retail Price-Economic Model")
plt.xlabel('1990-2018')
plt.ylabel('Retail values (USD/lb)')
plt.legend(loc='best',fancybox=True, shadow=True)
plt.grid(True)
plt.savefig('Econimic_model.png')
plt.show() 

In [None]:
print(f"Training Data Score: {Econ_model.score(X_train, y_train).round(decimals=4)}")
print(f"Testing Data Score:  {Econ_model.score(X_test, y_test).round(decimals=4)}")