# Corn Predictor

In [1]:
# Initial imports
import os
import numpy as np
import pandas as pd
import alpaca_trade_api as tradeapi
from pathlib import Path
from dotenv import load_dotenv
%matplotlib inline

In [2]:
# Set the random seed for reproducibility
# Note: This is used for model prototyping, but it is good practice to comment this out and run multiple experiments to evaluate your model.
from numpy.random import seed

seed(1)
from tensorflow import random

random.set_seed(2)

### Data Loading

In this activity, we will use closing prices from different stocks to make predictions of future closing prices based on the temporal data of each stock.

In [3]:
alpaca_api_key = 'PK55DVY40BM8OTB4HSVX'
alpaca_secret_key = 'VDBV4ac8Cu1MiLfxYgKSh7zJ1H7u4ifXXtKNylW6'

api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version = "v2"
)

In [4]:
print(alpaca_api_key)

PK55DVY40BM8OTB4HSVX


In [5]:
# Set timeframe to '1D'
timeframe = "1D"

# Set start and end datetimes between now and 3 years ago.
start_date = pd.Timestamp("2018-09-11", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2021-09-11", tz="America/New_York").isoformat()

# Set the ticker information
tickers = ["CORN", "INGR", "TIP"]

# Get 3 year's worth of historical price data for Microsoft and Coca-Cola
raw_corn = api.get_barset(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
    limit=1000,
).df

# Display sample data
raw_corn.head()

Unnamed: 0_level_0,CORN,CORN,CORN,CORN,CORN,INGR,INGR,INGR,INGR,INGR,TIP,TIP,TIP,TIP,TIP
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2018-09-11 00:00:00-04:00,16.24,16.31,16.2239,16.24,32947,101.49,102.1326,100.38,101.44,273383.0,111.08,111.13,110.97,111.025,1329289.0
2018-09-12 00:00:00-04:00,16.19,16.2028,15.6799,15.7799,321211,101.41,103.44,101.215,103.11,206807.0,111.19,111.235,111.11,111.13,826503.0
2018-09-13 00:00:00-04:00,15.76,15.8,15.6262,15.7,51794,103.47,104.1,102.755,103.62,279273.0,111.12,111.12,110.96,110.96,951833.0
2018-09-14 00:00:00-04:00,15.65,15.73,15.6466,15.71,55998,103.91,104.21,102.455,104.01,304941.0,110.78,110.91,110.72,110.78,1029880.0
2018-09-17 00:00:00-04:00,15.68,15.68,15.575,15.59,97311,103.74,104.285,103.15,103.92,566205.0,110.71,110.84,110.69,110.79,1040555.0


In [6]:
corn_data = pd.DataFrame()

for c in raw_corn.columns:
    if c[1] in "close":
        corn_data[c[0]] = raw_corn[c[0]][c[1]]

In [7]:
corn_data.index= corn_data.index.date
corn_data= corn_data.dropna()
corn_data

Unnamed: 0,CORN,INGR,TIP
2018-09-11,16.2400,101.44,111.025
2018-09-12,15.7799,103.11,111.130
2018-09-13,15.7000,103.62,110.960
2018-09-14,15.7100,104.01,110.780
2018-09-17,15.5900,103.92,110.790
...,...,...,...
2021-09-03,19.6900,88.12,128.620
2021-09-07,19.3000,87.38,128.330
2021-09-08,19.3300,87.48,128.830
2021-09-09,19.2600,86.30,129.510


In [8]:
def window_data(corn_data, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(corn_data) - window):
        features = corn_data.iloc[i : (i + window), feature_col_number]
        target = corn_data.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [9]:
# Creating the features (X) and target (y) data using the window_data() function.
window_size = 5

feature_column = 2
target_column = 2
X, y = window_data(corn_data, window_size, feature_column, target_column)
print (f"X sample values:\n{X[:5]} \n")
print (f"y sample values:\n{y[:5]}")

X sample values:
[[111.025 111.13  110.96  110.78  110.79 ]
 [111.13  110.96  110.78  110.79  110.54 ]
 [110.96  110.78  110.79  110.54  110.42 ]
 [110.78  110.79  110.54  110.42  110.58 ]
 [110.79  110.54  110.42  110.58  110.59 ]] 

y sample values:
[[110.54 ]
 [110.42 ]
 [110.58 ]
 [110.59 ]
 [110.485]]


### Training

In [10]:
# Use 70% of the data for training and the remainder for testing
split = int(0.7 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
X_train

array([[111.025, 111.13 , 110.96 , 110.78 , 110.79 ],
       [111.13 , 110.96 , 110.78 , 110.79 , 110.54 ],
       [110.96 , 110.78 , 110.79 , 110.54 , 110.42 ],
       ...,
       [126.2  , 126.25 , 126.31 , 126.34 , 126.35 ],
       [126.25 , 126.31 , 126.34 , 126.35 , 126.3  ],
       [126.31 , 126.34 , 126.35 , 126.3  , 126.34 ]])

In [11]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

# Fit the MinMaxScaler object with the training feature data X_train
X_scaler.fit(X_train)

# Scale the features training and testing sets
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

# Fit the MinMaxScaler object with the training target data y_train
y_scaler.fit(y_train)

# Scale the target training and testing sets
y_train = y_scaler.transform(y_train)
y_test = y_scaler.transform(y_test)

In [12]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
print (f"X_train sample values:\n{X_train[:5]} \n")
print (f"X_test sample values:\n{X_test[:5]}")

X_train sample values:
[[[0.14547872]
  [0.15106383]
  [0.14202128]
  [0.13244681]
  [0.13297872]]

 [[0.15106383]
  [0.14202128]
  [0.13244681]
  [0.13297872]
  [0.11968085]]

 [[0.14202128]
  [0.13244681]
  [0.13297872]
  [0.11968085]
  [0.11329787]]

 [[0.13244681]
  [0.13297872]
  [0.11968085]
  [0.11329787]
  [0.12180851]]

 [[0.13297872]
  [0.11968085]
  [0.11329787]
  [0.12180851]
  [0.12234043]]] 

X_test sample values:
[[[0.96010638]
  [0.9606383 ]
  [0.95797872]
  [0.96010638]
  [0.94414894]]

 [[0.9606383 ]
  [0.95797872]
  [0.96010638]
  [0.94414894]
  [0.94042553]]

 [[0.95797872]
  [0.96010638]
  [0.94414894]
  [0.94042553]
  [0.93723404]]

 [[0.96010638]
  [0.94414894]
  [0.94042553]
  [0.93723404]
  [0.92712766]]

 [[0.94414894]
  [0.94042553]
  [0.93723404]
  [0.92712766]
  [0.93404255]]]


### Build and Train the LSTM RNN

In [13]:
# Import required Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [14]:
# Define the LSTM RNN model.
model = Sequential()

number_units = 5
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))
# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))
# Layer 3
model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))
# Output layer
model.add(Dense(1))

### Compiling the LSTM RNN Model

In [15]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [16]:
# Summarize the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 5, 5)              140       
_________________________________________________________________
dropout (Dropout)            (None, 5, 5)              0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 5, 5)              220       
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5)              0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 5)                 220       
_________________________________________________________________
dropout_2 (Dropout)          (None, 5)                 0         
_________________________________________________________________
dense (Dense)                (None, 1)                 6

### Training the Model

In [17]:
# Train the model
model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1c1926b9108>

### Model Performance

In [18]:
# Evaluate the model
model.evaluate(X_test, y_test)



0.00810630526393652

### Making Predictions

In [19]:
# Make some predictions
predicted = model.predict(X_test)

In [20]:
# Recover the original prices instead of the scaled version
predicted_prices = y_scaler.inverse_transform(predicted)
real_prices = y_scaler.inverse_transform(y_test.reshape(-1, 1))

In [21]:
corn = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
    }, index = corn_data.index[-len(real_prices): ])
corn.head()

Unnamed: 0,Real,Predicted
2020-10-20,125.97,125.813232
2020-10-21,125.91,125.807671
2020-10-22,125.72,125.793823
2020-10-23,125.85,125.774658
2020-10-26,125.96,125.73761


### Prepare data for forcasting

In [22]:
corn[-10:]["Real"]

2021-08-27    129.98
2021-08-30    129.97
2021-08-31    129.59
2021-09-01    128.72
2021-09-02    128.80
2021-09-03    128.62
2021-09-07    128.33
2021-09-08    128.83
2021-09-09    129.51
2021-09-10    129.14
Name: Real, dtype: float64

In [23]:
x_future, _ = window_data(pd.DataFrame(corn[-10:]["Real"]), 5, 0, 0)
x_future = X_scaler.transform(x_future)
x_future = x_future.reshape((x_future.shape[0],x_future.shape[1], 1))

### Forcast commodity movement

In [24]:
last = model.predict(x_future)[-1:]
last = y_scaler.inverse_transform(last)

In [25]:
corn[-9:]["Real"].append(pd.Series(last[0]))

2021-08-30    129.970000
2021-08-31    129.590000
2021-09-01    128.720000
2021-09-02    128.800000
2021-09-03    128.620000
2021-09-07    128.330000
2021-09-08    128.830000
2021-09-09    129.510000
2021-09-10    129.140000
0             126.205353
dtype: float64

In [26]:
from datetime import timedelta

def forecast_lstm(model, 
                  data,  
                  x_scaler, 
                  y_scaler, 
                  num_of_obs = 10 ,
                  steps_ahead = 15, 
                  window_size = 5,
                  target_col="Real"
                 ):
    """
    This function requires window_data to be defined
    """
    for i in range(0, steps_ahead):
        x_future, _ = window_data(pd.DataFrame(data[-num_of_obs:][target_col]), window_size, 0, 0)
        x_future = X_scaler.transform(x_future)
        x_future = x_future.reshape((x_future.shape[0],x_future.shape[1], 1))
        last = model.predict(x_future)[-1:]
        last = y_scaler.inverse_transform(last)
        #print(last)
        new_data = data[-(num_of_obs-1):].copy()
        
        last_date = new_data.iloc[[-1]].index
        last_date = last_date + timedelta(days=1)
        last_date = pd.to_datetime(last_date[0])
        
        new_data = new_data.append(pd.DataFrame(index=[last_date]))
        new_data.iloc[-1][target_col] = last
        data = new_data
        #print(data)
        #break
    return data

In [27]:
x_future, _ = window_data(pd.DataFrame(corn[-10:]["Real"]), 5, 0, 0)

In [28]:
forecast_lstm(model, pd.DataFrame(corn["Real"]), X_scaler, y_scaler)

Unnamed: 0,Real
2021-09-16,126.026848
2021-09-17,125.787498
2021-09-18,125.787193
2021-09-19,125.784859
2021-09-20,125.774834
2021-09-21,125.751816
2021-09-22,125.720695
2021-09-23,125.697487
2021-09-24,125.696404
2021-09-25,125.693993


In [30]:
first15= forecast_lstm(model, pd.DataFrame(corn["Real"]), X_scaler, y_scaler)


Unnamed: 0,Real
2021-09-16,126.026848
2021-09-17,125.787498
2021-09-18,125.787193
2021-09-19,125.784859
2021-09-20,125.774834
2021-09-21,125.751816
2021-09-22,125.720695
2021-09-23,125.697487
2021-09-24,125.696404
2021-09-25,125.693993
