In [92]:
!pip3 install autokeras




In [93]:
import autokeras as ak

In [94]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd

# Define the list of FAANG ticker symbols
tickers = ['AAPL', 'AMZN', 'NFLX', 'GOOGL']

# Set the start and end dates for the historical data
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=30*365)).strftime('%Y-%m-%d')

# Create an empty dataframe to store the stock data
combined_data_open = pd.DataFrame()
combined_data_close = pd.DataFrame()
combined_data_high = pd.DataFrame()
combined_data_low = pd.DataFrame()
combined_data_vol = pd.DataFrame()
combined_data_adj_cls = pd.DataFrame()
combined_data = pd.DataFrame()

# Download historical stock data for each ticker and combine into a single dataframe
for ticker in tickers:
    print(ticker)
    data = yf.download(ticker, start=start_date, end=end_date)
    data.columns = ticker+"_"+data.columns  # Add a column to identify the ticker
    if combined_data_open.shape[0] ==0:
      combined_data = data
      combined_data_adj_cls = data[ticker+"_Adj Close"]
      combined_data_open = data[ticker+"_Open"]
      combined_data_close = data[ticker+"_Close"]
      combined_data_high = data[ticker+"_High"]
      combined_data_low = data[ticker+"_Low"]
      combined_data_vol = data[ticker+"_Volume"]
    else:
      combined_data = pd.merge(combined_data, data, left_index=True, right_index=True)
      combined_data_open = pd.merge(combined_data_open, data[ticker+"_Open"], left_index=True, right_index=True)
      combined_data_adj_cls = pd.merge(combined_data_adj_cls, data[ticker+"_Adj Close"], left_index=True, right_index=True)
      combined_data_close = pd.merge(combined_data_close, data[ticker+"_Close"], left_index=True, right_index=True)
      combined_data_high = pd.merge(combined_data_high, data[ticker+"_High"], left_index=True, right_index=True)
      combined_data_low = pd.merge(combined_data_low, data[ticker+"_Low"], left_index=True, right_index=True)
      combined_data_vol = pd.merge(combined_data_vol, data[ticker+"_Volume"], left_index=True, right_index=True)



AAPL
[*********************100%***********************]  1 of 1 completed
AMZN
[*********************100%***********************]  1 of 1 completed
NFLX
[*********************100%***********************]  1 of 1 completed
GOOGL
[*********************100%***********************]  1 of 1 completed


In [95]:
def preprocess(data):
  df2 = data.copy()
  num_lags = 4
  delay = 1 # predict target one step ahead
  for column in df2:
      for lag in range(1,num_lags+1):
          df2[column + '_lag' + str(lag)] = df2[column].shift(lag*-1-(delay-1))
          if column != 'wnd_dir':
              df2[column + '_avg_window_length' + str(lag+1)] = df2[column].shift(-1-(delay-1)).rolling(window=lag+1,center=False).mean().shift(1-(lag+1))

  df2.dropna(inplace=True)

  mask = (df2.columns.str.contains('GOOGL_Adj Close') | df2.columns.str.contains('lag') | df2.columns.str.contains('window'))
  df_processed = df2[df2.columns[mask]]
  return df_processed

# the columns in the processed dataframe
process = True

if process == True:
  df_processed = preprocess(combined_data_adj_cls)
else:
  df_processed = combined_data_adj_cls

df_processed.columns

Index(['GOOGL_Adj Close', 'AAPL_Adj Close_lag1',
       'AAPL_Adj Close_avg_window_length2', 'AAPL_Adj Close_lag2',
       'AAPL_Adj Close_avg_window_length3', 'AAPL_Adj Close_lag3',
       'AAPL_Adj Close_avg_window_length4', 'AAPL_Adj Close_lag4',
       'AAPL_Adj Close_avg_window_length5', 'AMZN_Adj Close_lag1',
       'AMZN_Adj Close_avg_window_length2', 'AMZN_Adj Close_lag2',
       'AMZN_Adj Close_avg_window_length3', 'AMZN_Adj Close_lag3',
       'AMZN_Adj Close_avg_window_length4', 'AMZN_Adj Close_lag4',
       'AMZN_Adj Close_avg_window_length5', 'NFLX_Adj Close_lag1',
       'NFLX_Adj Close_avg_window_length2', 'NFLX_Adj Close_lag2',
       'NFLX_Adj Close_avg_window_length3', 'NFLX_Adj Close_lag3',
       'NFLX_Adj Close_avg_window_length4', 'NFLX_Adj Close_lag4',
       'NFLX_Adj Close_avg_window_length5', 'GOOGL_Adj Close_lag1',
       'GOOGL_Adj Close_avg_window_length2', 'GOOGL_Adj Close_lag2',
       'GOOGL_Adj Close_avg_window_length3', 'GOOGL_Adj Close_lag3',
       '

In [96]:
# df_processed['ds'] = df_processed.index

In [97]:
# df_processed.rename(columns = {'GOOGL_Adj Close':'y'}, inplace = True)
# df_processed.head()
# df_processed['ds']= pd.to_datetime(df_processed['ds'])

In [98]:
# df_processed[:3811]

In [99]:
df_train = df_processed[:int(0.8*df_processed.shape[0])]
df_test = df_processed[int(0.8*df_processed.shape[0]):]

In [100]:
val_split = int(len(df_train) * 0.7)
data_train = df_train[:val_split]
validation_data = df_train[val_split:]

In [101]:
y = 'GOOGL_Adj Close'
X = data_train.loc[:, data_train.columns != 'GOOGL_Adj Close'].columns.tolist()

In [102]:
X

['AAPL_Adj Close_lag1',
 'AAPL_Adj Close_avg_window_length2',
 'AAPL_Adj Close_lag2',
 'AAPL_Adj Close_avg_window_length3',
 'AAPL_Adj Close_lag3',
 'AAPL_Adj Close_avg_window_length4',
 'AAPL_Adj Close_lag4',
 'AAPL_Adj Close_avg_window_length5',
 'AMZN_Adj Close_lag1',
 'AMZN_Adj Close_avg_window_length2',
 'AMZN_Adj Close_lag2',
 'AMZN_Adj Close_avg_window_length3',
 'AMZN_Adj Close_lag3',
 'AMZN_Adj Close_avg_window_length4',
 'AMZN_Adj Close_lag4',
 'AMZN_Adj Close_avg_window_length5',
 'NFLX_Adj Close_lag1',
 'NFLX_Adj Close_avg_window_length2',
 'NFLX_Adj Close_lag2',
 'NFLX_Adj Close_avg_window_length3',
 'NFLX_Adj Close_lag3',
 'NFLX_Adj Close_avg_window_length4',
 'NFLX_Adj Close_lag4',
 'NFLX_Adj Close_avg_window_length5',
 'GOOGL_Adj Close_lag1',
 'GOOGL_Adj Close_avg_window_length2',
 'GOOGL_Adj Close_lag2',
 'GOOGL_Adj Close_avg_window_length3',
 'GOOGL_Adj Close_lag3',
 'GOOGL_Adj Close_avg_window_length4',
 'GOOGL_Adj Close_lag4',
 'GOOGL_Adj Close_avg_window_length5']

In [103]:
data_x = data_train[X]
data_y = data_train[y]
data_x_val = validation_data[X]
data_y_val = validation_data[y]

In [104]:
data_x

Unnamed: 0_level_0,AAPL_Adj Close_lag1,AAPL_Adj Close_avg_window_length2,AAPL_Adj Close_lag2,AAPL_Adj Close_avg_window_length3,AAPL_Adj Close_lag3,AAPL_Adj Close_avg_window_length4,AAPL_Adj Close_lag4,AAPL_Adj Close_avg_window_length5,AMZN_Adj Close_lag1,AMZN_Adj Close_avg_window_length2,...,NFLX_Adj Close_lag4,NFLX_Adj Close_avg_window_length5,GOOGL_Adj Close_lag1,GOOGL_Adj Close_avg_window_length2,GOOGL_Adj Close_lag2,GOOGL_Adj Close_avg_window_length3,GOOGL_Adj Close_lag3,GOOGL_Adj Close_avg_window_length4,GOOGL_Adj Close_lag4,GOOGL_Adj Close_avg_window_length5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-19,0.467469,0.469593,0.471718,0.474703,0.484923,0.481432,0.501618,0.490356,1.975500,1.97400,...,2.238571,2.232000,2.710460,2.724099,2.737738,2.690857,2.624374,2.681306,2.652653,2.685135
2004-08-20,0.471718,0.478320,0.484923,0.486086,0.501618,0.496078,0.526054,0.501132,1.972500,1.96250,...,2.177143,2.212286,2.737738,2.681056,2.624374,2.671588,2.652653,2.678804,2.700450,2.674324
2004-08-23,0.484923,0.493271,0.501618,0.504198,0.526054,0.508486,0.521349,0.510360,1.952500,1.98375,...,2.164286,2.170571,2.624374,2.638513,2.652653,2.659159,2.700450,2.658471,2.656406,2.637337
2004-08-24,0.501618,0.513836,0.526054,0.516340,0.521349,0.516720,0.517858,0.518071,2.015000,2.01225,...,2.064286,2.127429,2.652653,2.676551,2.700450,2.669836,2.656406,2.640578,2.552803,2.624825
2004-08-25,0.526054,0.523701,0.521349,0.521754,0.517858,0.522184,0.523474,0.526600,2.009500,2.00225,...,1.992857,2.087429,2.700450,2.678428,2.656406,2.636553,2.552803,2.617868,2.561812,2.596046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-03-16,28.618084,28.779153,28.940222,28.760004,28.721706,28.660323,28.361279,28.659534,18.596001,18.67650,...,61.185715,60.575714,27.880501,28.094251,28.308001,28.124001,28.183500,28.154875,28.247499,28.177600
2015-03-17,28.940222,28.830964,28.721706,28.674402,28.361279,28.669897,28.656380,28.643765,18.757000,18.70950,...,60.714287,61.140285,28.308001,28.245750,28.183500,28.246333,28.247499,28.251875,28.268499,28.376900
2015-03-18,28.721706,28.541492,28.361279,28.579788,28.656380,28.569651,28.539238,28.414441,18.662001,18.79325,...,62.611427,61.101143,28.183500,28.215500,28.247499,28.233166,28.268499,28.394125,28.877001,28.385300
2015-03-19,28.361279,28.508829,28.656380,28.518965,28.539238,28.337624,27.793600,28.267565,18.924500,18.84000,...,60.250000,60.902572,28.247499,28.257999,28.268499,28.464333,28.877001,28.435750,28.350000,28.385000


In [105]:
!rm -rf /content/time_series_forecaster
predict_from = 1
predict_until = 10
lookback = 10
clf = ak.TimeseriesForecaster(
    lookback=lookback,
    predict_from=predict_from,
#    predict_until=predict_until,
    max_trials=10,
    objective="val_loss",
)
# Train the TimeSeriesForecaster with train data
clf.fit(
    x=data_x,
    y=data_y,
    validation_data=(data_x_val, data_y_val),
    batch_size=32,
    epochs = 300,
)


Trial 2 Complete [00h 00m 20s]

Best val_loss So Far: None
Total elapsed time: 00h 00m 35s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
True              |True              |timeseries_block_1/rnn_block_1/bidirectional
lstm              |lstm              |timeseries_block_1/rnn_block_1/layer_type
2                 |2                 |timeseries_block_1/rnn_block_1/num_layers
0                 |0                 |regression_head_1/dropout
adam_weight_decay |adam              |optimizer
0.001             |0.001             |learning_rate

Epoch 1/300
     82/Unknown - 13s 25ms/step - loss: 246.9207 - mean_squared_error: 246.9207

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/keras_tuner/engine/base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/keras_tuner/engine/base_tuner.py", line 235, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/autokeras/engine/tuner.py", line 101, in _build_and_fit_model
    _, history = utils.fit_with_adaptive_batch_size(
  File "/usr/local/lib/python3.10/dist-packages/autokeras/utils/utils.py", line 88, in fit_with_adaptive_batch_size
    history = run_with_adaptive_batch_size(
  File "/usr/local/lib/python3.10/dist-packages/autokeras/utils/utils.py", line 101, in run_with_adaptiv

RuntimeError: ignored

In [None]:
predictions = clf.predict(df_processed[X])
print(predictions.shape)
# Evaluate the best model with testing data.
print(clf.evaluate(data_x_val, data_y_val))

In [None]:
clf.evaluate(df_processed[X], df_processed[y])[0]

In [None]:
import numpy as np
np.sqrt(clf.evaluate(data_x_val, data_y_val))

In [None]:
np.sqrt(clf.evaluate(df_processed[X], df_processed[y]))

In [None]:
clf.directory

In [None]:
clf.outputs

In [None]:
model = clf.export_model()

In [None]:
model.summary()

In [None]:
model.save('model.h5')