In [None]:
!pip install autots
from autots import AutoTS

Collecting autots
  Downloading autots-0.5.8-py3-none-any.whl (713 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m713.7/713.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: autots
Successfully installed autots-0.5.8


In [None]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd

# Ticker symbols to download.
# NOTE: this is only a subset of FAANG (META is not in the list).
tickers = ['AAPL', 'AMZN', 'NFLX', 'GOOGL']

# Set the start and end dates for the historical data (roughly the last 30 years)
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=30*365)).strftime('%Y-%m-%d')

# Download each ticker once, prefix its columns with the symbol and collect the frames.
ticker_frames = []
for ticker in tickers:
    print(ticker)
    # auto_adjust=False keeps the separate "Adj Close" column that is selected below
    # (newer yfinance releases adjust prices by default and drop it -- confirm for your version).
    data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
    if data.empty:
        # An empty frame would silently wipe out the inner join below.
        raise ValueError(f"No price data returned for {ticker}")
    if isinstance(data.columns, pd.MultiIndex):
        # Some yfinance versions return (field, ticker) columns even for a single symbol.
        data.columns = data.columns.get_level_values(0)
    data.columns = ticker + "_" + data.columns  # Prefix columns to identify the ticker
    ticker_frames.append(data)

# Keep only the dates that every ticker has in common (same result as chained inner merges).
combined_data = (
    pd.concat(ticker_frames, axis=1, join='inner') if ticker_frames else pd.DataFrame()
)


def _field_frame(field):
    """Return a copy of the `<ticker>_<field>` columns of combined_data, in ticker order."""
    return combined_data[[f"{symbol}_{field}" for symbol in tickers]].copy()


# One DataFrame per price field, one column per ticker.
combined_data_open = _field_frame("Open")
combined_data_close = _field_frame("Close")
combined_data_high = _field_frame("High")
combined_data_low = _field_frame("Low")
combined_data_vol = _field_frame("Volume")
combined_data_adj_cls = _field_frame("Adj Close")



AAPL
[*********************100%***********************]  1 of 1 completed
AMZN
[*********************100%***********************]  1 of 1 completed
NFLX
[*********************100%***********************]  1 of 1 completed
GOOGL
[*********************100%***********************]  1 of 1 completed


In [None]:
def preprocess(data, num_lags=10, delay=1, target_column='GOOGL_Adj Close',
               no_rolling_columns=('wnd_dir',)):
  """Build shifted and rolling-mean feature columns for every column of `data`.

  For each original column and each k in 1..num_lags two features are derived:

  * ``<col>_lag<k>``: the column shifted by ``-(k + delay - 1)`` rows.
  * ``<col>_avg_window_length<k+1>``: mean of a (k+1)-row window over the column
    shifted by ``-delay`` rows, then shifted by a further ``-k`` rows. Skipped for
    columns listed in `no_rolling_columns`.

  Rows containing any NaN are dropped afterwards.

  NOTE(review): all shifts are negative, so every derived value comes from *later*
  rows. On a chronologically ascending index that is future data -- confirm this is
  intended before using these columns as predictors.

  Parameters
  ----------
  data : pandas.DataFrame
      Input frame with string column names. It is not modified.
  num_lags : int, default 10
      Number of shifted / rolling features to create per column.
  delay : int, default 1
      Extra offset applied to every shift (1 = one step ahead).
  target_column : str, default 'GOOGL_Adj Close'
      Columns whose name contains this text are kept in the result.
  no_rolling_columns : collection of str, default ('wnd_dir',)
      Columns for which no rolling-mean features are created.

  Returns
  -------
  pandas.DataFrame
      Columns whose name contains `target_column`, 'lag' or 'window', in the order
      original columns first, then the derived columns per source column.
  """
  # Collect the derived columns first and attach them with one concat; inserting
  # them one at a time fragments the frame and is needlessly slow.
  derived = {}
  for column in data.columns:
      series = data[column]
      for lag in range(1, num_lags + 1):
          derived[f"{column}_lag{lag}"] = series.shift(-lag - (delay - 1))
          if column not in no_rolling_columns:
              window = lag + 1
              derived[f"{column}_avg_window_length{window}"] = (
                  series.shift(-delay)
                  .rolling(window=window, center=False)
                  .mean()
                  .shift(-lag)
              )

  df2 = pd.concat([data, pd.DataFrame(derived, index=data.index)], axis=1).dropna()

  names = df2.columns
  mask = (names.str.contains(target_column, regex=False)
          | names.str.contains('lag')
          | names.str.contains('window'))
  df_processed = df2[names[mask]]
  return df_processed

# Toggle: True trains on the shifted / rolling features built by preprocess(),
# False uses the raw adjusted-close prices.
process = False

if process:
  df_processed = preprocess(combined_data_adj_cls)
else:
  # Work on a copy: later cells add a 'ds' column and rename the target in place,
  # which would otherwise also modify combined_data_adj_cls.
  df_processed = combined_data_adj_cls.copy()

# the columns in the processed dataframe
df_processed.columns

Index(['AAPL_Adj Close', 'AMZN_Adj Close', 'NFLX_Adj Close',
       'GOOGL_Adj Close'],
      dtype='object')

In [None]:
# Preview the first ten rows of the frame that will be modelled.
df_processed.head(10)

Unnamed: 0_level_0,AAPL_Adj Close,AMZN_Adj Close,NFLX_Adj Close,GOOGL_Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-19,0.466103,1.9315,2.264286,2.511011
2004-08-20,0.467469,1.9755,2.262857,2.71046
2004-08-23,0.471718,1.9725,2.272857,2.737738
2004-08-24,0.484923,1.9525,2.208571,2.624374
2004-08-25,0.501618,2.015,2.238571,2.652653
2004-08-26,0.526054,2.0095,2.177143,2.70045
2004-08-27,0.521349,1.995,2.164286,2.656406
2004-08-30,0.517858,1.9155,2.064286,2.552803
2004-08-31,0.523474,1.907,1.992857,2.561812
2004-09-01,0.544267,1.912,2.038571,2.508759


In [None]:
# Copy the date index into a regular 'ds' column.
# NOTE(review): this column is still present in df_train when it is handed to
# model.fit() further down -- confirm the model is meant to see it.
df_processed['ds'] = df_processed.index

In [None]:
# Sanity check: row count and column count after adding 'ds'.
df_processed.shape

(4765, 5)

In [None]:
# Rename the forecasting target to 'y' and make sure 'ds' holds real datetimes.
# A new frame is built instead of renaming in place, so re-running the cell is safe
# and the frame df_processed was taken from keeps its original column names.
df_processed = (
    df_processed
    .rename(columns={'GOOGL_Adj Close': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

# Last expression of the cell, so the preview is actually displayed.
df_processed.head()

In [None]:
# df_processed.reset_index(inplace = True, drop = True)

In [None]:
# Re-check the shape after the rename / datetime conversion; it should be unchanged.
df_processed.shape

(4765, 5)

In [None]:
# Chronological 80/20 split: the first 80% of the rows train the model,
# the remaining rows are held out for evaluation.
_split_at = int(0.8 * len(df_processed))
df_train = df_processed.iloc[:_split_at]
df_test = df_processed.iloc[_split_at:]

In [None]:
# Size of the hold-out set; its row count is the horizon the model has to forecast.
df_test.shape

(953, 5)

In [None]:
# Display the training split (the notebook truncates the middle rows).
df_train

Unnamed: 0_level_0,AAPL_Adj Close,AMZN_Adj Close,NFLX_Adj Close,y,ds
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-08-19,0.466103,1.931500,2.264286,2.511011,2004-08-19
2004-08-20,0.467469,1.975500,2.262857,2.710460,2004-08-20
2004-08-23,0.471718,1.972500,2.272857,2.737738,2004-08-23
2004-08-24,0.484923,1.952500,2.208571,2.624374,2004-08-24
2004-08-25,0.501618,2.015000,2.238571,2.652653,2004-08-25
...,...,...,...,...,...
2019-10-03,53.764645,86.221001,268.149994,59.471500,2019-10-03
2019-10-04,55.271759,86.982498,272.790009,60.548000,2019-10-04
2019-10-07,55.283939,86.633003,274.459991,60.412498,2019-10-07
2019-10-08,54.636280,85.275497,270.720001,59.506500,2019-10-08


In [None]:
# AutoTS expects a "wide" frame: a DatetimeIndex plus one numeric column per series.
# 'ds' only duplicates the index; left in, it is handed to the model as if it were
# another series (the ~1e17 validation MAE in the earlier run is consistent with
# that), so it is dropped here.
train_series = df_train.drop(columns=['ds'], errors='ignore')

model = AutoTS(
    # Forecast exactly as many steps as there are hold-out rows to compare against.
    forecast_length=len(df_test),
    prediction_interval=0.95,
    # Multivariate-capable models; alternatives: 'multivariate' or ['ARIMA', 'ETS'].
    # NOTE(review): GluonTS failed with ImportError on every attempt in the earlier
    # run -- install a compatible version or remove it from the list.
    model_list=[
        'VECM',
        'DynamicFactor',
        'GluonTS',
        'RollingRegression',
        'WindowRegression',
        'VAR',
        'MultivariateMotif',
        'NVAR',
    ],
    max_generations=1,
    num_validations=1,
    no_negatives=True,
    n_jobs='auto')

model.fit(train_series)



Using 1 cpus for n_jobs.
Inferred frequency is: B
Model Number: 1 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 1 in generation 0: GluonTS
Model Number: 2 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 2 in generation 0: GluonTS
Model Number: 3 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 3 in generation 0: GluonTS
Model Number: 4 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}'



Model Number: 19 with model VECM in generation 0 of 1
Template Eval Error: ValueError("regression_type='User' but no future_regressor supplied") in model 19 in generation 0: VECM
Model Number: 20 with model DynamicFactor in generation 0 of 1
Template Eval Error: ZeroDivisionError('integer division or modulo by zero') in model 20 in generation 0: DynamicFactor
Model Number: 21 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 21 in generation 0: GluonTS
Model Number: 22 with model RollingRegression in generation 0 of 1
Template Eval Error: ValueError('future_regressor not supplied, necessary for regression_type') in model 22 in generation 0: RollingRegression
Model Number: 23 with model WindowRegression in generation 0 of 1
Model Number: 24 with model VAR in generation 0 of 1
Model Number: 25 with model MultivariateMotif in generation 

  ) = cd_fast.enet_coordinate_descent_multi_task(


Model Number: 33 with model DynamicFactor in generation 0 of 1


  result = func(self.values, **kwargs)
  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Template Eval Error: ValueError('Model DynamicFactor returned NaN for one or more series. fail_on_forecast_nan=True') in model 33 in generation 0: DynamicFactor
Model Number: 34 with model RollingRegression in generation 0 of 1




Model Number: 35 with model MultivariateMotif in generation 0 of 1
Model Number: 36 with model NVAR in generation 0 of 1
Template Eval Error: ValueError('Model NVAR returned NaN for one or more series. fail_on_forecast_nan=True') in model 36 in generation 0: NVAR
Model Number: 37 with model RollingRegression in generation 0 of 1
Template Eval Error: ValueError('future_regressor not supplied, necessary for regression_type') in model 37 in generation 0: RollingRegression
Model Number: 38 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 38 in generation 0: GluonTS
Model Number: 39 with model MultivariateMotif in generation 0 of 1
Model Number: 40 with model MultivariateMotif in generation 0 of 1


  result = func(self.values, **kwargs)


Model Number: 41 with model DynamicFactor in generation 0 of 1




Model Number: 42 with model MultivariateMotif in generation 0 of 1


  ) = cd_fast.enet_coordinate_descent_multi_task(


Template Eval Error: Exception('Transformer DatepartRegression failed on fit') in model 42 in generation 0: MultivariateMotif
Model Number: 43 with model WindowRegression in generation 0 of 1


  model = cd_fast.enet_coordinate_descent(


Template Eval Error: Exception('Transformer AlignLastValue failed on inverse') in model 43 in generation 0: WindowRegression
Model Number: 44 with model RollingRegression in generation 0 of 1
Template Eval Error: ValueError('future_regressor not supplied, necessary for regression_type') in model 44 in generation 0: RollingRegression
Model Number: 45 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 45 in generation 0: GluonTS
Model Number: 46 with model DynamicFactor in generation 0 of 1
Template Eval Error: ValueError('Number of factors must be less than the number of endogenous variables.') in model 46 in generation 0: DynamicFactor
Model Number: 47 with model DynamicFactor in generation 0 of 1




Model Number: 48 with model GluonTS in generation 0 of 1
Template Eval Error: ImportError('GluonTS installation is incompatible with AutoTS. The numpy version is sometimes the issue, try 1.23.1 {as of 06-2023}') in model 48 in generation 0: GluonTS
Model Number: 49 with model WindowRegression in generation 0 of 1
Template Eval Error: ValueError("regression_type='User' but no future_regressor passed") in model 49 in generation 0: WindowRegression
Model Number: 50 with model WindowRegression in generation 0 of 1
Template Eval Error: Exception('Transformer AlignLastValue failed on inverse') in model 50 in generation 0: WindowRegression
Model Number: 51 with model WindowRegression in generation 0 of 1
Template Eval Error: ValueError("regression_type='User' but no future_regressor passed") in model 51 in generation 0: WindowRegression
Model Number: 52 with model NVAR in generation 0 of 1
Model Number: 53 with model RollingRegression in generation 0 of 1
Template Eval Error: ValueError('futu



Template Eval Error: AttributeError("'numpy.ndarray' object has no attribute 'values'") in model 56 in generation 0: DynamicFactor
Model Number: 57 with model VECM in generation 0 of 1
Model Number: 58 with model RollingRegression in generation 0 of 1
Template Eval Error: ValueError('future_regressor not supplied, necessary for regression_type') in model 58 in generation 0: RollingRegression
New Generation: 1 of 1
Model Number: 59 with model VECM in generation 1 of 1
Model Number: 60 with model MultivariateMotif in generation 1 of 1
Model Number: 61 with model DynamicFactor in generation 1 of 1




Model Number: 62 with model MultivariateMotif in generation 1 of 1
Model Number: 63 with model WindowRegression in generation 1 of 1
Model Number: 64 with model MultivariateMotif in generation 1 of 1
Model Number: 65 with model MultivariateMotif in generation 1 of 1
Model Number: 66 with model MultivariateMotif in generation 1 of 1
Model Number: 67 with model MultivariateMotif in generation 1 of 1


  x = um.multiply(x, x, out=x)


Model Number: 68 with model NVAR in generation 1 of 1
Model Number: 69 with model NVAR in generation 1 of 1
Template Eval Error: ValueError('Model NVAR returned NaN for one or more series. fail_on_forecast_nan=True') in model 69 in generation 1: NVAR
Model Number: 70 with model RollingRegression in generation 1 of 1


  result = func(self.values, **kwargs)


Model Number: 71 with model VECM in generation 1 of 1
Model Number: 72 with model NVAR in generation 1 of 1
Template Eval Error: ValueError('Model NVAR returned NaN for one or more series. fail_on_forecast_nan=True') in model 72 in generation 1: NVAR
Model Number: 73 with model NVAR in generation 1 of 1
Model Number: 74 with model RollingRegression in generation 1 of 1
Template Eval Error: ValueError('future_regressor not supplied, necessary for regression_type') in model 74 in generation 1: RollingRegression
Model Number: 75 with model MultivariateMotif in generation 1 of 1
Model Number: 76 with model VECM in generation 1 of 1


  x = um.multiply(x, x, out=x)


Model Number: 77 with model VECM in generation 1 of 1
Template Eval Error: ValueError("regression_type='User' but no future_regressor supplied") in model 77 in generation 1: VECM
Model Number: 78 with model MultivariateMotif in generation 1 of 1
Model Number: 79 with model DynamicFactor in generation 1 of 1




Model Number: 80 with model MultivariateMotif in generation 1 of 1
Model Number: 81 with model RollingRegression in generation 1 of 1




Model Number: 82 with model VECM in generation 1 of 1
Model Number: 83 with model WindowRegression in generation 1 of 1
Model Number: 84 with model WindowRegression in generation 1 of 1
Model Number: 85 with model VECM in generation 1 of 1
Template Eval Error: ValueError("regression_type='User' but no future_regressor supplied") in model 85 in generation 1: VECM
Model Number: 86 with model VAR in generation 1 of 1


  x = um.multiply(x, x, out=x)


Model Number: 87 with model DynamicFactor in generation 1 of 1




Model Number: 88 with model WindowRegression in generation 1 of 1




Model Number: 89 with model VAR in generation 1 of 1


  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


Template Eval Error: ValueError("Model returned NaN due to a preprocessing transformer {'fillna': 'rolling_mean_24', 'transformations': {'0': 'PctChangeTransformer', '1': 'KalmanSmoothing', '2': 'ClipOutliers', '3': 'DifferencedTransformer', '4': 'Detrend', '5': 'DifferencedTransformer'}, 'transformation_params': {'0': {}, '1': {'model_name': 'local linear stochastic seasonal 7', 'state_transition': [[1, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0, 1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0], [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]], 'process_noise': [[1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]], 'observation_model': [[1, 0, 1, 0, 0,

  x = um.multiply(x, x, out=x)


9 - VECM with avg smape 78.62: 
Model Number: 10 of 15 with model MultivariateMotif for Validation 1
10 - MultivariateMotif with avg smape 43.74: 
Model Number: 11 of 15 with model RollingRegression for Validation 1




11 - RollingRegression with avg smape 58.86: 
Model Number: 12 of 15 with model DynamicFactor for Validation 1




12 - DynamicFactor with avg smape 41.02: 
Model Number: 13 of 15 with model VECM for Validation 1
📈 13 - VECM with avg smape 33.54: 
Model Number: 14 of 15 with model VAR for Validation 1
14 - VAR with avg smape 53.66: 
Model Number: 15 of 15 with model DynamicFactor for Validation 1




15 - DynamicFactor with avg smape 49.77: 


Initiated AutoTS object with best model: 
MultivariateMotif
{'fillna': 'zero', 'transformations': {'0': 'Detrend', '1': 'bkfilter', '2': 'PowerTransformer', '3': 'AlignLastValue'}, 'transformation_params': {'0': {'model': 'GLS', 'phi': 1, 'window': None}, '1': {}, '2': {}, '3': {'rows': 1, 'lag': 1, 'method': 'additive', 'strength': 1.0, 'first_value_only': False}}}
{'window': 28, 'point_method': 'mean', 'distance_metric': 'correlation', 'k': 10, 'max_windows': 1000}
Validation: 0, 1
SMAPE: 26.1989165999426, 37.281059719956495
MAE: 9.375188225130243e+16, 8.154436625349336e+16
SPL: 8.78425499704309, 20.049534130103957

In [None]:
# Forecast with the fitted model. No horizon is passed here, so it presumably
# uses the forecast_length given when the model was created -- TODO confirm.
prediction = model.predict()

In [None]:
# Keep a handle on the prediction object's `forecast` attribute
# (presumably the point forecasts, one column per input series -- confirm).
forecast = prediction.forecast

In [None]:
# Number of forecast rows; expected to equal the forecast_length given to AutoTS.
len(forecast)

In [None]:
# Show the repr of the raw prediction object.
prediction

In [None]:
# Actual target values for the hold-out period only (not the whole history), so the
# series has the same length as the forecast. The index is reset to 0..n-1 because
# the comparison with the forecast is positional.
y_act = df_test['y'].reset_index(drop=True)
y_act

In [None]:
# Predicted target values. The forecast frame is `forecast` (taken from
# `prediction.forecast`); its columns carry the names of the fitted series, so the
# target is 'y' rather than a 'yhat' column.
y_pred = forecast['y'].reset_index(drop=True)
y_pred

In [None]:
from sklearn.metrics import mean_squared_error
import math

# Root-mean-squared error between the hold-out actuals and the forecast, compared
# row by row (both are converted to plain arrays so no index alignment is involved).
rmse = round(
    math.sqrt(mean_squared_error(df_test['y'].to_numpy(), forecast['y'].to_numpy())),
    2,
)
rmse


In [None]:
# Plot actual vs. predicted values over the hold-out period. pandas' plotting API is
# used so the cell does not depend on a `plt` name that was never imported.
comparison = pd.DataFrame({
    'Actual': df_test['y'].to_numpy(),
    'Predicted': forecast['y'].to_numpy(),
})

ax = comparison.plot(title="AutoTS output RMSE - " + str(rmse))
ax.set_xlabel("Hold-out step")
ax.set_ylabel("GOOGL adjusted close")
ax.legend();