In [11]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Load datasets using yfinance.
# JNJ is requested at a 3-month interval; AMZN is requested without an
# explicit interval, so yfinance's default applies.
jj_data = yf.download('JNJ', start='2010-01-01', end='2023-12-31', interval='3mo')
# NOTE(review): the column called 'Sales' holds the downloaded 'Close' values,
# not reported company sales. The name is kept because the rest of the script
# reads jj_data['Sales'].
jj_data.rename(columns={'Close': 'Sales'}, inplace=True)
# Take an explicit copy: a new column is added to this frame later, and doing
# that on a bare column selection is the pattern pandas flags with
# SettingWithCopyWarning (hidden here by the blanket warning filter).
jj_data = jj_data[['Sales']].copy()

amzn_data = yf.download('AMZN', start='2010-01-01', end='2023-12-31')

# Function to test stationarity
def test_stationarity(series, name, alpha=0.05):
    """Run an Augmented Dickey-Fuller test on ``series`` and print the verdict.

    Args:
        series: Data to test. Missing values are dropped before the test.
        name: Label used in the printed report.
        alpha: Significance level the p-value is compared against. Defaults
            to 0.05, the threshold that used to be hard-coded.

    Returns:
        None. The statistic, the p-value and the verdict are printed.
    """
    # The first two entries of adfuller's result are the test statistic and
    # the p-value; the remaining entries are not used here.
    adf_stat, p_value = adfuller(series.dropna())[:2]
    print(f'ADF Statistic for {name}: {adf_stat}')
    print(f'p-value: {p_value}')
    # The ADF null hypothesis is a unit root, so a p-value above alpha means
    # the null cannot be rejected and the series is reported as non-stationary.
    if p_value > alpha:
        print(f'{name} is non-stationary.\n')
    else:
        print(f'{name} is stationary.\n')

# Johnson & Johnson Analysis
# Test the level series first, then its first difference.
test_stationarity(jj_data['Sales'], 'Johnson & Johnson Sales')

sales_first_diff = jj_data['Sales'].diff()
jj_data['Sales_diff'] = sales_first_diff
# Differencing leaves a NaN in the first row; drop it in place.
jj_data.dropna(inplace=True)

test_stationarity(jj_data['Sales_diff'], 'Differenced Sales')

# Amazon Univariate Analysis
# Log-transform the close, then first-difference the logs.
amzn_close_log = np.log(amzn_data['Close'])
amzn_data['Close_log'] = amzn_close_log
amzn_data['Close_log_diff'] = amzn_close_log.diff()
# Drop NaNs (the first differenced row) before testing.
amzn_data.dropna(inplace=True)
test_stationarity(amzn_data['Close_log_diff'], 'Amazon Close Price (Log Differenced)')

# Amazon Multivariate Analysis
# Work on an independent copy holding only the two modelled columns.
amzn_multi = amzn_data[['Close','Volume']].copy()
amzn_multi['Close_log'] = np.log(amzn_multi['Close'])
amzn_multi['Volume_log'] = np.log(amzn_multi['Volume'])
amzn_multi['Close_log_diff'] = amzn_multi['Close_log'].diff()
amzn_multi['Volume_log_diff'] = amzn_multi['Volume_log'].diff()
# np.log(0) is -inf, and differencing then yields +/-inf. dropna() removes
# only NaN, so convert infinities to NaN first; otherwise a zero-volume row
# would carry non-finite values into the VAR fit.
amzn_multi.replace([np.inf, -np.inf], np.nan, inplace=True)
amzn_multi.dropna(inplace=True)

# ARIMA Model for Johnson & Johnson
# Fit on the 'Sales' column with order (2, 1, 2), then project ahead.
jj_arima_order = (2, 1, 2)
jj_horizon = 8  # 2 years of quarterly data
jj_arima = ARIMA(jj_data['Sales'], order=jj_arima_order).fit()
jj_forecast_result = jj_arima.get_forecast(steps=jj_horizon)
jj_forecast = jj_forecast_result.predicted_mean

# VAR Model for Amazon
# The final rows are excluded from the fit; the forecast spans the same
# number of steps as were excluded.
var_holdout = 24
var_columns = ['Close_log_diff', 'Volume_log_diff']
train = amzn_multi[var_columns].iloc[:-var_holdout]
var_fitter = VAR(train)
var_model = var_fitter.fit(maxlags=12)
var_forecast = var_model.forecast(train.values, steps=var_holdout)

# LSTM Model for Johnson & Johnson
# Scale the differenced series; fit_transform needs a 2-D column, hence
# reshape(-1, 1).
scaler = MinMaxScaler()
jj_scaled = scaler.fit_transform(jj_data['Sales_diff'].values.reshape(-1,1))

# Build supervised samples: each window of n_steps consecutive scaled values
# is paired with the value that immediately follows it.
n_steps = 4
n_windows = len(jj_scaled) - n_steps
X = np.array([jj_scaled[i:i+n_steps, 0] for i in range(n_windows)])
y = np.array([jj_scaled[i+n_steps, 0] for i in range(n_windows)])
# The LSTM below declares input_shape=(n_steps, 1), i.e. it consumes 3-D
# batches (samples, timesteps, features). Add the single-feature axis
# explicitly instead of relying on Keras to adjust the rank implicitly.
X = X.reshape(-1, n_steps, 1)

model = Sequential([
    LSTM(50, activation='relu', input_shape=(n_steps,1)),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')
# verbose=0 keeps the 100-epoch training run silent.
model.fit(X, y, epochs=100, verbose=0)

# Evaluation
def evaluate(actual, forecast, name):
    """Print the MAE and RMSE of ``forecast`` against ``actual``.

    Args:
        actual: Observed values.
        forecast: Predicted values, compared against ``actual``.
        name: Label placed at the start of the printed line.
    """
    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(actual, forecast)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, forecast)
    print(f'{name} - MAE: {mae:.2f}, RMSE: {rmse:.2f}')

# Plotting functions would go here


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


ADF Statistic for Johnson & Johnson Sales: 0.5829866775916936
p-value: 0.9871886859014881
Johnson & Johnson Sales is non-stationary.

ADF Statistic for Differenced Sales: -5.287484567820151
p-value: 5.812158054031337e-06
Differenced Sales is stationary.

ADF Statistic for Amazon Close Price (Log Differenced): -60.59225630188504
p-value: 0.0
Amazon Close Price (Log Differenced) is stationary.

