In [1]:
# Time Series Analysis & Forecasting — Complete Script
# Run in Jupyter or as a .py (requires internet for yfinance).

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error
import yfinance as yf
# optional: from pmdarima import auto_arima  # only if you want automated order selection

plt.rcParams['figure.figsize'] = (12, 6)

# ==========================
# Part A: Load & EDA
# ==========================

# 1) Download data for a ticker (change ticker if you want)
ticker = 'GOOGL'   # change to 'TSLA', 'AAPL', etc.
start_date = '2019-01-01'
end_date = None    # None means up to today

print(f"Downloading {ticker} data from {start_date} to {end_date or 'today'}...")
# auto_adjust is passed explicitly because its default differs between yfinance
# releases; with auto_adjust=False the frame keeps a separate 'Adj Close' column,
# which the histogram section further down asks for.
data = yf.download(ticker, start=start_date, end=end_date, progress=False, auto_adjust=False)

# yfinance reports network/ticker problems by returning an empty frame rather than
# raising, so fail here with a clear message instead of deep inside a later section.
if data is None or data.empty:
    raise RuntimeError(
        f"No data returned for ticker {ticker!r} "
        f"(start={start_date}, end={end_date or 'today'}); check the symbol and your connection."
    )

# Recent yfinance releases may return MultiIndex columns even for a single ticker,
# which would make df['Close'] a one-column DataFrame instead of a Series.
# NOTE(review): assumes the price-field names ('Open', 'Close', ...) sit on level 0,
# as with yfinance's default column grouping — confirm if group_by is ever changed.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# yfinance returns a DataFrame with Date as index already (DatetimeIndex)
# Ensure index is datetime:
data.index = pd.to_datetime(data.index)
df = data.copy()

# Quick look
print("\n--- Head ---")
print(df.head())
print("\n--- Data types ---")
print(df.dtypes)
print("\n--- Summary statistics ---")
print(df.describe().T)

# 2) Visualizations
# Line chart of Close over time
plt.figure()
plt.plot(df.index, df['Close'], label='Close')
plt.title(f'{ticker} Close Price Over Time')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.grid(True)
plt.show()

# Histograms of key numeric variables.
# Not every download contains all of these columns (for example, 'Adj Close' is
# absent when yfinance returns auto-adjusted prices), and selecting a missing
# column raises KeyError. Keep only the columns that are actually present.
expected_numeric_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
numeric_cols = [col for col in expected_numeric_cols if col in df.columns]
missing_numeric_cols = [col for col in expected_numeric_cols if col not in numeric_cols]
if missing_numeric_cols:
    print(f"Skipping histograms for columns not present in the data: {missing_numeric_cols}")

if numeric_cols:
    df[numeric_cols].hist(bins=40, figsize=(14,8))
    plt.suptitle(f'{ticker} — Histograms of numeric features')
    plt.show()

# ==========================
# Part B: Moving Average
# ==========================

# 3) Moving average: smooth Close with a trailing window of 7 rows.
# min_periods=1 means the first six rows are averaged over whatever history
# exists so far instead of being left as NaN.
close_rolling = df['Close'].rolling(7, min_periods=1)
df['MA7'] = close_rolling.mean()

# Overlay the raw Close price and its smoothed version on a single axes.
fig_ma, ax_ma = plt.subplots()
ax_ma.plot(df.index, df['Close'], alpha=0.6, label='Close')
ax_ma.plot(df.index, df['MA7'], linewidth=2, label='7-day MA')
ax_ma.set_title(f'{ticker} Close and 7-day Moving Average')
ax_ma.set_xlabel('Date')
ax_ma.set_ylabel('Price')
ax_ma.legend()
ax_ma.grid(True)
plt.show()

# ==========================
# Part C: Time Series Decomposition
# ==========================

# We'll do decomposition on the Close series.
# seasonal_decompose requires a freq (period). For daily stock data, weekly seasonality -> period=5 (trading days)
# but if you want longer seasonality use 252 (~trading days in a year). We'll show both options.
close_series = df['Close'].asfreq('B')  # business day frequency (business days without a quote become NaN)

# Forward-fill those gaps so the decomposition does not fail on NaN.
# Series.ffill() is used because fillna(method='ffill') is deprecated since pandas 2.1.
close_series = close_series.ffill()

# Decompose (use period=252 for yearly seasonality; you can also try period=5 for weekly)
decomp_period = 252
decomp = seasonal_decompose(close_series, model='additive', period=decomp_period)

# Plot decomposition
decomp.plot()
plt.suptitle(f'{ticker} — Additive Decomposition (period={decomp_period})', fontsize=16)
plt.show()

# ==========================
# Part D: Stationarity (ADF) & ARIMA
# ==========================

# 5) Stationarity check: Augmented Dickey-Fuller test on the Volume series.
# (The interpretation of these numbers is given later.)
volume_series = df['Volume'].dropna()

# autolag='AIC' is forwarded to adfuller unchanged.
adf_result = adfuller(volume_series, autolag='AIC')

# Pair the first five entries of the result tuple with readable labels;
# zip stops at the shorter sequence, so any further entries are ignored.
adf_labels = (
    'ADF Statistic',
    'p-value',
    'Used Lag',
    'Number of Observations',
    'Critical Values',
)
adf_output = dict(zip(adf_labels, adf_result))

print("\n--- ADF Test on Volume ---")
for label, value in adf_output.items():
    print(f"{label}: {value}")

# Interpretation printed later.

# 6) ARIMA modeling & Forecasting
# NOTE: Task asked "Fit ARIMA(1,0,0) on Close and Forecast next 30 days of Volume".
# That's inconsistent. We'll:
#  a) Fit ARIMA(1,0,0) on Close and forecast 30 days of Close.
#  b) Fit ARIMA(1,0,0) on Volume and forecast 30 days of Volume (if you specifically need Volume forecast).
#
# Each series is fitted twice, for two different purposes:
#  * hold-out fit: trained on everything except the last `test_days` rows and scored
#    (MAE) against those rows;
#  * full fit: trained on the whole series and used for the forecast that is plotted
#    and saved under future dates.
# The future forecast has to come from the full fit. Predictions from the hold-out fit
# start right after the training slice, so stamping them with dates after the last
# observation would present test-window forecasts as if they were future values.

forecast_steps = 30       # horizon of the true out-of-sample forecast
test_days = 60            # size of the hold-out window (approx. last 60 trading days)
arima_order = (1, 0, 0)   # simple AR(1)
order_label = f"ARIMA({','.join(str(term) for term in arima_order)})"

if len(df) <= test_days:
    raise ValueError(
        f"Need more than {test_days} rows to hold out a test window, got {len(df)}."
    )

# Dates for the future forecast: business days following the last observation
# (note: may not align exactly with trading days, since market holidays are not excluded).
last_date = df.index[-1]
pred_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1), periods=forecast_steps)


def _as_forecast_series(values, index, name):
    """Wrap raw forecast values in a float Series carrying the given index and name."""
    return pd.Series(np.asarray(values, dtype=float).ravel(), index=index, name=name)


def _evaluate_on_holdout(series, order, n_test):
    """Fit ARIMA on all but the last ``n_test`` rows and score it on those rows.

    The forecast is a multi-step out-of-sample forecast: the model is not updated
    with the hold-out observations while predicting them.

    Returns (train, test, model, fit_result, forecast aligned to test.index, MAE).
    """
    train, test = series.iloc[:-n_test], series.iloc[-n_test:]
    model = ARIMA(train, order=order)
    result = model.fit()
    forecast = _as_forecast_series(result.forecast(steps=len(test)), test.index, 'holdout_forecast')
    mae = mean_absolute_error(test, forecast)
    return train, test, model, result, forecast, mae


def _forecast_future(series, order, index, name):
    """Refit ARIMA on the full series and forecast ``len(index)`` steps past its end."""
    result = ARIMA(series, order=order).fit()
    return _as_forecast_series(result.forecast(steps=len(index)), index, name)


def _plot_actual_vs_forecast(actual, forecast, actual_label, forecast_label, title, ylabel):
    """Draw the observed series and the dashed future forecast on one figure."""
    plt.figure()
    plt.plot(actual.index, actual, label=actual_label)
    plt.plot(forecast.index, forecast, label=forecast_label, linestyle='--')
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.show()


# ---- Close ----
(train_close, test_close, model_close, res_close,
 pred_for_test, mae_close) = _evaluate_on_holdout(df['Close'], arima_order, test_days)
print(f"\n{order_label} on Close — summary:")
print(res_close.summary())
print(f"\nMAE of {order_label} on Close over last {test_days} days: {mae_close:.4f}")

# Forecast the next 'forecast_steps' business days of Close from the full-history fit.
pred_close = _forecast_future(df['Close'], arima_order, pred_index, 'pred_close')
_plot_actual_vs_forecast(
    df['Close'], pred_close,
    actual_label='Actual Close',
    forecast_label=f'{order_label} forecast ({forecast_steps} days)',
    title=f'{ticker} Close: Actual vs {order_label} Forecast',
    ylabel='Close Price',
)

# ---- Volume (if you want a Volume forecast) ----
(train_vol, test_vol, model_vol, res_vol,
 pred_vol_for_test, mae_vol) = _evaluate_on_holdout(df['Volume'], arima_order, test_days)
print(f"\n{order_label} on Volume — summary:")
print(res_vol.summary())
print(f"\nMAE of {order_label} on Volume over last {test_days} days: {mae_vol:.4f}")

# Forecast the next 'forecast_steps' business days of Volume from the full-history fit.
pred_vol = _forecast_future(df['Volume'], arima_order, pred_index, 'pred_vol')
_plot_actual_vs_forecast(
    df['Volume'], pred_vol,
    actual_label='Actual Volume',
    forecast_label=f'{order_label} Volume Forecast ({forecast_steps} days)',
    title=f'{ticker} Volume: Actual vs {order_label} Forecast',
    ylabel='Volume',
)

# Save forecasts to CSV if desired (both columns share the future-date index)
out = pd.DataFrame({
    'pred_close': pred_close,
    'pred_vol': pred_vol
})
out.to_csv(f'{ticker}_arima_{forecast_steps}day_forecast.csv')
print(f"\nSaved forecast CSV to {ticker}_arima_{forecast_steps}day_forecast.csv")

ModuleNotFoundError: No module named 'yfinance'