In [1]:
!pip install yfinance pandas numpy statsmodels



1) create a linear forecast for stock price based on data for N days


In [2]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

def linear_forecast(ticker, N):
    data = yf.download(ticker)
    data['Day'] = np.arange(len(data))

    # Use the last N days for the model
    train_data = data.iloc[-N:]
    X = train_data['Day'].values.reshape(-1, 1)
    y = train_data['Close'].values

    model = LinearRegression().fit(X, y)
    pred_price = model.predict(np.array([[len(data)]]))
    return pred_price[0]

ticker = "AAPL"
N = 100
forecast_price = linear_forecast(ticker, N)
print(f"Linear forecast price for {ticker} based on last {N} days: ${forecast_price:.2f}")

[*********************100%%**********************]  1 of 1 completed
Linear forecast price for AAPL based on last 100 days: $175.56


2) for "1" assess standard error for M days ahead


In [3]:
from sklearn.metrics import mean_squared_error

def standard_error(ticker, N, M):
    data = yf.download(ticker)
    data['Day'] = np.arange(len(data))

    train_data = data.iloc[-(N+M):-M]
    test_data = data.iloc[-M:]

    X_train = train_data['Day'].values.reshape(-1, 1)
    y_train = train_data['Close'].values

    X_test = np.arange(len(train_data), len(train_data)+M).reshape(-1, 1)

    model = LinearRegression().fit(X_train, y_train)
    predictions = model.predict(X_test)

    return np.sqrt(mean_squared_error(test_data['Close'], predictions))

ticker = "AAPL"
N = 100
M = 10
error = standard_error(ticker, N, M)
print(f"Standard error for {ticker} for {M} days ahead based on {N} days: ${error:.2f}")

[*********************100%%**********************]  1 of 1 completed
Standard error for AAPL for 10 days ahead based on 100 days: $618.74


3) autocorrelation: regress consecutive daily returns by lag N for different stocks


In [6]:
import statsmodels.api as sm

def autocorrelation(ticker, N):
    data = yf.download(ticker)['Close'].pct_change().dropna()
    data_lag = data.shift(N).dropna()
    data = data[N:]
    X = sm.add_constant(data_lag)
    model = sm.OLS(data, X).fit()
    return model.summary()

ticker = "AAPL"
N = 1
print(autocorrelation(ticker, N))

[*********************100%%**********************]  1 of 1 completed
                            OLS Regression Results                            
Dep. Variable:                  Close   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     3.043
Date:                Sat, 21 Oct 2023   Prob (F-statistic):             0.0811
Time:                        15:05:32   Log-Likelihood:                 23271.
No. Observations:               10803   AIC:                        -4.654e+04
Df Residuals:                   10801   BIC:                        -4.652e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------

4) is stock price a Random Walk process?


In [7]:
from statsmodels.tsa.stattools import adfuller

def is_random_walk(ticker):
    data = yf.download(ticker)['Close']
    result = adfuller(data)
    return result[1] <= 0.05  # p-value

ticker = "AAPL"
print(f"Is {ticker} a random walk? {'Yes' if is_random_walk(ticker) else 'No'}")

[*********************100%%**********************]  1 of 1 completed
Is AAPL a random walk? No


5) moving average forecast – find an optimal N

In [8]:
def optimal_moving_average(ticker, max_N):
    data = yf.download(ticker)['Close']
    errors = []

    for N in range(1, max_N+1):
        forecast = data.rolling(window=N).mean().shift(1)
        mse = mean_squared_error(data[N:], forecast[N:])
        errors.append(mse)

    optimal_N = np.argmin(errors) + 1
    return optimal_N

ticker = "AAPL"
max_N = 100
best_N = optimal_moving_average(ticker, max_N)
print(f"Optimal N for moving average forecast of {ticker}: {best_N}")

[*********************100%%**********************]  1 of 1 completed
Optimal N for moving average forecast of AAPL: 1


6) exponential smoothing forecast – find optimal parameters


In [11]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

def optimal_exponential_smoothing(ticker):
    data = yf.download(ticker)['Close']
    model = ExponentialSmoothing(data).fit()
    return model.params

ticker = "AAPL"
params = optimal_exponential_smoothing(ticker)
print(f"Optimal parameters for exponential smoothing forecast of {ticker}: {params}")

[*********************100%%**********************]  1 of 1 completed
Optimal parameters for exponential smoothing forecast of AAPL: {'smoothing_level': 0.9627281360030423, 'smoothing_trend': nan, 'smoothing_seasonal': nan, 'damping_trend': nan, 'initial_level': 0.12792330809128308, 'initial_trend': nan, 'initial_seasons': array([], dtype=float64), 'use_boxcox': False, 'lamda': None, 'remove_bias': False}


  self._init_dates(dates, freq)
