# Q4

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
import warnings

# Suppress every warning for the rest of the session. Note that this also hides
# deprecation and numerical warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')
# Apply matplotlib's 'ggplot' style sheet to all figures created afterwards.
plt.style.use('ggplot')

# Seed NumPy's global (legacy) random state so that np.random.* draws made by
# later cells are repeatable when the notebook is run top to bottom.
np.random.seed(5)

In [6]:
##### Step 1: Data Acquisition (Don't change this code)
# Date range requested from Yahoo Finance for the AAPL price history.
start_date = "2023-01-01"
end_date = "2025-01-01" # End date is exclusive in some cases, so set it to the start of the next year

# Download the price history for AAPL into `data`, which every later step uses.
# NOTE(review): the rendered output below shows two header rows (Price / Ticker),
# which suggests the columns are a MultiIndex, so data['Close'] may be a
# one-column DataFrame rather than a Series - confirm for the installed yfinance version.
data = yf.download("AAPL", start=start_date, end=end_date)
# Show the first rows as a quick sanity check of the download.
data.head()


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2023-01-03,123.211212,128.954561,122.324586,128.34378,112117500
2023-01-04,124.482056,126.747876,123.22108,125.004178,89113600
2023-01-05,123.161949,125.871079,122.905819,125.240591,80962700
2023-01-06,127.693581,128.353621,123.033882,124.137239,87754700
2023-01-09,128.215698,131.427258,127.959568,128.53095,70790800


In [None]:
##### Step 2: Preprocess Data and Assign States

# Calculate daily returns
# `data['Close']` can be a one-column DataFrame when the download has
# (Price, Ticker) MultiIndex columns; reduce it to a plain Series first so that
# 'Return' is a single column of simple returns: r_t = P_t / P_{t-1} - 1.
close_prices = data['Close']
if isinstance(close_prices, pd.DataFrame):
    close_prices = close_prices.iloc[:, 0]
data['Return'] = close_prices.pct_change()
# NOTE: this mutates `data` in place; re-running this cell recomputes the
# returns on the already-trimmed frame and drops one more leading row.
data.dropna(inplace=True) # to remove NaN values (the first row has no previous close)

# Define 5 states based on quantiles e.g [0.2, 0.4, 0.6, 0.8]
daily_returns = np.asarray(data['Return'], dtype=float).ravel()
quantiles = np.quantile(daily_returns, [0.2, 0.4, 0.6, 0.8])
state_names = ["Very Bearish", "Bearish", "Neutral", "Bullish", "Very Bullish"]

# A column that provides the 5 different states "Very Bearish", "Bearish",
# "Neutral", "Bullish", "Very Bullish" coded as numbers from 0 to 4.
# np.digitize returns the number of cut points that are <= the return, i.e.
# r < q20 -> 0, q20 <= r < q40 -> 1, ..., r >= q80 -> 4.
state_codes = np.digitize(daily_returns, quantiles)
data['State'] = state_codes

# Sanity check: only the codes 0..4 may appear.
assert state_codes.min() >= 0 and state_codes.max() <= len(state_names) - 1

# Store return distributions for each state (needed at step 5)
return_distributions = [
    daily_returns[state_codes == state_code]
    for state_code in range(len(state_names))
]


print("\nData with States:")
data.tail()


In [None]:
##### Step 3: Construct Transition Matrix

# P[i, j] estimates Pr(state tomorrow = j | state today = i) from the observed
# sequence of daily states (coded 0..4 in data['State']).
n_states = len(state_names)
state_path = np.asarray(data['State']).ravel().astype(int)

# Count every observed one-day transition i -> j.
# np.add.at accumulates repeated (i, j) pairs; a plain `counts[i, j] += 1` with
# array indices would count each distinct pair only once.
transition_counts = np.zeros((n_states, n_states))
np.add.at(transition_counts, (state_path[:-1], state_path[1:]), 1)

# Normalise each row to probabilities. A state that never occurs as a "from"
# state has no information, so it keeps a uniform row and P stays row-stochastic.
row_totals = transition_counts.sum(axis=1, keepdims=True)
P = np.full((n_states, n_states), 1.0 / n_states)
np.divide(transition_counts, row_totals, out=P, where=row_totals > 0)

# Every row of a transition matrix must sum to one.
assert np.allclose(P.sum(axis=1), 1.0)

print("\nTransition Matrix (P):")
print(P)


In [None]:
##### Step 4: Analyze Markov Chain Properties

# Visualize as heatmap
plt.figure(figsize=(10, 7))
sns.heatmap(P, annot=True, fmt='.2f', cmap='viridis',
            xticklabels=state_names, yticklabels=state_names)
plt.title('Transition Matrix Heatmap for AAPL')
plt.xlabel('Next State')
plt.ylabel('Current State')
plt.show()

# Calculate stationary distribution
# We solve pi = pi * P, which is (P.T - I) * pi.T = 0
# This is an eigenvector problem. We find the eigenvector of P.T for eigenvalue 1.
eigenvalues, eigenvectors = np.linalg.eig(np.asarray(P, dtype=float).T)
# Floating point rarely returns exactly 1.0, so take the eigenvalue closest to 1.
unit_index = np.argmin(np.abs(eigenvalues - 1.0))
pi = np.real(eigenvectors[:, unit_index])
# Eigenvectors are only defined up to scale (and sign); dividing by the sum
# turns the vector into a probability distribution.
pi = pi / pi.sum()

pi = pi.flatten().real # Get 1D array if you haven't done already.

print("\nStationary Distribution (pi):")
pi_series = pd.Series(pi, index=state_names)
print(pi_series)
print("\nInterpretation: ---------------------")
print("pi[i] is the long-run share of trading days the chain spends in state i, "
      "regardless of the state it starts from.")
print("If the states are return quintiles (cut points 0.2/0.4/0.6/0.8), each state "
      "holds about 20% of the observed days, so pi should be close to 0.2 for every "
      "state and each mean return time close to 5 days.")

# Calculate mean return time
# For an irreducible chain the expected number of steps to come back to state i
# is 1 / pi[i]; a state with pi[i] == 0 is never revisited (infinite return time).
with np.errstate(divide='ignore'):
    mean_return_time = 1.0 / pi
print("\nMean Return Time (in days):")
mrt_series = pd.Series(mean_return_time, index=state_names)
print(mrt_series)


In [None]:
##### Step 5: Simulate Future Stock Prices

def simulate_prices(start_price, n_days, P, return_dists, start_state=None):
    """Simulate one future price path with the Markov chain of return states.

    Each simulated day: draw the next state from the row of ``P`` belonging to
    the current state, draw one historical return observed in that next state,
    and compound the previous price with it.

    Parameters
    ----------
    start_price : float
        Price on day 0 of the path.
    n_days : int
        Number of days to simulate.
    P : array-like, shape (n_states, n_states)
        Row-stochastic transition matrix.
    return_dists : sequence of array-like
        ``return_dists[s]`` holds the historical returns observed in state ``s``.
    start_state : int, optional
        State the chain starts from. When omitted, the last state stored in the
        notebook-level ``data['State']`` column is used.

    Returns
    -------
    list of float
        ``n_days + 1`` prices, beginning with ``start_price``.
    """
    prices = [start_price]
    # Start from the last known state
    if start_state is None:
        start_state = np.asarray(data['State']).ravel()[-1]
    current_state = int(start_state)

    transition = np.asarray(P, dtype=float)
    n_states = transition.shape[0]

    for _ in range(n_days):
        # 1. Predict next state
        next_state = int(np.random.choice(n_states, p=transition[current_state]))

        # 2. Sample a return from that state's distribution
        observed_returns = np.asarray(return_dists[next_state], dtype=float).ravel()
        # A state without any observed return cannot be sampled; treat it as a flat day.
        if observed_returns.size:
            sampled_return = float(np.random.choice(observed_returns))
        else:
            sampled_return = 0.0

        # 3. Calculate next price
        prices.append(float(prices[-1] * (1.0 + sampled_return)))

        # 4. Update current state
        current_state = next_state

    return prices

# Run 1,000 simulations
n_simulations = 1000
n_forecast_days = 100 # forecast horizon in trading days
# Most recent close; flattening also handles a one-column 'Close' DataFrame
# (MultiIndex columns from yfinance).
last_price = float(np.asarray(data['Close'], dtype=float).ravel()[-1])

# Each simulated path has n_forecast_days + 1 prices (day 0 is last_price).
simulations = [
    simulate_prices(last_price, n_forecast_days, P, return_distributions)
    for _ in range(n_simulations)
]

# Transpose so that rows are forecast days and columns are individual simulations.
simulations_df = pd.DataFrame(simulations).T
simulations_df.columns = [f'Sim {i+1}' for i in range(n_simulations)]

simulations_df

In [None]:

##### Step 6: Backtest Model Predictions
# Note: A true rolling window backtest is computationally intensive.
# This is a simplified version for demonstration.
# A full backtest would re-calculate P and return_dists at each step.

def backtest_model(historical_data, window_size, n_sims=100,
                   transition_matrix=None, return_dists=None):
    """Simplified rolling 1-day-ahead backtest of the Markov chain price model.

    For every day ``t`` after the first ``window_size`` rows, the model starts
    from the close and state of day ``t - 1`` (the last row of the rolling
    window), simulates ``n_sims`` one-day-ahead prices and records their mean
    and a 95% interval next to the actual close of day ``t``.

    The transition matrix and return distributions are NOT re-estimated per
    window (simplification), so the forecasts use information from the full
    sample.

    Parameters
    ----------
    historical_data : pandas.DataFrame
        Must contain a 'Close' column and a 'State' column (integer codes).
    window_size : int
        Number of leading rows used as warm-up; must be at least 1.
    n_sims : int, default 100
        One-day-ahead simulations per forecast day; must be at least 1.
    transition_matrix : array-like, optional
        Row-stochastic matrix. Defaults to the notebook-level ``P``.
    return_dists : sequence of array-like, optional
        Observed returns per state. Defaults to the notebook-level
        ``return_distributions``.

    Returns
    -------
    pandas.DataFrame
        Columns 'Actual', 'Predicted', 'Lower_CI', 'Upper_CI', indexed by
        ``historical_data.index[window_size:]``.

    Raises
    ------
    ValueError
        If ``window_size`` or ``n_sims`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    if n_sims < 1:
        raise ValueError("n_sims must be at least 1")

    # For simplicity, fall back to the global P and return distributions.
    if transition_matrix is None:
        transition_matrix = P
    if return_dists is None:
        return_dists = return_distributions

    transition = np.asarray(transition_matrix, dtype=float)
    n_states = transition.shape[0]
    # Convert once, outside the loop; ravel() also flattens one-column frames.
    return_pools = [np.asarray(pool, dtype=float).ravel() for pool in return_dists]
    closes = np.asarray(historical_data['Close'], dtype=float).ravel()
    states = np.asarray(historical_data['State']).ravel().astype(int)

    predictions = []
    actuals = []
    lower_ci = []
    upper_ci = []

    # Start after the first window
    for t in range(window_size, len(historical_data)):
        # 1. Define rolling window (rows t - window_size .. t - 1)
        window_closes = closes[t - window_size:t]
        window_states = states[t - window_size:t]

        # 2. States, P and return distributions are reused as-is; the forecast
        #    starts from the last observation inside the window (day t - 1).
        previous_price = window_closes[-1]
        previous_state = window_states[-1]

        # 3. Simulate 1-day ahead 'n_sims' times
        next_states = np.random.choice(n_states, size=n_sims, p=transition[previous_state])
        sampled_returns = np.zeros(n_sims)
        for state in np.unique(next_states):
            pool = return_pools[state]
            hits = next_states == state
            # A state with no observed returns contributes a flat (0%) day.
            if pool.size:
                sampled_returns[hits] = np.random.choice(pool, size=int(hits.sum()))
        simulated_prices = previous_price * (1.0 + sampled_returns)

        # 4. Calculate stats for this 1-day forecast
        lower, upper = np.percentile(simulated_prices, [2.5, 97.5])
        predictions.append(float(simulated_prices.mean()))
        actuals.append(float(closes[t]))
        lower_ci.append(float(lower))
        upper_ci.append(float(upper))

    backtest_df = pd.DataFrame({
        'Actual': actuals,
        'Predicted': predictions,
        'Lower_CI': lower_ci,
        'Upper_CI': upper_ci
    }, index=historical_data.index[window_size:])

    return backtest_df

# Run backtest (e.g., on last 100 days with a 150-day window)
# backtest_model starts forecasting at row `backtest_window`, so a slice of up
# to 250 rows minus the 150-row warm-up leaves about 100 evaluated days.
backtest_window = 150
backtest_data = data.iloc[-250:] # Use last ~year of data
backtest_results = backtest_model(backtest_data, backtest_window)

# Display the result (columns: Actual, Predicted, Lower_CI, Upper_CI).
backtest_results

In [None]:
##### Step 7: Visualize and Interpret Results
# Plot 1: Backtesting Results
# Actual close vs. the 1-day-ahead forecast, with the forecast interval shaded.
plt.figure(figsize=(15, 7))

backtest_dates = backtest_results.index
plt.plot(backtest_dates, backtest_results['Actual'].astype(float),
         color='black', linewidth=1.5, label='Actual Close')
plt.plot(backtest_dates, backtest_results['Predicted'].astype(float),
         linestyle='--', linewidth=1.2, label='Predicted (1-day-ahead mean)')
plt.fill_between(backtest_dates,
                 backtest_results['Lower_CI'].astype(float),
                 backtest_results['Upper_CI'].astype(float),
                 alpha=0.25, label='Confidence interval (Lower_CI to Upper_CI)')
plt.title('Backtest: 1-Day-Ahead Markov Chain Forecast vs Actual AAPL Close')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.show()

In [None]:

# Plot 2: Future Forecast
# Rows of simulations_df are forecast days (row 0 = last observed close),
# columns are individual simulated paths.
plt.figure(figsize=(15, 7))

forecast_days = np.arange(len(simulations_df))
simulated_paths = simulations_df.to_numpy(dtype=float)

# Drawing every path is slow and unreadable; show a sample of them for texture.
n_paths_shown = min(100, simulated_paths.shape[1])
plt.plot(forecast_days, simulated_paths[:, :n_paths_shown],
         color='grey', alpha=0.15, linewidth=0.8)

# Summarise all paths with the mean and a central 90% band per forecast day.
band_low, band_high = np.percentile(simulated_paths, [5, 95], axis=1)
plt.fill_between(forecast_days, band_low, band_high, alpha=0.3,
                 label='5th-95th percentile of simulations')
plt.plot(forecast_days, simulated_paths.mean(axis=1),
         color='black', linewidth=2, label='Mean simulated price')

plt.title('AAPL Price Forecast: Markov Chain Monte Carlo Simulation')
plt.xlabel('Trading days ahead')
plt.ylabel('Price (USD)')
plt.legend()
plt.show()


In [None]:
##### Step 8: Generate Summary Statistics
final_day_prices = simulations_df.iloc[-1]

# Annualisation factor: approximate number of trading days per year.
trading_days_per_year = 252

# Every simulated path starts at the last observed close (row 0).
current_price = float(simulations_df.iloc[0, 0])
horizon_days = len(simulations_df) - 1

# Mean over all simulated days and paths, excluding the shared starting row.
average_period_price = float(simulations_df.iloc[1:].to_numpy(dtype=float).mean())

# Historical volatility: standard deviation of observed daily returns, annualised.
historical_returns = np.asarray(data['Return'], dtype=float).ravel()
historical_volatility = historical_returns.std(ddof=1) * np.sqrt(trading_days_per_year)

# Simulated volatility: daily-return standard deviation of each path, averaged
# across paths, then annualised.
simulated_returns = simulations_df.pct_change().dropna()
simulated_volatility = float(simulated_returns.std(ddof=1).mean()) * np.sqrt(trading_days_per_year)

# Central intervals of the simulated final-day price.
ci90_low, ci90_high = np.percentile(final_day_prices, [5, 95])
ci95_low, ci95_high = np.percentile(final_day_prices, [2.5, 97.5])

# Statistics to include (the prediction range is taken over final-day prices).
summary_rows = [
    ["Current Price", f"${current_price:,.2f}"],
    [f"Predicted Price ({horizon_days} Days, Mean)", f"${final_day_prices.mean():,.2f}"],
    ["Average Predicted Price (Period)", f"${average_period_price:,.2f}"],
    ["Prediction Range (Min)", f"${final_day_prices.min():,.2f}"],
    ["Prediction Range (Max)", f"${final_day_prices.max():,.2f}"],
    ["Historical Volatility (2y, Ann.)", f"{historical_volatility:.2%}"],
    [f"Simulated Volatility ({horizon_days}d, Ann.)", f"{simulated_volatility:.2%}"],
    ["90% CI (Final Price)", f"${ci90_low:,.2f} - ${ci90_high:,.2f}"],
    ["95% CI (Final Price)", f"${ci95_low:,.2f} - ${ci95_high:,.2f}"],
]

print(tabulate(summary_rows, headers=["Statistic", "Value"], tablefmt="github"))




In [None]:
##### Step 9: Evaluate Model Performance
# Compare the backtest forecasts with the realised closes.
actual_prices = backtest_results['Actual'].to_numpy(dtype=float)
predicted_prices = backtest_results['Predicted'].to_numpy(dtype=float)
lower_bounds = backtest_results['Lower_CI'].to_numpy(dtype=float)
upper_bounds = backtest_results['Upper_CI'].to_numpy(dtype=float)

forecast_errors = predicted_prices - actual_prices
mae = np.mean(np.abs(forecast_errors))
rmse = np.sqrt(np.mean(forecast_errors ** 2))
mape = np.mean(np.abs(forecast_errors / actual_prices)) * 100

# Share of days on which the actual close fell inside the forecast interval.
ci_coverage = np.mean((actual_prices >= lower_bounds) & (actual_prices <= upper_bounds)) * 100

# Directional accuracy: relative to the previous actual close, did the forecast
# point the same way (up / down / flat) as the realised move?
previous_actual = actual_prices[:-1]
predicted_direction = np.sign(predicted_prices[1:] - previous_actual)
realised_direction = np.sign(actual_prices[1:] - previous_actual)
directional_accuracy = np.mean(predicted_direction == realised_direction) * 100

performance_rows = [
    ["Mean Absolute Error (MAE)", f"${mae:,.2f}"],
    ["Root Mean Squared Error (RMSE)", f"${rmse:,.2f}"],
    ["Mean Absolute Percentage Error (MAPE)", f"{mape:.2f}%"],
    ["Directional Accuracy", f"{directional_accuracy:.2f}%"],
    ["Interval Coverage (Lower_CI to Upper_CI)", f"{ci_coverage:.2f}%"],
]
print(tabulate(performance_rows, headers=["Metric", "Value"], tablefmt="github"))


print(f"""
Discussion:
-----------------------------------------
""") ## TODO: write the discussion once the metrics above have been reviewed