In [1]:
!pip install pandas numpy matplotlib yfinance backtrader
from IPython.core.display import clear_output
clear_output()

In [2]:
import yfinance as yf
import backtrader as bt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.seasonal import seasonal_decompose

# Introduction
This notebook is designed to showcase a comprehensive analysis of a diverse portfolio, integrating Index Funds, Leveraged ETFs, monthly dividend REITs and ETFs, and quarterly dividend stocks. The analysis includes portfolio beta evaluation, backtesting a Dollar-Cost Averaging (DCA) strategy, and options pricing.


In [8]:
# Portfolio universe, grouped by asset class. Each list holds Yahoo Finance ticker symbols.

# Broad index trackers
index_funds = [
    'SPY',
    'QQQ',
    'DAX',
]

# Leveraged ETFs
leveraged_etfs = [
    'TQQQ', 'UMDD', 'UDOW', 'SOXL',
    'NVDL', 'TSLL', 'BITX',
]

# REITs / ETFs that distribute monthly
monthly_dividend_reits_etfs = [
    'O', 'AGNC', 'CSHI', 'JEPI', 'NUSI',
]

# Stocks (and one ETF) that distribute quarterly
quarterly_dividend_stocks = [
    'SPYD', 'MSFT', 'INTC', 'F', 'CSCO', 'BAC',
    'PFE', 'BX', 'MO', 'DOW', 'WMT', 'T',
    'KMB', 'SWK', 'IBM', 'PEP', 'KO', 'JNJ',
]

In [12]:
############################################################################################################
# Define Technical Indicators Functions
############################################################################################################
def bollinger_bands(data, window=20, num_std=2):
    """Add Bollinger band columns computed from the 'Close' column.

    Parameters:
        data (pd.DataFrame): Frame with a 'Close' column; modified in place.
        window (int): Rolling window length used for the mean and standard deviation.
        num_std (float): Number of rolling standard deviations between the mean and each band.

    Returns:
        pd.DataFrame: The same `data` object with 'Bollinger_High' and 'Bollinger_Low' added.
    """
    close_window = data['Close'].rolling(window=window)
    center = close_window.mean()
    band_offset = close_window.std() * num_std
    data['Bollinger_High'] = center + band_offset
    data['Bollinger_Low'] = center - band_offset
    return data

def macd(data, short_window=12, long_window=26, signal_window=9):
    """Add MACD and signal-line columns computed from the 'Close' column.

    Parameters:
        data (pd.DataFrame): Frame with a 'Close' column; modified in place.
        short_window (int): Span of the fast exponential moving average.
        long_window (int): Span of the slow exponential moving average.
        signal_window (int): Span of the exponential moving average applied to MACD.

    Returns:
        pd.DataFrame: The same `data` object with 'MACD' and 'Signal' added.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    closes = data['Close']
    data['MACD'] = _ema(closes, short_window) - _ema(closes, long_window)
    data['Signal'] = _ema(data['MACD'], signal_window)
    return data

def rsi(data, periods=14, ema=True):
    """Add a Relative Strength Index column computed from the 'Close' column.

    Parameters:
        data (pd.DataFrame): Frame with a 'Close' column; modified in place.
        periods (int): Look-back length used for averaging gains and losses.
        ema (bool): If True, average with an exponentially weighted mean
            (com=periods - 1); otherwise use a simple rolling mean.

    Returns:
        pd.DataFrame: The same `data` object with an 'RSI' column added.
    """
    delta = data['Close'].diff()
    gains = delta.clip(lower=0)
    losses = -1 * delta.clip(upper=0)

    def _average(series):
        # Smooth gains/losses with whichever averaging scheme was requested.
        if ema:
            return series.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
        return series.rolling(window=periods).mean()

    relative_strength = _average(gains) / _average(losses)
    data['RSI'] = 100 - (100 / (1 + relative_strength))
    return data

def woodie_pivots(data):
    """Add a close-weighted pivot and four resistance/support levels per row.

    The pivot is (High + Low + 2 * Close) / 4; R1-R4 and S1-S4 are derived from the
    pivot and the row's High/Low.

    Parameters:
        data (pd.DataFrame): Frame with 'High', 'Low' and 'Close' columns; modified in place.

    Returns:
        pd.DataFrame: The same `data` object with 'Pivot', 'R1'..'R4' and 'S1'..'S4' added.
    """
    hi, lo, last = data['High'], data['Low'], data['Close']
    pivot = (hi + lo + 2 * last) / 4

    # Insertion order matches the column order callers see in the resulting frame.
    levels = (
        ('Pivot', pivot),
        ('R1', 2 * pivot - lo),
        ('S1', 2 * pivot - hi),
        ('R2', pivot + (hi - lo)),
        ('S2', pivot - (hi - lo)),
        ('R3', hi + 2 * (pivot - lo)),
        ('S3', lo - 2 * (hi - pivot)),
        ('R4', pivot + 3 * (hi - lo)),
        ('S4', pivot - 3 * (hi - lo)),
    )
    for label, values in levels:
        data[label] = values
    return data

def obv(data):
    """Add an On-Balance Volume column.

    Each row contributes +Volume when 'Close' rose versus the previous row, -Volume
    when it fell, and 0 otherwise (including the first row); 'OBV' is the running sum.

    Parameters:
        data (pd.DataFrame): Frame with 'Close' and 'Volume' columns; modified in place.

    Returns:
        pd.DataFrame: The same `data` object with an 'OBV' column added.
    """
    close = data['Close']
    previous_close = close.shift(1)
    volume = data['Volume']
    signed_volume = np.select(
        [close > previous_close, close < previous_close],
        [volume, -volume],
        default=0,
    )
    data['OBV'] = signed_volume.cumsum()
    return data

def atr(data, window=14):
    """Add an Average True Range column.

    The true range of a row is the largest of: High - Low, |High - previous Close|
    and |Low - previous Close|. 'ATR' is its simple rolling mean.

    Parameters:
        data (pd.DataFrame): Frame with 'High', 'Low' and 'Close' columns; modified in place.
        window (int): Rolling window length for the mean of the true range.

    Returns:
        pd.DataFrame: The same `data` object with an 'ATR' column added.
    """
    previous_close = data['Close'].shift()
    candidates = pd.concat(
        [
            data['High'] - data['Low'],
            (data['High'] - previous_close).abs(),
            (data['Low'] - previous_close).abs(),
        ],
        axis=1,
    )
    # Row-wise maximum; NaN candidates (first row has no previous close) are skipped.
    true_range = candidates.max(axis=1)
    data['ATR'] = true_range.rolling(window=window).mean()
    return data

def stochastic_oscillator(data, window=14):
    """Add stochastic oscillator columns '%K' and '%D'.

    '%K' locates 'Close' within the rolling Low/High range on a 0-100 scale;
    '%D' is the 3-row simple moving average of '%K'.

    Parameters:
        data (pd.DataFrame): Frame with 'High', 'Low' and 'Close' columns; modified in place.
        window (int): Look-back length for the rolling low and high.

    Returns:
        pd.DataFrame: The same `data` object with '%K' and '%D' added.
    """
    lowest_low = data['Low'].rolling(window=window).min()
    highest_high = data['High'].rolling(window=window).max()
    position_in_range = (data['Close'] - lowest_low) / (highest_high - lowest_low)
    data['%K'] = 100 * position_in_range
    data['%D'] = data['%K'].rolling(window=3).mean()
    return data

############################################################################################################
# Process other non-stationary data
############################################################################################################
# Function to detrend time series data using a linear regression model
def detrend_data(data, column):
    """Remove a fitted straight-line trend from one column.

    A LinearRegression is fitted with the row position (0, 1, 2, ...) as the only
    feature and the column values as the target; the fitted line is subtracted from
    the column.

    Parameters:
        data (pd.DataFrame): Frame containing `column`; modified in place.
        column (str): Name of the column to detrend.

    Returns:
        tuple: (`data` with a new '<column>_detrended' column, array of fitted trend values).
    """
    observed = data[column].values
    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    positions = np.arange(len(data)).reshape(-1, 1)

    trend = LinearRegression().fit(positions, observed).predict(positions)

    data[f'{column}_detrended'] = observed - trend
    return data, trend

def seasonal_decomposition(data, column, period):
    """Decompose a column multiplicatively and add the components to the frame.

    Parameters:
        data (pd.DataFrame): Frame containing `column`; modified in place.
        column (str): Name of the column to decompose.
        period (int): Seasonal period passed to `seasonal_decompose`.

    Returns:
        pd.DataFrame: The same `data` object with 'trend_component',
        'seasonal_component', 'residual_component' and
        '<column>_seasonally_adjusted' (column divided by the seasonal component) added.
    """
    parts = seasonal_decompose(data[column], model='multiplicative', period=period)

    component_columns = (
        ('trend_component', parts.trend),
        ('seasonal_component', parts.seasonal),
        ('residual_component', parts.resid),
    )
    for label, values in component_columns:
        data[label] = values

    # Multiplicative model: dividing by the seasonal factor removes seasonality.
    adjusted_label = column + '_seasonally_adjusted'
    data[adjusted_label] = data[column] / data['seasonal_component']

    return data

# Function to calculate price differences
def calculate_price_differences(data, column):
    """Add '<column>_diff': the row-over-row first difference of `column`.

    Parameters:
        data (pd.DataFrame): Frame containing `column`; modified in place.
        column (str): Name of the column to difference.

    Returns:
        pd.DataFrame: The same `data` object with the new column added.
    """
    target = f'{column}_diff'
    first_difference = data[column].diff()
    data[target] = first_difference
    return data

# Function to calculate log returns
def calculate_log_returns(data, column):
    """Add '<column>_log_return': the natural log of each value over the previous one.

    Parameters:
        data (pd.DataFrame): Frame containing `column`; modified in place.
        column (str): Name of the column to compute log returns for.

    Returns:
        pd.DataFrame: The same `data` object with the new column added.
    """
    current = data[column]
    previous = current.shift(1)
    data[f'{column}_log_return'] = np.log(current / previous)
    return data

# Function to calculate volume changes
def calculate_volume_changes(data, volume_column):
    """Add '<volume_column>_changes': the row-over-row first difference of the volume column.

    Parameters:
        data (pd.DataFrame): Frame containing `volume_column`; modified in place.
        volume_column (str): Name of the volume column to difference.

    Returns:
        pd.DataFrame: The same `data` object with the new column added.
    """
    target = f'{volume_column}_changes'
    volume_delta = data[volume_column].diff()
    data[target] = volume_delta
    return data


In [13]:
############################################################################################################
# Fetch Financial Data Function with Technical Indicators
############################################################################################################
def fetch_financial_data(ticker='SPY', start_year=1993, end_year=None, interval='1d', export_csv=False, csv_file=None,  calculate_indicators=False,):
    """
    Fetches data for a specified ticker from Yahoo Finance from the given start year through the end of the current year or specified end year at specified intervals.

    Parameters:
        ticker (str): The ticker symbol for the asset. Defaults to 'SPY'.
        start_year (int): The year from which to start fetching the data. Defaults to 1993.
        end_year (int): The last year for which to fetch the data. Defaults to the current year if None.
        interval (str): The data interval ('1d' for daily, '1wk' for weekly, '1mo' for monthly, '1h' for hourly).
        export_csv (bool): Whether to export the data to a CSV file. Defaults to False.
        csv_file (str): The path of the CSV file to export the data to. Automatically determined if None.
        calculate_indicators (bool): Whether to add the technical-indicator columns
            (Bollinger bands, MACD, RSI, Woodie pivots, OBV, ATR, stochastic oscillator)
            and the differenced / log-return columns. Defaults to False.

    Returns:
        pd.DataFrame: The downloaded data (with the extra columns when
        calculate_indicators is True). Empty when nothing was downloaded.
    """
    # If end_year is not specified, use the current year
    # Hourly data can only be fetched for the last 730 days
    if end_year is None:
        end_year = pd.Timestamp.today().year

    # If csv_file is not specified, automatically generate a file name based on the ticker and interval
    if csv_file is None:
        csv_file = f'{ticker}_{interval}_data_{start_year}_to_{end_year}.csv'

    # yfinance treats `end` as exclusive, so ask for data up to (not including)
    # January 1st of the following year to cover December 31st of end_year.
    end_date = f"{int(end_year) + 1}-01-01"

    data = yf.download(ticker, start=f'{start_year}-01-01', end=end_date, interval=interval)

    # Report a failed/empty download and hand the empty frame back to the caller.
    if data.empty:
        print("Data download failed or returned an empty DataFrame.")
        return data

    # NOTE(review): newer yfinance releases appear to return (field, ticker) MultiIndex
    # columns even for a single ticker; flatten to the field names so lookups such as
    # data['Close'] yield a Series. Confirm against the installed yfinance version.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    if calculate_indicators:
        # Technical indicators
        data = bollinger_bands(data)
        data = macd(data)
        data = rsi(data)
        data = woodie_pivots(data)
        data = obv(data)
        data = atr(data)
        data = stochastic_oscillator(data)

        # Non-stationary data processing
        data = calculate_price_differences(data, 'Close')  # Calculate price differences
        data = calculate_log_returns(data, 'Close')  # Calculate log returns for the 'Close' column
        data = calculate_volume_changes(data, 'Volume')  # Calculate volume changes

    if export_csv:
        data.to_csv(csv_file)
        print(f'Data exported to {csv_file}')

    return data

In [None]:
# Download price history (with indicators) for every index fund, keyed by ticker
funds_data = {}

for fund in index_funds:
    print(f"Fetching data for {fund}...")
    # All other fetch_financial_data parameters keep their defaults
    funds_data.update({fund: fetch_financial_data(ticker=fund, calculate_indicators=True)})

# Convenience aliases for the individual frames
SPY_data, QQQ_data = funds_data['SPY'], funds_data['QQQ']
# .get() yields None instead of raising if 'DAX' is ever dropped from index_funds
DAX_data = funds_data.get('DAX')


In [None]:
# Download price history (with indicators) for every leveraged ETF, keyed by ticker
etfs_data = {}

for etf in leveraged_etfs:
    print(f"Fetching data for {etf}...")
    # All other fetch_financial_data parameters keep their defaults
    etfs_data.update({etf: fetch_financial_data(ticker=etf, calculate_indicators=True)})

# Convenience aliases, one per leveraged ETF
TQQQ_data, UMDD_data = etfs_data['TQQQ'], etfs_data['UMDD']
UDOW_data, SOXL_data = etfs_data['UDOW'], etfs_data['SOXL']
NVDL_data, TSLL_data = etfs_data['NVDL'], etfs_data['TSLL']
BITX_data = etfs_data['BITX']


In [None]:
# Download price history (with indicators) for every monthly-dividend REIT/ETF, keyed by ticker
monthly_dividend_data = {}

for asset in monthly_dividend_reits_etfs:
    print(f"Fetching data for {asset}...")
    # All other fetch_financial_data parameters keep their defaults
    monthly_dividend_data.update({asset: fetch_financial_data(ticker=asset, calculate_indicators=True)})

# Convenience aliases, one per asset
O_data, AGNC_data = monthly_dividend_data['O'], monthly_dividend_data['AGNC']
CSHI_data, JEPI_data = monthly_dividend_data['CSHI'], monthly_dividend_data['JEPI']
NUSI_data = monthly_dividend_data['NUSI']

In [None]:
# Download price history (with indicators) for every quarterly-dividend stock, keyed by ticker
quarterly_dividend_data = {}

for stock in quarterly_dividend_stocks:
    print(f"Fetching data for {stock}...")
    quarterly_dividend_data[stock] = fetch_financial_data(ticker=stock, calculate_indicators=True)

# Convenience aliases: exactly one per ticker, in the same order as quarterly_dividend_stocks
SPYD_data = quarterly_dividend_data['SPYD']
MSFT_data = quarterly_dividend_data['MSFT']
INTC_data = quarterly_dividend_data['INTC']
F_data = quarterly_dividend_data['F']
CSCO_data = quarterly_dividend_data['CSCO']
BAC_data = quarterly_dividend_data['BAC']
PFE_data = quarterly_dividend_data['PFE']
BX_data = quarterly_dividend_data['BX']
MO_data = quarterly_dividend_data['MO']
DOW_data = quarterly_dividend_data['DOW']
WMT_data = quarterly_dividend_data['WMT']
T_data = quarterly_dividend_data['T']
KMB_data = quarterly_dividend_data['KMB']
SWK_data = quarterly_dividend_data['SWK']
IBM_data = quarterly_dividend_data['IBM']
PEP_data = quarterly_dividend_data['PEP']
KO_data = quarterly_dividend_data['KO']
JNJ_data = quarterly_dividend_data['JNJ']

# Beta Weight

In [26]:
def calculate_beta(stock_symbol, market_symbol='^GSPC', start='2020-01-01', end='2023-01-01'):
    """Estimate the beta of an asset against a market benchmark in several ways.

    Adjusted closing prices for both symbols are downloaded from Yahoo Finance,
    converted to simple period-over-period returns, and aligned on their common dates.

    Parameters:
        stock_symbol (str): Ticker of the asset.
        market_symbol (str): Ticker of the benchmark. Defaults to '^GSPC'.
        start (str): Start date passed to yfinance ('YYYY-MM-DD').
        end (str): End date passed to yfinance ('YYYY-MM-DD').

    Returns:
        dict: On success, one entry per method:
            'linear_regression' - slope of an sklearn LinearRegression of stock on market returns,
            'covariance_method' - cov(stock, market) / var(market),
            'variance_ratio'    - var(stock) / var(market) (a volatility ratio, not a regression slope),
            'linregress'        - slope reported by scipy.stats.linregress.
        A method that raises contributes '<method>_error' with the message instead.
        If the download fails or fewer than two aligned returns exist, the dict is
        {'error': <message>}.
    """
    def _adjusted_close(symbol):
        # auto_adjust=False keeps the 'Adj Close' column available.
        # NOTE(review): newer yfinance releases appear to default to auto_adjust=True and
        # to MultiIndex columns; confirm against the installed version.
        prices = yf.download(symbol, start=start, end=end, auto_adjust=False)['Adj Close']
        if isinstance(prices, pd.DataFrame):
            # Single-ticker download with MultiIndex columns: take the only column.
            prices = prices.iloc[:, 0]
        return prices

    try:
        stock_data = _adjusted_close(stock_symbol)
        market_data = _adjusted_close(market_symbol)
    except Exception as e:
        return {'error': f'Failed to download data: {str(e)}'}

    returns = pd.DataFrame({
        'stock_returns': stock_data.pct_change(),
        'market_returns': market_data.pct_change()
    }).dropna()

    # Variance, covariance and regression all need at least two aligned observations.
    if len(returns) < 2:
        return {'error': f'Not enough overlapping return data for {stock_symbol} and {market_symbol}.'}

    # Shared by methods 2 and 3; computed up front so neither depends on the other.
    market_var = returns['market_returns'].var()

    betas = {}

    # Method 1: Linear Regression
    try:
        model = LinearRegression().fit(returns[['market_returns']], returns['stock_returns'])
        betas['linear_regression'] = model.coef_[0]
    except Exception as e:
        betas['linear_regression_error'] = str(e)

    # Method 2: Covariance Method
    try:
        covariance = returns.cov().loc['stock_returns', 'market_returns']
        betas['covariance_method'] = covariance / market_var
    except Exception as e:
        betas['covariance_method_error'] = str(e)

    # Method 3: Variance Ratio
    try:
        stock_var = returns['stock_returns'].var()
        betas['variance_ratio'] = stock_var / market_var
    except Exception as e:
        betas['variance_ratio_error'] = str(e)

    # Method 4: Using scipy linregress for an alternative linear regression method
    try:
        slope, _, _, _, _ = linregress(returns['market_returns'], returns['stock_returns'])
        betas['linregress'] = slope
    except Exception as e:
        betas['linregress_error'] = str(e)

    return betas

In [27]:
# Asset categories whose betas are computed below, keyed by a display label
# (leveraged_etfs is not part of this mapping)
assets = {
    'Index Funds': index_funds,
    'Monthly Dividend REITs/ETFs': monthly_dividend_reits_etfs,
    'Quarterly Dividend Stocks': quarterly_dividend_stocks,
}

# One (initially empty) result dict per category: beta_values[category][asset] -> betas
beta_values = {category: {} for category in assets}

for category, asset_list in assets.items():
    print(f"\nCalculating beta for {category}:")
    category_betas = beta_values[category]
    for asset in asset_list:
        try:
            # Default benchmark and date range of calculate_beta are used
            beta = calculate_beta(asset)
            # Keep whatever calculate_beta returned (beta values or error entries)
            category_betas[asset] = beta
            print(f"  {asset}: {beta}")
        except Exception as e:
            print(f"  Error calculating beta for {asset}: {str(e)}")


Calculating beta for Index Funds:


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  SPY: {'linear_regression': 0.9807573192281638, 'covariance_method': 0.9807573192281639, 'variance_ratio': 0.9640129508634715, 'linregress_error': "name 'linregress' is not defined"}
  QQQ: {'linear_regression': 1.0864855480105435, 'covariance_method': 1.086485548010543, 'variance_ratio': 1.3591367271807535, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  DAX: {'linear_regression': 0.8825334837886392, 'covariance_method': 0.8825334837886386, 'variance_ratio': 1.2270075325550236, 'linregress_error': "name 'linregress' is not defined"}

Calculating beta for Monthly Dividend REITs/ETFs:
  O: {'linear_regression': 0.9530986255253945, 'covariance_method': 0.9530986255253937, 'variance_ratio': 2.16326303695141, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  AGNC: {'linear_regression': 0.7885076022463374, 'covariance_method': 0.7885076022463371, 'variance_ratio': 2.012153327556101, 'linregress_error': "name 'linregress' is not defined"}
  CSHI: {'linear_regression': 0.021816465606245592, 'covariance_method': 0.021816465606245585, 'variance_ratio': 0.0013103351613954139, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  JEPI: {'linear_regression': 0.5346861682740263, 'covariance_method': 0.5346861682740262, 'variance_ratio': 0.37354580188874537, 'linregress_error': "name 'linregress' is not defined"}
  NUSI: {'linear_regression': 0.3427239488232195, 'covariance_method': 0.3427239488232191, 'variance_ratio': 0.28968519281356536, 'linregress_error': "name 'linregress' is not defined"}

Calculating beta for Quarterly Dividend Stocks:


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  SPYD: {'linear_regression': 0.980336404647159, 'covariance_method': 0.9803364046471584, 'variance_ratio': 1.3805153271809842, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  MSFT: {'linear_regression': 1.1727978449660525, 'covariance_method': 1.1727978449660523, 'variance_ratio': 1.863093995597248, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

  INTC: {'linear_regression': 1.1852680404816687, 'covariance_method': 1.1852680404816687, 'variance_ratio': 2.730387030608507, 'linregress_error': "name 'linregress' is not defined"}



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  F: {'linear_regression': 1.214281608689817, 'covariance_method': 1.214281608689818, 'variance_ratio': 3.788059275092534, 'linregress_error': "name 'linregress' is not defined"}
  CSCO: {'linear_regression': 0.9307123848855793, 'covariance_method': 0.9307123848855797, 'variance_ratio': 1.5342718263335713, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

  BAC: {'linear_regression': 1.224233764099381, 'covariance_method': 1.2242337640993808, 'variance_ratio': 2.648172147129387, 'linregress_error': "name 'linregress' is not defined"}



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  PFE: {'linear_regression': 0.5745466770847829, 'covariance_method': 0.5745466770847825, 'variance_ratio': 1.3827259767023081, 'linregress_error': "name 'linregress' is not defined"}
  BX: {'linear_regression': 1.433514055417847, 'covariance_method': 1.4335140554178458, 'variance_ratio': 3.338992364399656, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  MO: {'linear_regression': 0.5991406514706654, 'covariance_method': 0.5991406514706651, 'variance_ratio': 1.225869965640212, 'linregress_error': "name 'linregress' is not defined"}
  DOW: {'linear_regression': 1.1399644717966424, 'covariance_method': 1.1399644717966428, 'variance_ratio': 2.8705519263712658, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

  WMT: {'linear_regression': 0.48746636659585707, 'covariance_method': 0.487466366595857, 'variance_ratio': 1.0204929193900127, 'linregress_error': "name 'linregress' is not defined"}



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  T: {'linear_regression': 0.6871323738080067, 'covariance_method': 0.6871323738080073, 'variance_ratio': 1.2313085782505946, 'linregress_error': "name 'linregress' is not defined"}
  KMB: {'linear_regression': 0.4614531906259107, 'covariance_method': 0.4614531906259102, 'variance_ratio': 0.8933436384082574, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  SWK: {'linear_regression': 1.322569276436186, 'covariance_method': 1.3225692764361872, 'variance_ratio': 3.2446310480712177, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


  IBM: {'linear_regression': 0.8093401230560937, 'covariance_method': 0.8093401230560937, 'variance_ratio': 1.4199949642884129, 'linregress_error': "name 'linregress' is not defined"}
  PEP: {'linear_regression': 0.7336125249600887, 'covariance_method': 0.7336125249600887, 'variance_ratio': 0.9933018970871755, 'linregress_error': "name 'linregress' is not defined"}


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

  KO: {'linear_regression': 0.6863211621910452, 'covariance_method': 0.6863211621910449, 'variance_ratio': 0.9347488828571606, 'linregress_error': "name 'linregress' is not defined"}



[*********************100%%**********************]  1 of 1 completed

  JNJ: {'linear_regression': 0.5395775104998733, 'covariance_method': 0.539577510499874, 'variance_ratio': 0.7351049575409733, 'linregress_error': "name 'linregress' is not defined"}





# Valuation Models for Each Asset Class:

# Backtesting for DCA Strategy

# Options Pricing Analysis