<a href="https://colab.research.google.com/github/HelmieAnalytics/xardata/blob/main/ETF_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install yfinance



In [3]:
pip install mpld3

Collecting mpld3
  Downloading mpld3-0.5.10-py3-none-any.whl.metadata (5.1 kB)
Downloading mpld3-0.5.10-py3-none-any.whl (202 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m202.6/202.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mpld3
Successfully installed mpld3-0.5.10


In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import mpld3

mpld3.enable_notebook()

# List of ETFs (excluding ^J303.JO)
etfs = [
    'STXFIN.JO', 'STXIND.JO', 'STXRES.JO', 'SYG4IR.JO',
    'STXSWX.JO', 'CTOP50.JO', 'STXPRO.JO', 'STXWDM.JO',
    'SYGEMF.JO', 'STXNDQ.JO', 'STXCHN.JO', 'SYG500.JO',
    'GLPROP.JO', 'STXEMG.JO', 'SYGP.JO', 'NEWUSD.JO',
    'NEWGBP.JO', 'NEWPLT.JO', 'STXGOV.JO', 'FNBWGB.JO'
]

# Categorize ETFs
bond_etfs = ['STXGOV.JO', 'FNBWGB.JO']
local_etfs = ['STXFIN.JO', 'STXIND.JO', 'STXRES.JO', 'SYG4IR.JO',
              'STXSWX.JO', 'CTOP50.JO', 'STXPRO.JO']
world_market_etfs = ['STXWDM.JO', 'SYGEMF.JO', 'STXNDQ.JO',
                     'STXCHN.JO', 'SYG500.JO', 'GLPROP.JO',
                     'STXEMG.JO', 'SYGP.JO']

# Fetch historical price data for ETFs and ^J303.JO
data = yf.download(etfs + ['^J303.JO'], start='2020-01-01', end='2024-08-22')['Adj Close']

# Drop columns with all NaN values (failed downloads)
data = data.dropna(axis=1, how='all')

# Calculate returns for ETFs (excluding ^J303.JO)
returns = data[etfs].pct_change().dropna()

# Initial capital allocation
capital = 1000

def rebalance_portfolio_with_trend_filter_and_mean_check(returns, capital, trend_filter, prices):
    allocation = pd.Series(0, index=returns.columns)

    # Allocate 30% to bond ETFs
    bond_allocation = capital * 0.30 / len(bond_etfs)
    allocation[bond_etfs] = bond_allocation

    # Allocate 50% to local ETFs
    local_allocation = capital * 0.50 / len(local_etfs)
    allocation[local_etfs] = local_allocation

    # Allocate 20% to world market ETFs
    world_market_allocation = capital * 0.20 / len(world_market_etfs)
    allocation[world_market_etfs] = world_market_allocation

    portfolio_value = pd.DataFrame(index=trend_filter.index, columns=returns.columns)  # Use trend_filter.index

    # Calculate 100-day moving average for each ETF
    ma100 = prices.rolling(window=100).mean()

    # Iterate over MONTHLY dates to match trend_filter
    for date in trend_filter.index:
        if trend_filter.loc[date] > 0:
            for etf in returns.columns:
                # Use .loc with a tuple to access specific cells in the DataFrame
                if prices.loc[date, etf] < ma100.loc[date, etf]:
                    allocation[etf] = 0
            # Forward fill daily returns to match monthly dates
            daily_returns_filled = returns.loc[date:].iloc[0]
            portfolio_value.loc[date] = (daily_returns_filled + 1).cumprod() * allocation
        else:
            # Handle the first month where there's no previous data
            if date == trend_filter.index[0]:
                portfolio_value.loc[date] = allocation  # Start with initial allocation
            else:
                portfolio_value.loc[date] = portfolio_value.shift(1).loc[date]

    return portfolio_value.sum(axis=1)

# Calculate 200-day moving average for ^J303 index
jcals_ma200 = data['^J303.JO'].rolling(window=200).mean()

# Create trend filter: 1 if above 200-day MA, 0 if below
trend_filter_daily = (data['^J303.JO'] > jcals_ma200).astype(int)

# Resample trend filter to monthly frequency, taking the last value of each month
trend_filter_monthly = trend_filter_daily.resample('M').last()

# Rebalance portfolio monthly with trend filter and 100-day mean check
monthly_returns = returns.resample('M').apply(lambda x: (x + 1).prod() - 1)
# Resample prices to monthly frequency to align with monthly_returns and trend_filter_monthly
monthly_prices = data[etfs].resample('M').last()
portfolio_value = rebalance_portfolio_with_trend_filter_and_mean_check(monthly_returns, capital, trend_filter_monthly, monthly_prices)  # Pass monthly_prices here

# Normalize the benchmark performance (^J303.JO) and portfolio value to start from a common date
common_start_date = max(portfolio_value.index.min(), data['^J303.JO'].index.min())
portfolio_value = portfolio_value[portfolio_value.index >= common_start_date]
benchmark_returns = data['^J303.JO'][data['^J303.JO'].index >= common_start_date].pct_change().dropna().resample('M').apply(lambda x: (x + 1).prod() - 1)
benchmark_performance = (benchmark_returns + 1).cumprod() * portfolio_value.iloc[0]

# Create interactive plot
fig = go.Figure()

fig.add_trace(go.Scatter(x=portfolio_value.index, y=portfolio_value, mode='lines', name='Portfolio Value'))
fig.add_trace(go.Scatter(x=benchmark_performance.index, y=benchmark_performance, mode='lines', name='Benchmark (^J303.JO)'))

fig.update_layout(title='Portfolio Value vs Benchmark (^J303.JO)',
                  xaxis_title='Date',
                  yaxis_title='Value',
                  legend_title='Legend')

fig.show()

[*********************100%%**********************]  21 of 21 completed
  returns = data[etfs].pct_change().dropna()
  allocation[local_etfs] = local_allocation


In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import mpld3

mpld3.enable_notebook()

# List of ETFs (excluding ^J303.JO)
etfs = [
    'STXFIN.JO', 'STXIND.JO', 'STXRES.JO', 'SYG4IR.JO',
    'STXSWX.JO', 'CTOP50.JO', 'STXPRO.JO', 'STXWDM.JO',
    'SYGEMF.JO', 'STXNDQ.JO', 'STXCHN.JO', 'SYG500.JO',
    'GLPROP.JO', 'STXEMG.JO', 'SYGP.JO', 'NEWUSD.JO',
    'NEWGBP.JO', 'NEWPLT.JO', 'STXGOV.JO', 'FNBWGB.JO',
]

# Categorize ETFs
bond_etfs = ['STXGOV.JO', 'FNBWGB.JO']
local_etfs = ['STXFIN.JO', 'STXIND.JO', 'STXRES.JO', 'SYG4IR.JO',
              'STXSWX.JO', 'CTOP50.JO', 'STXPRO.JO']
world_market_etfs = ['STXWDM.JO', 'SYGEMF.JO', 'STXNDQ.JO',
                     'STXCHN.JO', 'SYG500.JO', 'GLPROP.JO',
                     'STXEMG.JO', 'SYGP.JO']

# Fetch historical price data for ETFs and ^J303.JO
data = yf.download(etfs + ['^J303.JO'], start='2020-01-01', end='2024-08-22')['Adj Close']

# Drop columns with all NaN values (failed downloads)
data = data.dropna(axis=1, how='all')

# Calculate returns for ETFs (excluding ^J303.JO)
returns = data[etfs].pct_change().dropna()

# Calculate correlation matrix
correlation_matrix = returns.corr()

# Calculate volatility
volatility = returns.std()

# Calculate momentum (e.g., 12-month momentum)
momentum = returns.rolling(window=252).apply(lambda x: (x + 1).prod() - 1).dropna()

# Initial capital allocation
capital = 1000

def rebalance_portfolio_with_trend_filter_and_mean_check(returns, capital, trend_filter, prices, correlation_matrix, volatility, momentum):
    allocation = pd.Series(0, index=returns.columns)

    # Allocate 30% to bond ETFs
    bond_allocation = capital * 0.30 / len(bond_etfs)
    allocation[bond_etfs] = bond_allocation

    # Allocate 50% to local ETFs
    local_allocation = capital * 0.50 / len(local_etfs)
    allocation[local_etfs] = local_allocation

    # Allocate 20% to world market ETFs
    world_market_allocation = capital * 0.20 / len(world_market_etfs)
    allocation[world_market_etfs] = world_market_allocation

    portfolio_value = pd.DataFrame(index=trend_filter.index, columns=returns.columns)  # Use trend_filter.index

    # Calculate 100-day moving average for each ETF
    ma100 = prices.rolling(window=100).mean()

    # Iterate over MONTHLY dates to match trend_filter
    for date in trend_filter.index:
        if trend_filter.loc[date] > 0:
            for etf in returns.columns:
                # Use .loc with a tuple to access specific cells in the DataFrame
                if prices.loc[date, etf] < ma100.loc[date, etf]:
                    allocation[etf] = 0
            # Forward fill daily returns to match monthly dates
            daily_returns_filled = returns.loc[date:].iloc[0]
            portfolio_value.loc[date] = (daily_returns_filled + 1).cumprod() * allocation
        else:
            # Handle the first month where there's no previous data
            if date == trend_filter.index[0]:
                portfolio_value.loc[date] = allocation  # Start with initial allocation
            else:
                portfolio_value.loc[date] = portfolio_value.shift(1).loc[date]

    return portfolio_value.sum(axis=1)

# Calculate 200-day moving average for ^J303 index
jcals_ma200 = data['^J303.JO'].rolling(window=200).mean()

# Create trend filter: 1 if above 200-day MA, 0 if below
trend_filter_daily = (data['^J303.JO'] > jcals_ma200).astype(int)

# Resample trend filter to monthly frequency, taking the last value of each month
trend_filter_monthly = trend_filter_daily.resample('M').last()

# Rebalance portfolio monthly with trend filter and 100-day mean check
monthly_returns = returns.resample('M').apply(lambda x: (x + 1).prod() - 1)
# Resample prices to monthly frequency to align with monthly_returns and trend_filter_monthly
monthly_prices = data[etfs].resample('M').last()
portfolio_value = rebalance_portfolio_with_trend_filter_and_mean_check(monthly_returns, capital, trend_filter_monthly, monthly_prices, correlation_matrix, volatility, momentum)  # Pass monthly_prices here

# Normalize the benchmark performance (^J303.JO) and portfolio value to start from a common date
common_start_date = max(portfolio_value.index.min(), data['^J303.JO'].index.min())
portfolio_value = portfolio_value[portfolio_value.index >= common_start_date]
benchmark_returns = data['^J303.JO'][data['^J303.JO'].index >= common_start_date].pct_change().dropna().resample('M').apply(lambda x: (x + 1).prod() - 1)
benchmark_performance = (benchmark_returns + 1).cumprod() * portfolio_value.iloc[0]

# Create interactive plot
fig = go.Figure()

fig.add_trace(go.Scatter(x=portfolio_value.index, y=portfolio_value, mode='lines', name='Portfolio Value'))
fig.add_trace(go.Scatter(x=benchmark_performance.index, y=benchmark_performance, mode='lines', name='Benchmark (^J303.JO)'))

fig.update_layout(title='Portfolio Value vs Benchmark (^J303.JO)',
                  xaxis_title='Date',
                  yaxis_title='Value',
                  legend_title='Legend')

fig.show()


[*********************100%%**********************]  21 of 21 completed

The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '71.42857142857143' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



In [15]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# List of ETFs (excluding ^J303.JO)
etfs = [
    'STXFIN.JO', 'STXIND.JO', 'STXRES.JO', 'SYG4IR.JO',
    'STXSWX.JO', 'CTOP50.JO', 'STXPRO.JO', 'STXWDM.JO',
    'SYGEMF.JO', 'STXNDQ.JO', 'STXCHN.JO', 'SYG500.JO',
    'GLPROP.JO', 'STXEMG.JO', 'SYGP.JO', 'NEWUSD.JO',
    'NEWGBP.JO', 'NEWPLT.JO', 'STXGOV.JO', 'FNBWGB.JO',
]

# Fetch historical price data for ETFs and ^J303.JO
data = yf.download(etfs + ['^J303.JO'], start='2019-01-01', end='2024-08-22')['Adj Close']

# Drop columns with all NaN values (failed downloads)
data = data.dropna(axis=1, how='all')

# Calculate returns for ETFs (excluding ^J303.JO)
returns = data[etfs].pct_change().dropna()

# Calculate correlation matrix
correlation_matrix = returns.corr()

# Calculate volatility
volatility = returns.std()

# Calculate 2-month exponential regression slope
def exponential_regression_slope(prices, window=200):
    slopes = pd.DataFrame(index=prices.index, columns=prices.columns)
    for col in prices.columns:
        for i in range(window, len(prices)):
            y = np.log(prices[col].iloc[i-window:i])
            if y.isnull().any():
                continue  # Skip if there are NaN values in the window
            X = np.arange(window).reshape(-1, 1)
            model = LinearRegression().fit(X, y)
            slopes[col].iloc[i] = model.coef_[0]
    return slopes

slopes = exponential_regression_slope(data[etfs])

# Initial capital allocation
capital = 1000

def rebalance_portfolio(returns, capital, slopes, correlation_matrix, volatility):
    allocation = pd.Series(0, index=returns.columns)

    # Normalize slopes, correlation, and volatility
    norm_slopes = slopes.iloc[-1] / slopes.iloc[-1].abs().sum()
    norm_corr = 1 - correlation_matrix.mean()
    norm_vol = 1 / volatility

    # Combine factors to determine allocation
    combined_factors = norm_slopes * norm_corr * norm_vol
    combined_factors = combined_factors / combined_factors.sum()

    # Allocate capital based on combined factors
    allocation = combined_factors * capital

    portfolio_value = pd.DataFrame(index=returns.index, columns=returns.columns)

    # Iterate over MONTHLY dates to match returns
    for date in returns.index:
        daily_returns_filled = returns.loc[date]
        portfolio_value.loc[date] = (daily_returns_filled + 1).cumprod() * allocation

    return portfolio_value.sum(axis=1)

# Rebalance portfolio monthly
monthly_returns = returns.resample('M').apply(lambda x: (x + 1).prod() - 1)
portfolio_value = rebalance_portfolio(monthly_returns, capital, slopes, correlation_matrix, volatility)

# Normalize the benchmark performance (^J303.JO) and portfolio value to start from a common date
common_start_date = max(portfolio_value.index.min(), data['^J303.JO'].index.min())
portfolio_value = portfolio_value[portfolio_value.index >= common_start_date]
benchmark_returns = data['^J303.JO'][data['^J303.JO'].index >= common_start_date].pct_change().dropna().resample('M').apply(lambda x: (x + 1).prod() - 1)
benchmark_performance = (benchmark_returns + 1).cumprod() * portfolio_value.iloc[0]

# Create interactive plot
fig = go.Figure()

fig.add_trace(go.Scatter(x=portfolio_value.index, y=portfolio_value, mode='lines', name='Portfolio Value'))
fig.add_trace(go.Scatter(x=benchmark_performance.index, y=benchmark_performance, mode='lines', name='Benchmark (^J303.JO)'))

fig.update_layout(title='Portfolio Value vs Benchmark (^J303.JO)',
                  xaxis_title='Date',
                  yaxis_title='Value',
                  legend_title='Legend')

fig.show()


[*********************100%%**********************]  21 of 21 completed

The default fill_method='pad' in DataFrame.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.

