In [1]:
# Section 1: CONFIGURATION

import pandas as pd
import numpy as np
import yfinance as yf

# 1. Universe and sector maps (fill these in when ready)
US_TICKERS = ['AAPL', 'MSFT', 'AMZN', 'JNJ', 'XOM']
IN_TICKERS = ['RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS']

SECTOR_MAP_US = {
    'AAPL': 'Information Technology',
    'MSFT': 'Information Technology',
    'AMZN': 'Consumer Discretionary',
    'JNJ':  'Health Care',
    'XOM':  'Energy'
}

SECTOR_MAP_IN = {
    'RELIANCE.NS':   'Energy',
    'TCS.NS':        'Information Technology',
    'HDFCBANK.NS':   'Financials',
    'INFY.NS':       'Information Technology',
    'ICICIBANK.NS':  'Financials'
}

# 2. Capital allocation
US_CAPITAL = 10_000       # USD
IN_CAPITAL = 1_000_000    # INR

# 3. Strategy parameters
LOOKBACK_MONTHS = 12      # months for momentum
SKIP_MONTHS     = 1       # skip most recent
STOP_LOSS       = 0.12    # 12% per-position stop-loss
REBALANCE_DAY   = 'MON'   # weekly on Mondays


# Section 2: DATA INGESTION 

def download_price_data(tickers, start_date, end_date):
    """
    Download price series for given tickers using yfinance.
    Tries 'Adj Close' first, falls back to 'Close' if needed.
    Returns a DataFrame with dates as index and tickers as columns.
    """
    df = yf.download(tickers, start=start_date, end=end_date)
    
    # If multi-level columns (multiple tickers), pick the right field
    if isinstance(df.columns, pd.MultiIndex):
        if 'Adj Close' in df.columns.levels[0]:
            prices = df['Adj Close']
        else:
            prices = df['Close']
    else:
        # Single-level columns (e.g. one ticker)
        if 'Adj Close' in df.columns:
            prices = df['Adj Close']
        elif 'Close' in df.columns:
            prices = df['Close']
        else:
            raise KeyError("Neither 'Adj Close' nor 'Close' found in data")
    
    # Forward-fill missing days, then drop rows with all NaNs
    prices = prices.ffill().dropna(how='all')
    return prices

# Example usage (after populating US_TICKERS / IN_TICKERS):
start_date = '2015-01-01'
end_date   = '2025-06-06'

us_prices = download_price_data(US_TICKERS, start_date, end_date)
in_prices = download_price_data(IN_TICKERS, start_date, end_date)

print("US prices shape:", us_prices.shape)
print("IN prices shape:", in_prices.shape)


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  5 of 5 completed
[*********************100%***********************]  5 of 5 completed

US prices shape: (2622, 5)
IN prices shape: (2573, 5)





In [2]:
# Section 3: SIGNAL CALCULATION
def compute_momentum(prices: pd.DataFrame, lookback_months: int = 12, skip_months: int = 1) -> pd.DataFrame:
    """
    Compute a momentum signal:
      - lookback_months: how many months of data to include in the return calculation
      - skip_months: how many most recent months to skip (to avoid short-term reversal)
    Returns a DataFrame of the same shape as `prices`, containing the momentum value at each date.
    """
    # Approximate trading days per month
    days_per_month = 21
    
    # Calculate window and skip in trading days
    window = lookback_months * days_per_month
    skip = skip_months * days_per_month
    
    # 1. Compute percent change over (window + skip) days
    pct = prices.pct_change(periods=window + skip)
    
    # 2. Shift back by `skip` days so signal at time t uses t-(window+skip) to t-skip
    momentum = pct.shift(-skip)
    
    return momentum

# Section 4: SECTOR-PARITY SELECTION (ensuring exactly top_n picks)
import pandas as pd

def sector_parity_selection(
    signal: pd.Series,
    sector_map: dict[str, str],
    top_n: int = 25
) -> list[str]:
    """
    From a ranked momentum Series, pick exactly top_n tickers,
    enforcing at most one per sector initially. If unique sectors are fewer than top_n,
    fill remaining slots by continuing down the momentum-ranked list.

    1. Select at most one ticker per sector.
    2. If fewer than top_n tickers are selected, fill the rest by pure momentum rank
       excluding already chosen tickers.
    """
    # Get tickers sorted by descending signal
    sorted_tickers = list(signal.sort_values(ascending=False).index)

    # Step 1: one per unique sector
    picked = []
    used_sectors = set()
    for ticker in sorted_tickers:
        if len(picked) >= top_n:
            break
        sector = sector_map.get(ticker)
        if sector not in used_sectors:
            picked.append(ticker)
            used_sectors.add(sector)

    # Step 2: fill remaining slots with top momentum tickers, allowing duplicates
    if len(picked) < top_n:
        for ticker in sorted_tickers:
            if len(picked) >= top_n:
                break
            if ticker not in picked:
                picked.append(ticker)

    return picked


# === Section 3 Test: Compute Momentum ===

# Compute the full momentum DataFrames
momentum_us = compute_momentum(us_prices)
momentum_in = compute_momentum(in_prices)

# Inspect the last few rows to ensure non-NaN values appear
print("US Momentum (last 5 rows):")
print(momentum_us.dropna(how='all').tail(), "\n")
print("IN Momentum (last 5 rows):")
print(momentum_in.dropna(how='all').tail(), "\n")

# === Section 4 Test: Sector-Parity Selection ===

# Determine the last valid rebalance dates
last_us_date = momentum_us.dropna(how='all').index[-1]
last_in_date = momentum_in.dropna(how='all').index[-1]

# Extract the signal Series for those dates
signal_us = momentum_us.loc[last_us_date].dropna()
signal_in = momentum_in.loc[last_in_date].dropna()

# Run the sector-parity selector with top_n=5
picks_us = sector_parity_selection(signal_us, SECTOR_MAP_US, top_n=5)
picks_in = sector_parity_selection(signal_in, SECTOR_MAP_IN, top_n=5)

# Display results
print(f"US Picks on {last_us_date.date()}: {picks_us}")
print(f"IN Picks on {last_in_date.date()}: {picks_in}")




US Momentum (last 5 rows):
Ticker          AAPL      AMZN       JNJ      MSFT       XOM
Date                                                        
2025-04-30  0.193521  0.141354  0.105533  0.143744 -0.095227
2025-05-01  0.169557  0.141965  0.101759  0.159358 -0.101392
2025-05-02  0.200597  0.175486  0.111697  0.200424 -0.084367
2025-05-05  0.205227  0.157709  0.054975  0.185673 -0.080177
2025-05-06  0.166513  0.125541  0.066896  0.186697 -0.086057 

IN Momentum (last 5 rows):
Ticker      HDFCBANK.NS  ICICIBANK.NS   INFY.NS  RELIANCE.NS    TCS.NS
Date                                                                  
2025-04-30     0.307680      0.337534  0.117418    -0.026331 -0.080940
2025-05-02     0.295551      0.333929  0.120101    -0.025152 -0.074119
2025-05-05     0.291333      0.302158  0.106361    -0.037312 -0.091106
2025-05-06     0.303290      0.302752  0.118617    -0.019999 -0.082222
2025-05-07     0.292071      0.264771  0.117338    -0.015443 -0.096432 

US Picks on 2025-0

In [3]:
# Section 4: SECTOR-PARITY SELECTION (ensuring exactly top_n picks)
import pandas as pd

def sector_parity_selection(
    signal: pd.Series,
    sector_map: dict[str, str],
    top_n: int = 25
) -> list[str]:
    """
    From a ranked momentum Series, pick exactly top_n tickers,
    enforcing at most one per sector initially. If unique sectors are fewer than top_n,
    fill remaining slots by continuing down the momentum-ranked list.

    1. Select at most one ticker per sector.
    2. If fewer than top_n tickers are selected, fill the rest by pure momentum rank
       excluding already chosen tickers.
    """
    # Get tickers sorted by descending signal
    sorted_tickers = list(signal.sort_values(ascending=False).index)

    # Step 1: one per unique sector
    picked = []
    used_sectors = set()
    for ticker in sorted_tickers:
        if len(picked) >= top_n:
            break
        sector = sector_map.get(ticker)
        if sector not in used_sectors:
            picked.append(ticker)
            used_sectors.add(sector)

    # Step 2: fill remaining slots with top momentum tickers, allowing duplicates
    if len(picked) < top_n:
        for ticker in sorted_tickers:
            if len(picked) >= top_n:
                break
            if ticker not in picked:
                picked.append(ticker)

    return picked



In [16]:
import pandas as pd
from pandas.tseries.offsets import BDay

def run_backtest(
    prices: pd.DataFrame,
    sector_map: dict[str,str],
    capital: float,
    lookback_months: int = 12,
    skip_months: int = 1,
    stop_loss: float = 0.12,
    freq: str = 'W-FRI'              # use Friday-close for signal
) -> tuple[pd.Series, pd.DataFrame]:
    """
    1) Compute momentum signals
    2) Build weekly positions at each rebalance
    3) Carry forward/week-end stop-loss
    4) Expand positions to daily & compute NAV
    """
    # 1. momentum DF (dates × tickers)
    momentum = compute_momentum(prices, lookback_months, skip_months)

    # 2. Get weekly signal dates (Friday close) and corresponding trade dates (next BDay)
    weekly = prices.resample(freq).last()
    trade_dates = weekly.index + BDay(1)

    # 3. Prepare a weekly positions table (zero initial)
    positions_w = pd.DataFrame(
        0.0,
        index=trade_dates,
        columns=prices.columns
    )

    # 4. Loop over each signal/trade pair
    for sig_date, trade_date in zip(weekly.index, trade_dates):
        # 4a) if no momentum yet, just carry last positions
        if sig_date not in momentum.index:
            # first iteration picks zeros anyway
            positions_w.loc[trade_date] = positions_w.shift().loc[trade_date]
            continue

        # 4b) pick tickers & assign equal weight
        signal = momentum.loc[sig_date].dropna()
        picks  = sector_parity_selection(signal, sector_map, top_n=5)
        if picks:
            w = 1.0 / len(picks)
            row = pd.Series(0.0, index=prices.columns)
            row.loc[picks] = w
        else:
            # if empty, carry forward last week’s weights
            row = positions_w.shift().loc[trade_date]

        positions_w.loc[trade_date] = row

        # 4c) stop-loss: look at prices between last trade_date and this one
        prev_trade = positions_w.index.get_loc(trade_date) - 1
        if prev_trade >= 0:
            last_trade_date = positions_w.index[prev_trade]
            week_prices = prices.loc[last_trade_date:trade_date]
            returns    = week_prices / week_prices.iloc[0] - 1.0
            breaches   = returns.min() < -stop_loss
            for tkr in prices.columns[breaches]:
                positions_w.loc[trade_date, tkr] = 0.0

    # 5. Expand to daily positions, forward-fill, then compute NAV
    daily_pos   = positions_w.reindex(prices.index, method='ffill').fillna(0.0)
    daily_value = (daily_pos * prices).sum(axis=1)

    # 6. Scale NAV to start at `capital`
    first_nonzero = daily_value.ne(0).idxmax()
    factor        = capital / daily_value.loc[first_nonzero]
    nav           = daily_value * factor
    nav.loc[:first_nonzero] = capital

    return nav, positions_w


In [17]:
# === TEST RUN: Sections 1–5 ===

# ---- 1) Compute Momentum (Section 3) ----
momentum_us = compute_momentum(us_prices)
momentum_in = compute_momentum(in_prices)

# Show the first & last few valid momentum rows
print("US Momentum (first/last):")
print(momentum_us.dropna(how='all').head(), "\n", momentum_us.dropna(how='all').tail(), "\n")

print("IN Momentum (first/last):")
print(momentum_in.dropna(how='all').head(), "\n", momentum_in.dropna(how='all').tail(), "\n")

# ---- 2) Sector-Parity Picks (Section 4) ----
# use the last valid date for each
last_us = momentum_us.dropna(how='all').index[-1]
last_in = momentum_in.dropna(how='all').index[-1]

signal_us = momentum_us.loc[last_us].dropna()
signal_in = momentum_in.loc[last_in].dropna()

picks_us = sector_parity_selection(signal_us, SECTOR_MAP_US, top_n=5)
picks_in = sector_parity_selection(signal_in, SECTOR_MAP_IN, top_n=5)

print(f"US Picks on {last_us.date()}: {picks_us}")
print(f"IN Picks on {last_in.date()}: {picks_in}\n")

# ---- 3) Run Full Backtest (Section 5) ----
nav_us, positions_us = run_backtest(us_prices, SECTOR_MAP_US, US_CAPITAL, lookback_months=12, skip_months=1, stop_loss=0.12, freq='W-FRI')
nav_in, positions_in = run_backtest(in_prices, SECTOR_MAP_IN, IN_CAPITAL, lookback_months=12, skip_months=1, stop_loss=0.12, freq='W-FRI')

# 4) Inspect NAV & Positions
print("US NAV (head/tail):")
print(nav_us.head(), "\n", nav_us.tail(), "\n")

print("US Positions (first 5 rows):")
print(positions_us.iloc[:5], "\n")

print("IN NAV (head/tail):")
print(nav_in.head(), "\n", nav_in.tail(), "\n")

print("IN Positions (first 5 rows):")
print(positions_in.iloc[:5])


US Momentum (first/last):
Ticker          AAPL      AMZN       JNJ      MSFT       XOM
Date                                                        
2016-01-04 -0.103708  0.721347  0.025894  0.145851 -0.125582
2016-01-05 -0.070315  0.774579  0.030729  0.152938 -0.085519
2016-01-06 -0.095231  0.700464  0.002321  0.128709 -0.077753
2016-01-07 -0.098347  0.635614 -0.005087  0.097883 -0.074691
2016-01-08 -0.131891  0.604440 -0.013139  0.063702 -0.093800 
 Ticker          AAPL      AMZN       JNJ      MSFT       XOM
Date                                                        
2025-04-30  0.193521  0.141354  0.105533  0.143744 -0.095227
2025-05-01  0.169557  0.141965  0.101759  0.159358 -0.101392
2025-05-02  0.200597  0.175486  0.111697  0.200424 -0.084367
2025-05-05  0.205227  0.157709  0.054975  0.185673 -0.080177
2025-05-06  0.166513  0.125541  0.066896  0.186697 -0.086057 

IN Momentum (first/last):
Ticker      HDFCBANK.NS  ICICIBANK.NS   INFY.NS  RELIANCE.NS    TCS.NS
Date               