In [1]:
import pandas as pd
from dateutil.relativedelta import relativedelta
import numpy as np
import re
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
import warnings
warnings.filterwarnings("ignore")
import math
import os
from datetime import date, timedelta, datetime
import time
from tqdm import tqdm
import seaborn as sns
from scipy import stats
from matplotlib.ticker import MaxNLocator
from matplotlib.backends.backend_pdf import PdfPages
start_time = time.perf_counter()

In [2]:
# Load the raw price file. The code below reads the columns
# Date, Symbol, Mcap and Close from it.
price_data = pd.read_csv('stockPriceData-2.csv')

# For every date keep the 500 rows with the largest Mcap.
price_data_500 = price_data.groupby('Date', group_keys=False).apply(lambda x: x.sort_values(by='Mcap', ascending=False).head(500))

# Keep the full history of every symbol that appears in the per-date top 500
# at least once. The explicit .copy() makes `df` an independent frame, so the
# column assignments below never write into a slice of `price_data`
# (the SettingWithCopy situation that the global warnings filter would hide).
df = price_data[price_data['Symbol'].isin(price_data_500['Symbol'])].copy()
df['Date'] = pd.to_datetime(df['Date'])

# One row per distinct date, in order of first appearance in `df`.
master_date = df.drop_duplicates(subset='Date')[['Date']].reset_index(drop=True)
df = df.set_index('Date')

# NOTE(review): the Date/Mcap ordering is immediately superseded by the
# Symbol/Date sort on the next line; it is kept unchanged in case the relative
# order of tied rows matters downstream — confirm before removing.
df = df.sort_values(by=['Date','Mcap'], ascending=[True,False])
df = df.sort_values(['Symbol', 'Date'])

# Previous close and simple one-row return, computed within each symbol.
df['PrevClose'] = df.groupby('Symbol')['Close'].shift(1)
df['returns'] = (df['Close'] - df['PrevClose']) / df['PrevClose']

In [3]:
# Function to calculate log returns
def calculate_log_returns(df):
    """Return a copy of ``df`` with a ``LogReturn`` column, minus rows holding NaN.

    ``LogReturn`` is ``log(Close / previous-row Close)``, where "previous row"
    is positional (``shift(1)``), so the frame is expected to contain a single
    instrument already ordered in time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Close`` column. It is NOT modified; the new column is
        added to a copy so the function is safe to use as a ``groupby.apply``
        callback.

    Returns
    -------
    pandas.DataFrame
        The input columns plus ``LogReturn``. ``dropna()`` is called without a
        subset, so a row is removed when ANY of its columns is NaN — this
        always includes the first row, whose ``LogReturn`` is undefined.
    """
    # assign() builds a new frame instead of writing into the caller's object.
    with_returns = df.assign(LogReturn=np.log(df['Close'] / df['Close'].shift(1)))
    return with_returns.dropna()

# Function to calculate momentum ratios
def calculate_momentum_ratios(series, period):
    """Relative change of ``series`` versus its value ``period`` rows earlier.

    Computes ``series / series.shift(period) - 1``. The first ``period``
    entries have no lagged value and therefore come out as NaN.
    """
    lagged = series.shift(period)
    ratio = series.div(lagged)
    return ratio.sub(1)

# Look-back windows for the momentum ratios, keyed by output column label.
# Values are row offsets passed to shift() (trading days, per the data layout).
periods = {'MR1': 21}

# Per-symbol log returns; the helper also drops rows that contain NaN.
df = df.groupby('Symbol', group_keys=False).apply(calculate_log_returns)

# One momentum-ratio column per configured window, computed within each symbol.
for label, period in periods.items():
    df[label] = (
        df.groupby('Symbol')['Close']
          .transform(lambda closes: calculate_momentum_ratios(closes, period))
    )

# Cross-sectional statistics: mean and standard deviation of each ratio
# across all rows sharing the same Date.
for label in periods:
    df[f'mu_{label}'] = df.groupby('Date')[label].transform('mean')
    df[f'sigma_{label}'] = df.groupby('Date')[label].transform('std')

# Standardise every ratio against its same-date mean and standard deviation.
for label in periods:
    df[f'Z_{label}'] = df[label].sub(df[f'mu_{label}']).div(df[f'sigma_{label}'])

# Move Date from the index back into a regular column.
df = df.reset_index()

In [5]:
# ─── 2) Label combinations to score; each entry is a tuple of momentum-ratio labels.
specific_combinations = [
    ('MR1',),            # single-element tuple
    # To blend several ratios, add e.g. ('MR1','MR6'); the matching Z_ columns must exist.
]

# ─── 3) For each combination: equal-weighted average Z-score, then the normalized score.
for comb in specific_combinations:
    comb_labels = [f'Z_{label}' for label in comb]       # e.g. ['Z_MR1']
    comb_weights = np.ones(len(comb_labels)) / len(comb_labels)
    comb_name    = "_".join(comb)                         # e.g. 'MR1' or 'MR1_MR6'

    # Fail early with a clear message if a required Z-score column is absent.
    missing = set(comb_labels) - set(df.columns)
    if missing:
        raise KeyError(f"The following Z‐columns are missing: {missing}")

    # Equal-weighted average of the selected Z-score columns.
    df[f'WeightedAvgZ_{comb_name}'] = df[comb_labels].dot(comb_weights)

    # Map the average Z to a positive score: 1 + Z when Z >= 0, else 1 / (1 - Z).
    # A NaN Z fails the >= 0 test, takes the second branch and stays NaN.
    df[f'NormalizedMomentumScore_{comb_name}'] = np.where(
        df[f'WeightedAvgZ_{comb_name}'] >= 0,
        1 + df[f'WeightedAvgZ_{comb_name}'],
        (1 - df[f'WeightedAvgZ_{comb_name}']) ** -1
    )

# ─── 4) Keep only the 500 largest rows by Mcap on each Date.
df = (
    df.groupby('Date', group_keys=False)
      .apply(lambda x: x.sort_values(by='Mcap', ascending=False).head(500))
)

# Select and rename in one chained expression so the result is a fresh frame;
# an in-place rename on a column-subset slice is the chained-assignment
# pattern pandas warns about (and the global warnings filter would hide).
# NOTE(review): this score rises with the trailing MR1 return; confirm whether
# a "reversal" signal is expected to be inverted downstream.
df = df[['Date','Symbol','NormalizedMomentumScore_MR1']].rename(
    columns={'NormalizedMomentumScore_MR1': 'Short term Reversal'}
)
df

Unnamed: 0,Date,Symbol,Short term Reversal
4188216,1995-06-19,RELIANCE,
4994167,1995-06-19,TATASTEEL,
2145172,1995-06-19,HINDPETRO,
1854073,1995-06-19,GRASIM,
3495033,1995-06-19,NESTLEIND,
...,...,...,...
3587922,2025-06-13,NSLNISP,0.826215
1662990,2025-06-13,GENUSPOWER,1.778813
5575282,2025-06-13,WESTLIFE,0.761903
891441,2025-06-13,CCL,1.757292


In [7]:
# Percentile rank (0-1] of the score within each Date; NaN scores stay NaN.
df['Short term Reversal Rank'] = (
    df.groupby('Date')['Short term Reversal'].rank(pct=True)
)
df

Unnamed: 0,Date,Symbol,Short term Reversal,Short term Reversal Rank
4188216,1995-06-19,RELIANCE,,
4994167,1995-06-19,TATASTEEL,,
2145172,1995-06-19,HINDPETRO,,
1854073,1995-06-19,GRASIM,,
3495033,1995-06-19,NESTLEIND,,
...,...,...,...,...
3587922,2025-06-13,NSLNISP,0.826215,0.505030
1662990,2025-06-13,GENUSPOWER,1.778813,0.885312
5575282,2025-06-13,WESTLIFE,0.761903,0.452716
891441,2025-06-13,CCL,1.757292,0.877264


In [8]:
# Keep only the identifying columns and the percentile rank for export.
df = df.loc[:, ['Date', 'Symbol', 'Short term Reversal Rank']]
df

Unnamed: 0,Date,Symbol,Short term Reversal Rank
4188216,1995-06-19,RELIANCE,
4994167,1995-06-19,TATASTEEL,
2145172,1995-06-19,HINDPETRO,
1854073,1995-06-19,GRASIM,
3495033,1995-06-19,NESTLEIND,
...,...,...,...
3587922,2025-06-13,NSLNISP,0.505030
1662990,2025-06-13,GENUSPOWER,0.885312
5575282,2025-06-13,WESTLIFE,0.452716
891441,2025-06-13,CCL,0.877264


In [9]:
# Write the per-date ranks to disk without the DataFrame index column.
df.to_csv(path_or_buf='ShortTermReversal.csv', index=False)