In [1]:
import pandas as pd
import numpy as np
from datetime import date
from dateutil.relativedelta import relativedelta 
import warnings
warnings.filterwarnings("ignore")

In [2]:
# ===============================
# 05_return_calculations_corrected.py
# Industry-standard Return Calculations
# ===============================

# ----------------------------
# XIRR Manual Implementation
# ----------------------------
def xirr_manual(cashflows, guess=0.1, tol=1e-6, max_iter=100):
    """
    cashflows: list of dicts [{'date': datetime.date, 'amount': float}]
    Returns IRR as decimal (0.12 = 12%)
    """
    if len(cashflows) < 2:
        return np.nan
    d0 = pd.Timestamp(cashflows[0]['date'])
    times = np.array([(pd.Timestamp(cf['date']) - d0).days / 365 for cf in cashflows])
    amounts = np.array([cf['amount'] for cf in cashflows])
    
    rate = guess
    for i in range(max_iter):
        npv = np.sum(amounts / (1 + rate) ** times)
        d_npv = np.sum(-times * amounts / (1 + rate) ** (times + 1))
        new_rate = rate - npv / d_npv
        if abs(new_rate - rate) < tol:
            return rate
        rate = new_rate
    return np.nan

# ----------------------------
# Load data
# ----------------------------
df = pd.read_csv("../data/master_table.csv")
df['date'] = pd.to_datetime(df['date'])
df['nav'] = df['nav'].astype(float)
df.sort_values(['fund_name','date'], inplace=True)
df.reset_index(drop=True, inplace=True)

funds_list = df['fund_name'].unique()


# ----------------------------
# Step 1: Daily Returns (Last 21 Days Only)
# ----------------------------
daily_returns_list = []

for fund in funds_list:
    fund_df = df[df['fund_name'] == fund].copy().sort_values('date')
    last_21_dates = fund_df['date'].nlargest(21)
    recent_df = fund_df[fund_df['date'].isin(last_21_dates)].sort_values('date')
    recent_df['daily_return'] = recent_df['nav'].pct_change()
    daily_returns_list.append(recent_df[['fund_name','date','nav','daily_return']])

df_daily_recent = pd.concat(daily_returns_list, ignore_index=True)

# ----------------------------
# Step 2: P2P Returns (<1yr)
# ----------------------------
def compute_p2p_returns(fund_df, periods=['1M','3M','6M','YTD']):
    results = {}
    last_date = fund_df['date'].max()
    for period in periods:
        if period == 'YTD':
            start_date = pd.Timestamp(year=last_date.year, month=1, day=1)
        else:
            months = int(period.replace('M',''))
            start_date = last_date - relativedelta(months=months)
        subset = fund_df[fund_df['date'] >= start_date]
        if len(subset) < 2:
            results[period] = np.nan
            continue
        results[period] = (subset['nav'].iloc[-1] - subset['nav'].iloc[0]) / subset['nav'].iloc[0]
    return results

p2p_returns = {fund: compute_p2p_returns(df[df['fund_name']==fund]) for fund in funds_list}

# ----------------------------
# Step 3: Lumpsum CAGR (≥1 yr)
# ----------------------------
def compute_cagr(fund_df, periods=[1,3,5,10]):
    results = {}
    last_date = fund_df['date'].max()
    for yr in periods:
        start_date = last_date - relativedelta(years=yr)
        subset = fund_df[fund_df['date'] >= start_date]
        if len(subset) < 2:
            results[f'{yr}Y'] = np.nan
            continue
        start_nav = subset['nav'].iloc[0]
        end_nav = subset['nav'].iloc[-1]
        results[f'{yr}Y'] = (end_nav / start_nav) ** (1/yr) - 1
    # Since Inception
    n_years = (last_date - fund_df['date'].iloc[0]).days / 365.25
    results['Since Inception'] = (fund_df['nav'].iloc[-1] / fund_df['nav'].iloc[0]) ** (1/n_years) - 1
    return results

lumpsum_cagr = {fund: compute_cagr(df[df['fund_name']==fund]) for fund in funds_list}

# ----------------------------
# Step 4: SIP XIRR (≥1 yr) - First day of month cashflows
# ----------------------------
def compute_sip_xirr(fund_df, sip_amount=1000, periods=[1,3,5,10]):
    results = {}
    fund_df = fund_df.copy()
    fund_df.sort_values('date', inplace=True)
    last_date = fund_df['date'].max()

    for yr in periods:
        start_date = last_date - relativedelta(years=yr)
        subset = fund_df[fund_df['date'] >= start_date].copy()
        if len(subset) < 2:
            results[f'{yr}Y'] = np.nan
            continue

        # Monthly cashflows using NAV of the first day of each month
        subset['month'] = subset['date'].dt.to_period('M')
        first_of_month = subset.groupby('month').first().reset_index()
        cashflows = [{'amount': -sip_amount, 'date': row['date'].date()} for _, row in first_of_month.iterrows()]
        total_units = sum([-c['amount'] / subset.loc[subset['date'] == pd.Timestamp(c['date']), 'nav'].iloc[0] for c in cashflows])
        cashflows.append({'amount': total_units * subset['nav'].iloc[-1], 'date': subset['date'].iloc[-1].date()})

        results[f'{yr}Y'] = xirr_manual(cashflows)

    # Since Inception
    subset = fund_df.copy()
    subset['month'] = subset['date'].dt.to_period('M')
    first_of_month = subset.groupby('month').first().reset_index()
    cashflows = [{'amount': -sip_amount, 'date': row['date'].date()} for _, row in first_of_month.iterrows()]
    total_units = sum([-c['amount'] / subset.loc[subset['date'] == pd.Timestamp(c['date']), 'nav'].iloc[0] for c in cashflows])
    cashflows.append({'amount': total_units * subset['nav'].iloc[-1], 'date': subset['date'].iloc[-1].date()})
    results['Since Inception'] = xirr_manual(cashflows)

    return results

sip_xirr = {fund: compute_sip_xirr(df[df['fund_name']==fund]) for fund in funds_list}



# ----------------------------
# Step 5: Rolling Returns (CAGR Time Series)
# ----------------------------
def compute_rolling_cagr_series(fund_df, windows_years=[1/12, 0.5, 1, 3, 5]):
    """
    Optimized Rolling CAGR calculation (Daily frequency, multiple sliding windows)

    Parameters
    ----------
    fund_df : pd.DataFrame
        Must contain columns ['fund_name', 'date', 'nav']
    windows_years : list
        Rolling window lengths in years (default includes 1M, 6M, 1Y, 3Y, 5Y)

    Returns
    -------
    pd.DataFrame : ['fund_name', 'date', 'period', 'rolling_cagr']
    """
    fund_df = fund_df.copy().sort_values('date').reset_index(drop=True)
    fund_name = fund_df['fund_name'].iloc[0]

    # 🔹 Step 1: Keep only last 5 years of data
    cutoff_date = fund_df['date'].max() - pd.Timedelta(days=5 * 365)
    fund_df = fund_df[fund_df['date'] >= cutoff_date].reset_index(drop=True)

    results = []

    # 🔹 Step 2: Compute rolling CAGR for each window at daily frequency
    for w in windows_years:
        # Convert fractional years to days
        window_days = int(w * 365.25)

        for i in range(len(fund_df)):
            end_date = fund_df.loc[i, 'date']
            start_date = end_date - pd.Timedelta(days=window_days)

            subset = fund_df[(fund_df['date'] >= start_date) & (fund_df['date'] <= end_date)]
            if len(subset) < 2:
                continue

            start_nav = subset['nav'].iloc[0]
            end_nav = subset['nav'].iloc[-1]
            delta_years = (subset['date'].iloc[-1] - subset['date'].iloc[0]).days / 365.25

            if start_nav > 0 and delta_years > 0:
                cagr = (end_nav / start_nav) ** (1 / delta_years) - 1
                # Format period nicely
                if w < 1:
                    months = int(w * 12)
                    period_label = f'{months}M'
                else:
                    period_label = f'{int(w)}Y'

                results.append({
                    'fund_name': fund_name,
                    'date': end_date,
                    'period': period_label,
                    'rolling_cagr': cagr
                })

    return pd.DataFrame(results)


# Run for all funds
rolling_all_funds = []

for fund in funds_list:
    fund_df = df[df['fund_name'] == fund].copy()
    roll_df = compute_rolling_cagr_series(fund_df)
    rolling_all_funds.append(roll_df)

rolling_all_funds_df = pd.concat(rolling_all_funds, ignore_index=True)

# ----------------------------
# Step 6: Save results
# ----------------------------
df_daily_recent.to_csv("../data/processed/daily_return.csv", index=False)
pd.DataFrame(p2p_returns).T.to_csv("../data/processed/p2p_returns.csv")
pd.DataFrame(lumpsum_cagr).T.to_csv("../data/processed/lumpsum_cagr.csv")
pd.DataFrame(sip_xirr).T.to_csv("../data/processed/sip_xirr.csv")
rolling_all_funds_df.to_csv("../data/processed/rolling_all_funds.csv", index=False)

In [3]:
# ===============================
# Display Sample Outputs for Verification
# ===============================

# --- Daily Return Sample (Last 21 Days) ---
print("=== Daily Return Sample (Most Recent 21 Days) ===")
display(df_daily_recent.groupby('fund_name').tail(5))

# --- P2P Returns Sample ---
p2p_df = pd.DataFrame(p2p_returns).T
print("\n=== P2P Returns Sample ===")
display(p2p_df.head(9))

# --- Lumpsum CAGR Sample (Annualised Returns) ---
lumpsum_df = pd.DataFrame(lumpsum_cagr).T
print("\n=== Lumpsum CAGR Sample (Annualised Returns) ===")
display(lumpsum_df.head(9))

# --- SIP XIRR Sample (Annualised Returns) ---
sip_xirr_df = pd.DataFrame(sip_xirr).T
print("\n=== SIP XIRR Sample (Annualised Returns) ===")
display(sip_xirr_df.head(9))

# --- Rolling Returns Sample (Time-Series Data) ---
print("\n=== Rolling Returns Sample (Continuous Time-Series) ===")
print(f"Total Records: {len(rolling_all_funds_df)}")
print("Columns:", list(rolling_all_funds_df.columns))
print("\nShowing last few records for one sample fund:")

# Pick one sample fund (e.g., HDFC Corporate Bond Fund)
sample_fund = rolling_all_funds_df['fund_name'].unique()[0]
sample_df = rolling_all_funds_df[rolling_all_funds_df['fund_name'] == sample_fund].copy()

# Show last few rolling entries for each period (1Y, 3Y, 5Y)
for period in ['1Y', '3Y', '5Y']:
    print(f"\n--- {sample_fund} | {period} Rolling Returns ---")
    display(sample_df[sample_df['period'] == period].tail(5))


=== Daily Return Sample (Most Recent 21 Days) ===


Unnamed: 0,fund_name,date,nav,daily_return
16,DSP Midcap Fund,2025-10-25,165.605,0.0
17,DSP Midcap Fund,2025-10-26,165.605,0.0
18,DSP Midcap Fund,2025-10-27,167.45,0.011141
19,DSP Midcap Fund,2025-10-28,167.138,-0.001863
20,DSP Midcap Fund,2025-10-29,167.774,0.003805
37,HDFC Corporate Bond Fund,2025-10-25,33.9696,0.0
38,HDFC Corporate Bond Fund,2025-10-26,33.9696,0.0
39,HDFC Corporate Bond Fund,2025-10-27,33.9787,0.000268
40,HDFC Corporate Bond Fund,2025-10-28,33.9859,0.000212
41,HDFC Corporate Bond Fund,2025-10-29,33.996,0.000297



=== P2P Returns Sample ===


Unnamed: 0,1M,3M,6M,YTD
DSP Midcap Fund,0.042081,0.02453,0.12477,0.031338
HDFC Corporate Bond Fund,0.008933,0.01272,0.028972,0.069601
HDFC Large and Mid Cap Fund,0.055357,0.041757,0.103667,0.073603
ICICI Prudential Balanced Advantage Fund,0.028749,0.045023,0.077947,0.113156
ICICI Prudential Large Cap Fund (erstwhile Bluechip Fund),0.045255,0.048552,0.084091,0.106208
Nippon India Small Cap Fund,0.036622,0.013795,0.105306,-0.014383
SBI Large & Midcap Fund,0.048496,0.032668,0.104029,0.088264
UTI Nifty 50 Index Fund,0.057527,0.052124,0.079461,0.10858



=== Lumpsum CAGR Sample (Annualised Returns) ===


Unnamed: 0,1Y,3Y,5Y,10Y,Since Inception
DSP Midcap Fund,0.060378,0.209685,0.207151,0.162106,0.177979
HDFC Corporate Bond Fund,0.081669,0.082115,0.064878,0.078348,0.081478
HDFC Large and Mid Cap Fund,0.07061,0.224649,0.280462,0.156644,0.142543
ICICI Prudential Balanced Advantage Fund,0.109848,0.143208,0.155727,0.122589,0.132602
ICICI Prudential Large Cap Fund (erstwhile Bluechip Fund),0.0814,0.19331,0.230253,0.156648,0.160873
Nippon India Small Cap Fund,0.00292,0.244299,0.341375,0.216792,0.250459
SBI Large & Midcap Fund,0.080974,0.183038,0.250159,0.160312,0.174143
UTI Nifty 50 Index Fund,0.076158,0.146183,0.185563,0.134597,0.131326



=== SIP XIRR Sample (Annualised Returns) ===


Unnamed: 0,1Y,3Y,5Y,10Y,Since Inception
DSP Midcap Fund,0.143454,0.194221,0.17936,0.169334,0.178497
HDFC Corporate Bond Fund,0.077992,0.082977,0.073976,0.075731,0.078115
HDFC Large and Mid Cap Fund,0.161331,0.198677,0.21534,0.187113,0.17382
ICICI Prudential Balanced Advantage Fund,0.153192,0.145534,0.14001,0.131051,0.131168
ICICI Prudential Large Cap Fund (erstwhile Bluechip Fund),0.156919,0.182515,0.186548,0.170837,0.165023
Nippon India Small Cap Fund,0.095732,0.19213,0.245368,0.238763,0.24823
SBI Large & Midcap Fund,0.156088,0.178622,0.191649,0.178813,0.176241
UTI Nifty 50 Index Fund,0.156243,0.143259,0.143757,0.145392,0.138785



=== Rolling Returns Sample (Continuous Time-Series) ===
Total Records: 73000
Columns: ['fund_name', 'date', 'period', 'rolling_cagr']

Showing last few records for one sample fund:

--- DSP Midcap Fund | 1Y Rolling Returns ---


Unnamed: 0,fund_name,date,period,rolling_cagr
5470,DSP Midcap Fund,2025-10-25,1Y,0.057665
5471,DSP Midcap Fund,2025-10-26,1Y,0.057665
5472,DSP Midcap Fund,2025-10-27,1Y,0.069457
5473,DSP Midcap Fund,2025-10-28,1Y,0.064863
5474,DSP Midcap Fund,2025-10-29,1Y,0.06042



--- DSP Midcap Fund | 3Y Rolling Returns ---


Unnamed: 0,fund_name,date,period,rolling_cagr
7295,DSP Midcap Fund,2025-10-25,3Y,0.203005
7296,DSP Midcap Fund,2025-10-26,3Y,0.200807
7297,DSP Midcap Fund,2025-10-27,3Y,0.209063
7298,DSP Midcap Fund,2025-10-28,3Y,0.208311
7299,DSP Midcap Fund,2025-10-29,3Y,0.209843



--- DSP Midcap Fund | 5Y Rolling Returns ---


Unnamed: 0,fund_name,date,period,rolling_cagr
9120,DSP Midcap Fund,2025-10-25,5Y,0.204965
9121,DSP Midcap Fund,2025-10-26,5Y,0.204842
9122,DSP Midcap Fund,2025-10-27,5Y,0.207396
9123,DSP Midcap Fund,2025-10-28,5Y,0.20682
9124,DSP Midcap Fund,2025-10-29,5Y,0.207614
