In [3]:
import pandas as pd
# Load the preprocessed mutual-fund CSV into `df`.
# NOTE: the following cell reads this same file again before doing any work,
# so this cell only matters when it is run on its own.
df = pd.read_csv(
    r"C:/Users/BALA/OneDrive - University of Hertfordshire/Desktop/final project/app/data/processed/preprocessed_mutual_funds.csv"
)

In [7]:
import pandas as pd
import numpy as np

# 1. Read the preprocessed mutual-fund CSV into a DataFrame.
df = pd.read_csv(
    r"C:/Users/BALA/OneDrive - University of Hertfordshire/Desktop/final project/app/data/processed/preprocessed_mutual_funds.csv"
)

# 2. Parse 'Date' as datetime; values that cannot be parsed become NaT
#    instead of raising.
#    NOTE(review): no explicit format/dayfirst is given, so parsing relies on
#    pandas' inference -- confirm it matches the date layout in the file.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# 3. Order rows by scheme and then by date, renumbering the index from 0, so
#    the per-scheme time-series features below see each scheme chronologically.
df = df.sort_values(['Scheme_Code', 'Date'], ignore_index=True)

# 4. Daily return in percent: row-over-row relative change of NAV within each
#    scheme. The first row of every scheme has no predecessor and is NaN.
df['Daily_Return'] = df.groupby('Scheme_Code')['NAV'].pct_change().mul(100)

# 5. Rolling NAV mean and standard deviation per scheme.
#    The window is 30 *rows* (observations), not 30 calendar days, and
#    min_periods=1 lets the statistic start from the first row of a scheme.
df['Rolling_Mean_NAV'] = (
    df.groupby('Scheme_Code')['NAV']
      .transform(lambda nav: nav.rolling(30, min_periods=1).mean())
)

df['Rolling_Std_NAV'] = (
    df.groupby('Scheme_Code')['NAV']
      .transform(lambda nav: nav.rolling(30, min_periods=1).std())
)

# 6. CAGR Function (per Scheme_Code)
def calculate_cagr(sub_df):
    """Return the compound annual growth rate (in percent) of one scheme's NAV.

    The CAGR is computed between the earliest and the latest usable
    observation as ``((end_nav / start_nav) ** (1 / n_years) - 1) * 100``,
    where ``n_years`` is the number of whole days between the two dates
    divided by 365.25.

    Parameters
    ----------
    sub_df : pandas.DataFrame
        Rows of a single scheme. Must contain a datetime-like ``'Date'``
        column and a numeric ``'NAV'`` column. Rows may be in any order and
        may contain missing values (NaT / NaN); such rows are ignored.

    Returns
    -------
    float
        CAGR in percent, or ``np.nan`` when it cannot be computed: fewer than
        two usable rows, a non-positive time span, a non-positive starting
        NAV, a negative ending NAV, or values whose types do not support the
        arithmetic.

    Raises
    ------
    KeyError
        If ``sub_df`` lacks the ``'Date'`` or ``'NAV'`` column. This is a
        caller error rather than a data-quality problem, so it is not
        converted to NaN.
    """
    try:
        # Drop rows with a missing date or NAV. Unparseable dates coerced to
        # NaT sort to the end of a group, so without this step a single bad
        # row would turn the whole scheme's CAGR into NaN.
        # The stable sort keeps the incoming order among rows sharing a date,
        # so input that is already sorted yields the same first/last rows.
        valid = sub_df.dropna(subset=['Date', 'NAV']).sort_values('Date', kind='mergesort')
        if len(valid) < 2:
            return np.nan

        start_nav = valid['NAV'].iloc[0]
        end_nav = valid['NAV'].iloc[-1]
        n_years = (valid['Date'].iloc[-1] - valid['Date'].iloc[0]).days / 365.25

        # A zero span would divide by zero; a non-positive start NAV or a
        # negative end NAV has no meaningful growth rate.
        if n_years <= 0 or start_nav <= 0 or end_nav < 0:
            return np.nan

        return ((end_nav / start_nav) ** (1 / n_years) - 1) * 100
    except (TypeError, ValueError, AttributeError, ArithmeticError):
        # Best-effort: badly typed data (e.g. string NAVs, non-datetime
        # dates) gives NaN for this scheme instead of aborting the run.
        return np.nan

# 7. Compute one CAGR value per scheme. Only the 'Date' and 'NAV' columns are
#    handed to calculate_cagr; the result is indexed by Scheme_Code.
cagr_values = (
    df.groupby('Scheme_Code', group_keys=False)[['Date', 'NAV']]
      .apply(calculate_cagr)
)

# Look up each row's scheme in cagr_values so that every row of a scheme
# carries that scheme's CAGR.
df['CAGR'] = df['Scheme_Code'].map(cagr_values)

# 8. Write the enriched frame back to disk without the index column.
#    NOTE: this is the same path the data was loaded from, so the input file
#    is overwritten in place.
df.to_csv(
    r"C:/Users/BALA/OneDrive - University of Hertfordshire/Desktop/final project/app/data/processed/preprocessed_mutual_funds.csv",
    index=False,
)

print(" Feature engineering completed. Data saved to: data/processed/preprocessed_mutual_funds.csv")


 Feature engineering completed. Data saved to: data/processed/preprocessed_mutual_funds.csv
