In [26]:
import os
import glob
import pickle
import pandas as pd
import numpy as np

# -----------------------
# CONFIG
# -----------------------
STOCK_FOLDER = "live_stocks_data_final"
MUTUAL_FOLDER = "mutualfund_data_final"

N_STOCKS = 5
N_FUNDS = 5

# Fixed 5% yearly growth in monthly contribution
ANNUAL_GROWTH_RATE = 0.05

# Annual return assumptions used for  projection (can be adjusted)
ASSET_RETURN_ASSUMPTIONS = {
    "Stocks": 0.12,
    "Mutual Funds": 0.08,
    "Gold": 0.06,
    "PPF": 0.07,
    "Savings": 0.03,
    "Emergency Fund": 0.03
}

# Base allocations by risk label
BASE_ALLOCATIONS = {
    "Aggressive": {"Stocks": 60, "Mutual Funds": 25, "Gold": 5, "PPF": 5, "Savings": 3, "Emergency Fund": 2},
    "Moderate":   {"Stocks": 40, "Mutual Funds": 35, "Gold": 10, "PPF": 10, "Savings": 3, "Emergency Fund": 2},
    "Conservative":{"Stocks": 10, "Mutual Funds": 15, "Gold": 15, "PPF": 20, "Savings": 30, "Emergency Fund": 10}
}

# Goal tilts (small adjustments)
GOAL_TILTS = {
    "Retirement": {"Stocks": +5, "Mutual Funds": +5, "Gold": 0, "PPF": +5, "Savings": -10, "Emergency Fund": -5},
    "House":      {"Stocks": -5, "Mutual Funds": +10, "Gold": 0, "PPF": +10, "Savings": -10, "Emergency Fund": -5},
    "Travel":     {"Stocks": -10,"Mutual Funds": +5, "Gold": +5, "PPF": -5, "Savings": +5, "Emergency Fund": 0},
    "Child":      {"Stocks": -5, "Mutual Funds": +10,"Gold": +5, "PPF": +5, "Savings": -10, "Emergency Fund": -5},
    "Growth":     {"Stocks": +10,"Mutual Funds": +5, "Gold": -5, "PPF": -5, "Savings": -3, "Emergency Fund": -2},
}

# Risk filters for picks
RISK_TO_STOCK_FILTER = {
    "Aggressive": ["High", "Medium"],
    "Moderate": ["Medium"],
    "Conservative": ["Low", "Medium"]
}
RISK_TO_MF_FILTER = {
    "Aggressive": ["Medium Risk", "High Risk"],
    "Moderate": ["Medium Risk"],
    "Conservative": ["Low Risk"]
}

# Leftover goes to Savings/Emergency Fund split
LEFTOVER_SPLIT = {"Savings": 0.7, "Emergency Fund": 0.3}

# -----------------------
# UTILITIES
# -----------------------
def get_latest_csv(folder):
    if not os.path.isdir(folder):
        return None
    files = sorted(glob.glob(os.path.join(folder, "*.csv")), key=os.path.getmtime, reverse=True)
    return files[0] if files else None

def normalize_allocation(alloc):
    a = {k: max(0.0, float(v)) for k, v in alloc.items()}
    s = sum(a.values())
    if s == 0:
        n = len(a)
        return {k: 100.0/n for k in a}
    return {k: (v/s)*100.0 for k,v in a.items()}

def apply_goal_tilt(base, goal):
    tilt = GOAL_TILTS.get(goal, {})
    combined = {k: base.get(k,0) + tilt.get(k,0) for k in base}
    return normalize_allocation(combined)

def ensure_leftover(allocation):
    alloc = allocation.copy()
    total = sum(alloc.values())
    if total >= 99.999:
        return normalize_allocation(alloc)
    leftover = 100.0 - total
    for k, frac in LEFTOVER_SPLIT.items():
        alloc[k] = alloc.get(k,0.0) + leftover * frac
    return normalize_allocation(alloc)

def rupees(x):
    return f"‚Çπ{x:,.2f}"

def read_stock_df(path):
    df = pd.read_csv(path)
    # ensure necessary columns
    # expected columns used: STOCK, avg_return_30d, volatility_30d, optional risk_level
    if "STOCK" not in df.columns or "avg_return_30d" not in df.columns:
        raise ValueError("Stock CSV must include at least columns: STOCK, avg_return_30d, volatility_30d (optional risk_level).")
    df = df.dropna(subset=["STOCK"]).drop_duplicates(subset=["STOCK"])
    return df

def read_mutual_df(path):
    df = pd.read_csv(path)
    # expect Scheme Name and Volatility (90d)
    if "Scheme Name" not in df.columns:
        for c in df.columns:
            if "Scheme" in c and "Name" in c:
                df.rename(columns={c: "Scheme Name"}, inplace=True)
                break
    if "Volatility (90d)" not in df.columns:
        for c in df.columns:
            if "Volatility" in c:
                df.rename(columns={c: "Volatility (90d)"}, inplace=True)
                break
    if "Scheme Name" not in df.columns or "Volatility (90d)" not in df.columns:
        raise ValueError("Mutual CSV must include 'Scheme Name' and 'Volatility (90d)'.")
    # if Risk Category missing, bucket by volatility
    if "Risk Category" not in df.columns:
        q_low, q_high = df["Volatility (90d)"].quantile([0.33, 0.66])
        def bucket(v):
            if pd.isna(v): return "Medium Risk"
            if v <= q_low: return "Low Risk"
            if v >= q_high: return "High Risk"
            return "Medium Risk"
        df["Risk Category"] = df["Volatility (90d)"].apply(bucket)
    df = df.dropna(subset=["Scheme Name"]).drop_duplicates(subset=["Scheme Name"])
    return df

def pick_stocks(stock_df, risk_label, goal):
    allowed = RISK_TO_STOCK_FILTER.get(risk_label, ["Medium"])
    df = stock_df.copy()
    if "risk_level" in df.columns:
        df = df[df["risk_level"].isin(allowed)]
    # goal-specific sorting: conservative goals prefer lower volatility
    if goal in ("Retirement","House","Child","Travel"):
        df = df.sort_values(by=["volatility_30d","avg_return_30d"], ascending=[True, False])
    else:
        df = df.sort_values(by=["avg_return_30d","volatility_30d"], ascending=[False, True])
    if df.empty:
        df = stock_df.sort_values(by=["avg_return_30d","volatility_30d"], ascending=[False, True])
    pool = df.head(max(3*N_STOCKS, N_STOCKS))
    sample_n = min(N_STOCKS, len(pool))
    return pool.sample(sample_n, replace=False)[["STOCK","avg_return_30d","volatility_30d"]]

def pick_mutuals(mutual_df, risk_label, goal):
    allowed = RISK_TO_MF_FILTER.get(risk_label, ["Medium Risk"])
    df = mutual_df[mutual_df["Risk Category"].isin(allowed)].copy()
    if goal in ("Retirement","House","Child","Travel"):
        df = df.sort_values(by=["Volatility (90d)"], ascending=True)
    else:
        df = df.sort_values(by=["Volatility (90d)"], ascending=True)
    if df.empty:
        df = mutual_df.sort_values(by=["Volatility (90d)"], ascending=True)
    pool = df.head(max(3*N_FUNDS, N_FUNDS))
    sample_n = min(N_FUNDS, len(pool))
    df["Scheme Name"] = df["Scheme Name"].apply(lambda x: " ".join(str(x).split()[:8]) + ("..." if len(str(x).split())>8 else ""))
    return pool.sample(sample_n, replace=False)[["Scheme Name","Volatility (90d)","Risk Category"]]

# -----------------------
# Simulation functions
# simulate monthly with step-up every 12 months
# -----------------------
def simulate_stepup_sip(start_monthly, growth_rate, years, allocation_pct, asset_returns):
    """
    Simulate month-by-month for 'years' years.
    start_monthly: initial total monthly budget (not per asset)
    growth_rate: annual pct growth applied each 12 months (e.g., 0.05)
    allocation_pct: dict asset->percent (sums to 100)
    asset_returns: dict asset->annual_return
    Returns:
      - totals_by_asset: dict asset->final FV after years
      - principal_by_asset: dict asset->principal contributed
      - yearly_snapshots: list of dicts per year with cumulative principal and portfolio value
    """
    months = years * 12
    monthly_budget = start_monthly
    totals = {asset: 0.0 for asset in allocation_pct}
    principal = {asset: 0.0 for asset in allocation_pct}
    yearly_snapshots = []
    # initialize per-asset balances
    balances = {asset: 0.0 for asset in allocation_pct}
    for m in range(1, months+1):
        # determine current year (1-based)
        year_idx = (m-1) // 12 + 1
        # if new year and m>1 and (m-1)%12==0 then monthly_budget has already been adjusted at start of that year
        # monthly contribution this month = monthly_budget
        for asset, pct in allocation_pct.items():
            monthly_contrib = monthly_budget * (pct/100.0)
            principal[asset] += monthly_contrib
            # monthly growth rate for the asset
            annual_r = asset_returns.get(asset, 0.0)
            monthly_r = annual_r / 12.0
            # first add contribution then compound existing balance for this month
            # we model as contribution at start of month and then compound month interest
            balances[asset] = (balances[asset] + monthly_contrib) * (1 + monthly_r)
        # at year end collect snapshot
        if m % 12 == 0:
            # compute cumulative principal and total portfolio value
            cum_principal = sum(principal.values())
            total_value = sum(balances.values())
            snapshot = {
                "year": year_idx,
                "monthly_invest": monthly_budget,
                "annual_invested": monthly_budget * 12.0,
                "cumulative_principal": cum_principal,
                "portfolio_value": total_value,
                "profit": total_value - cum_principal
            }
            yearly_snapshots.append(snapshot)
            # step up monthly_budget for next year
            monthly_budget = monthly_budget * (1 + growth_rate)
    # final totals_by_asset
    totals_by_asset = {asset: balances[asset] for asset in balances}
    principal_by_asset = {asset: principal[asset] for asset in principal}
    return totals_by_asset, principal_by_asset, yearly_snapshots

# -----------------------
# Main program
# -----------------------
def run_investment_advisor():
    # load latest files
    stock_file = get_latest_csv(STOCK_FOLDER)
    mf_file = get_latest_csv(MUTUAL_FOLDER)
    if not stock_file or not mf_file:
        print("‚ùå Could not find latest CSVs. Ensure folders exist and contain CSVs:", STOCK_FOLDER, MUTUAL_FOLDER)
        return

    print(f"üìÅ Latest stock data file: {os.path.basename(stock_file)}")
    print(f"üìÅ Latest mutual fund file: {os.path.basename(mf_file)}")

    stock_df = read_stock_df(stock_file)
    mutual_df = read_mutual_df(mf_file)

    # USER INPUTS
    print("\nüë§ Enter your financial profile:")
    age = int(input("Enter your Age: ").strip())
    income = float(input("Enter your Monthly Income (‚Çπ): ").strip())

    print("\nEducation Level (1‚Äì5):")
    print("1 = No Formal Education\n2 = High School\n3 = Bachelor‚Äôs Degree\n4 = Master‚Äôs Degree\n5 = Doctorate or Professional Degree")
    education = int(input("Enter your Education Level: ").strip())

    print("\nüìã Risk Questions (1 = Strongly Disagree | 2 = Disagree | 3 = Neutral | 4 = Agree | 5 = Strongly Agree)")
    qs = [
        "I prefer safer investments even if returns are low (1‚Äì5): ",
        "I like taking financial risks if the rewards are high (1‚Äì5): ",
        "I track my expenses and budget regularly (1‚Äì5): ",
        "I feel confident managing my finances (1‚Äì5): ",
        "I get anxious when markets fluctuate (1‚Äì5): ",
        "I prefer guaranteed returns over variable ones (1‚Äì5): ",
        "I invest for long-term goals (1‚Äì5): ",
        "I panic sell during downturns (1‚Äì5): ",
        "I consult a financial advisor before investing (1‚Äì5): ",
        "I review my portfolio frequently (1‚Äì5): ",
        "I use multiple investment channels (1‚Äì5): ",
        "I like reading about finance/investing (1‚Äì5): "
    ]
    f30 = [int(input(q).strip()) for q in qs]

    print("\nüéØ Primary Goal:")
    print("1 = Retirement\n2 = Buying a House\n3 = Travel\n4 = Child‚Äôs Education\n5 = Wealth Growth")
    goal_choice = int(input("Choose 1‚Äì5: ").strip())
    goal_map = {1:"Retirement", 2:"House", 3:"Travel", 4:"Child", 5:"Growth"}
    goal = goal_map.get(goal_choice, "Growth")

    start_monthly = float(input("\nüí∏ How much do you want to invest this month (‚Çπ)?: ").strip())

    # load model/scaler/encoder
    with open("model.pkl", "rb") as f:
        model = pickle.load(f)
    with open("scaler.pkl", "rb") as f:
        scaler = pickle.load(f)
    with open("encoder.pkl", "rb") as f:
        encoder = pickle.load(f)

    # prepare features and predict
    feature_names = ["S_Age","S_Income","S_Education"] + [f"F30_{i}" for i in range(1,13)]
    features = [age, income, education] + f30
    X = pd.DataFrame([features], columns=feature_names)
    X_scaled = scaler.transform(X)  # scaler was fitted with same feature names earlier
    pred_idx = model.predict(X_scaled)[0]
    risk_label = encoder.inverse_transform([pred_idx])[0]
    print(f"\nüß† Predicted Risk Profile: {risk_label}")

    # compute allocations
    base_alloc = BASE_ALLOCATIONS.get(risk_label, BASE_ALLOCATIONS["Moderate"])
    tilted = apply_goal_tilt(base_alloc, goal)
    final_alloc = ensure_leftover(tilted)

    print("\nüÆÆ Final Suggested Allocation (after leftover distribution):")
    for k,v in final_alloc.items():
        amt = start_monthly * v/100.0
        print(f"{k:<15}: {v:6.2f}%  = {rupees(amt)}")

    # pick recommendations
    stock_picks = pick_stocks(stock_df, risk_label, goal)
    mf_picks = pick_mutuals(mutual_df, risk_label, goal)

    # compute per-pick invest splits
    stock_monthly = start_monthly * final_alloc["Stocks"]/100.0
    mf_monthly = start_monthly * final_alloc["Mutual Funds"]/100.0
    per_stock = stock_monthly / max(1, len(stock_picks))
    per_mf = mf_monthly / max(1, len(mf_picks))

    print("\nüìà Recommended Stocks (Stock Allocation Split):")
    for _, r in stock_picks.iterrows():
        print(f"{r['STOCK']:<12} | Invest {rupees(per_stock)} | Avg Return 30d: {r['avg_return_30d']*100:,.2f}%")

    print("\nüìä Recommended Mutual Funds (Fund Allocation Split):")
    for _, r in mf_picks.iterrows():
        name = r["Scheme Name"]
        vol = r["Volatility (90d)"]
        print(f"{name[:60]:<60} | Invest {rupees(per_mf)} | Vol(90d): {vol:,.4f}")

    # SIMULATE with step-up SIP and get snapshots up to 10 years
    assets = list(final_alloc.keys())
    asset_returns = {a: ASSET_RETURN_ASSUMPTIONS.get(a, 0.03) for a in assets}
    totals_by_asset, principal_by_asset, yearly_snapshots_10y = simulate_stepup_sip(
        start_monthly, ANNUAL_GROWTH_RATE, 10, final_alloc, asset_returns
    )

    # We need totals for 3y, 5y, 10y. Simulate again for each horizon
    horizons = (3,5,10)
    totals_horizon = {}
    principals_horizon = {}
    for h in horizons:
        t_assets, p_assets, snaps = simulate_stepup_sip(start_monthly, ANNUAL_GROWTH_RATE, h, final_alloc, asset_returns)
        totals_horizon[h] = sum(t_assets.values())
        principals_horizon[h] = sum(p_assets.values())

    # Print SIP projection by asset (for 3/5/10 we will show numbers)
    print("\nüìÖ SIP-Style Projection by Asset (Assumed annual returns):")
    for asset in assets:
        # show monthly contribution for asset now (first year)
        monthly_asset = start_monthly * final_alloc[asset] / 100.0
        line = f"{asset:<15} | Monthly {rupees(monthly_asset)}"
        for yrs in horizons:
            t_assets, p_assets, snaps = simulate_stepup_sip(start_monthly, ANNUAL_GROWTH_RATE, yrs, final_alloc, asset_returns)
            value = t_assets.get(asset, 0.0)
            line += f" | {yrs}y: {rupees(value)}"
        print(line)

    # SIP Summary table (Invested, Profit, Total)
    print("\nüßæ SIP Summary (All Assets Combined)")
    print(f"{'Period':<6} | {'Invested Amount':>16} | {'Profit (Returns)':>16} | {'Total Value':>16}")
    print("-"*64)
    for yrs in horizons:
        invested = start_monthly * 12 * yrs
        total_value = totals_horizon[yrs]
        profit = total_value - invested
        print(f"{str(yrs)+'y':<6} | {rupees(invested):>16} | {rupees(profit):>16} | {rupees(total_value):>16}")
    # footnote
    print("\nüí° Note: Projections assume monthly contributions increase by 5% annually (step-up SIP).")
    print("üìä Type 'breakdown' (without quotes) and press Enter if you want a year-by-year breakdown (up to 10 years).")

    cmd = input("\nType 'breakdown' to see yearly table or press Enter to finish: ").strip().lower()
    if cmd == "breakdown":
        # print yearly snapshots for years 1..10 from yearly_snapshots_10y
        print("\nüìÜ Year-by-Year Breakdown (shows monthly invested that year, annual invested, cumulative invested, portfolio value, profit)")
        print(f"{'Year':<5} | {'Monthly':>12} | {'Annual Invested':>16} | {'Cumulative Invested':>20} | {'Portfolio Value':>16} | {'Profit':>12}")
        print("-"*100)
        for snap in yearly_snapshots_10y:
            y = snap["year"]
            monthly = snap["monthly_invest"]
            annual = snap["annual_invested"]
            cum = snap["cumulative_principal"]
            pv = snap["portfolio_value"]
            prof = snap["profit"]
            print(f"{y:<5} | {rupees(monthly):>12} | {rupees(annual):>16} | {rupees(cum):>20} | {rupees(pv):>16} | {rupees(prof):>12}")
        print("\nüí° Note: Monthly amount increases by 5% at the start of each year (this is the step-up behavior).")

if __name__ == "__main__":
    run_investment_advisor()


üìÅ Latest stock data file: enhanced_stock_snapshot.csv
üìÅ Latest mutual fund file: mutual_fund_volatility_report.csv

üë§ Enter your financial profile:


Enter your Age:  30
Enter your Monthly Income (‚Çπ):  35000



Education Level (1‚Äì5):
1 = No Formal Education
2 = High School
3 = Bachelor‚Äôs Degree
4 = Master‚Äôs Degree
5 = Doctorate or Professional Degree


Enter your Education Level:  3



üìã Risk Questions (1 = Strongly Disagree | 2 = Disagree | 3 = Neutral | 4 = Agree | 5 = Strongly Agree)


I prefer safer investments even if returns are low (1‚Äì5):  3
I like taking financial risks if the rewards are high (1‚Äì5):  3
I track my expenses and budget regularly (1‚Äì5):  3
I feel confident managing my finances (1‚Äì5):  3
I get anxious when markets fluctuate (1‚Äì5):  3
I prefer guaranteed returns over variable ones (1‚Äì5):  3
I invest for long-term goals (1‚Äì5):  3
I panic sell during downturns (1‚Äì5):  3
I consult a financial advisor before investing (1‚Äì5):  3
I review my portfolio frequently (1‚Äì5):  3
I use multiple investment channels (1‚Äì5):  3
I like reading about finance/investing (1‚Äì5):  3



üéØ Primary Goal:
1 = Retirement
2 = Buying a House
3 = Travel
4 = Child‚Äôs Education
5 = Wealth Growth


Choose 1‚Äì5:  2

üí∏ How much do you want to invest this month (‚Çπ)?:  10000



üß† Predicted Risk Profile: Conservative

üÆÆ Final Suggested Allocation (after leftover distribution):
Stocks         :   5.00%  = ‚Çπ500.00
Mutual Funds   :  25.00%  = ‚Çπ2,500.00
Gold           :  15.00%  = ‚Çπ1,500.00
PPF            :  30.00%  = ‚Çπ3,000.00
Savings        :  20.00%  = ‚Çπ2,000.00
Emergency Fund :   5.00%  = ‚Çπ500.00

üìà Recommended Stocks (Stock Allocation Split):
HINDUNILVR   | Invest ‚Çπ100.00 | Avg Return 30d: -0.04%
HEUBACHIND   | Invest ‚Çπ100.00 | Avg Return 30d: -0.01%
MUKKA        | Invest ‚Çπ100.00 | Avg Return 30d: -0.09%
CIPLA        | Invest ‚Çπ100.00 | Avg Return 30d: 0.07%
EICHERMOT    | Invest ‚Çπ100.00 | Avg Return 30d: 0.19%

üìä Recommended Mutual Funds (Fund Allocation Split):
Axis Liquid Fund - Direct Plan - Daily IDCW                  | Invest ‚Çπ500.00 | Vol(90d): 0.0000
JM Overnight Fund - (Direct) - Unclaimed IDCW I.E.F. - Growt | Invest ‚Çπ500.00 | Vol(90d): 0.0000
Axis Liquid Fund - Regular Plan - Daily IDCW                 | Invest


Type 'breakdown' to see yearly table or press Enter to finish:  breakdown



üìÜ Year-by-Year Breakdown (shows monthly invested that year, annual invested, cumulative invested, portfolio value, profit)
Year  |      Monthly |  Annual Invested |  Cumulative Invested |  Portfolio Value |       Profit
----------------------------------------------------------------------------------------------------
1     |   ‚Çπ10,000.00 |      ‚Çπ120,000.00 |          ‚Çπ120,000.00 |      ‚Çπ124,219.46 |    ‚Çπ4,219.46
2     |   ‚Çπ10,500.00 |      ‚Çπ126,000.00 |          ‚Çπ246,000.00 |      ‚Çπ262,840.39 |   ‚Çπ16,840.39
3     |   ‚Çπ11,025.00 |      ‚Çπ132,300.00 |          ‚Çπ378,300.00 |      ‚Çπ417,196.09 |   ‚Çπ38,896.09
4     |   ‚Çπ11,576.25 |      ‚Çπ138,915.00 |          ‚Çπ517,215.00 |      ‚Çπ588,736.86 |   ‚Çπ71,521.86
5     |   ‚Çπ12,155.06 |      ‚Çπ145,860.75 |          ‚Çπ663,075.75 |      ‚Çπ779,040.30 |  ‚Çπ115,964.55
6     |   ‚Çπ12,762.82 |      ‚Çπ153,153.79 |          ‚Çπ816,229.54 |      ‚Çπ989,822.57 |  ‚Çπ173,593.04
7     |   ‚Çπ13,400.96 |      ‚Çπ