In [7]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib
import warnings
warnings.filterwarnings('ignore')
import re
import yfinance as yf
from datetime import datetime, timedelta

class FinVerseDataGenerator:
    def __init__(self):
        pass
    
    def generate_synthetic_data(self, n_samples=10000):
        np.random.seed(42)
        
        age = np.random.normal(32, 8, n_samples).clip(18, 50)
        
        base_income = 400000 + (age - 18) * 25000
        income_variation = np.random.lognormal(0, 0.6, n_samples)
        annual_income = (base_income * income_variation).clip(300000, 3000000)
        
        savings_rate = np.random.beta(2, 3, n_samples) * 0.3 + 0.1
        monthly_savings = (annual_income * savings_rate) / 12
        
        emergency_months = np.random.gamma(2, 2, n_samples).clip(2, 15)
        emergency_fund = monthly_savings * emergency_months
        
        risk_probs = np.zeros((n_samples, 3))
        for i in range(n_samples):
            if age[i] < 30:
                risk_probs[i] = [0.2, 0.4, 0.4]
            elif age[i] < 40:
                risk_probs[i] = [0.3, 0.5, 0.2]
            else:
                risk_probs[i] = [0.5, 0.4, 0.1]
        
        risk_appetite = []
        for i in range(n_samples):
            risk_appetite.append(np.random.choice(['Low', 'Medium', 'High'], p=risk_probs[i]))
        
        goal_probs = np.zeros((n_samples, 7))
        goal_names = ['Wealth Creation', 'Retirement', 'Short-Term', 'Education', 
                     'Emergency Fund', 'Tax Saving', 'Property Purchase']
        
        for i in range(n_samples):
            if age[i] < 30:
                goal_probs[i] = [0.35, 0.1, 0.2, 0.15, 0.05, 0.1, 0.05]
            elif age[i] < 40:
                goal_probs[i] = [0.25, 0.2, 0.15, 0.1, 0.1, 0.15, 0.05]
            else:
                goal_probs[i] = [0.2, 0.35, 0.1, 0.05, 0.15, 0.1, 0.05]
        
        investment_goals = []
        for i in range(n_samples):
            investment_goals.append(np.random.choice(goal_names, p=goal_probs[i]))
        
        existing_investment = np.random.beta(2, 5, n_samples) * 0.4
        
        data = pd.DataFrame({
            'age': age.astype(int),
            'annual_income': annual_income.astype(int),
            'monthly_savings': monthly_savings.astype(int),
            'emergency_fund': emergency_fund.astype(int),
            'risk_appetite': risk_appetite,
            'investment_goal': investment_goals,
            'existing_investment_pct': existing_investment.round(3)
        })
        
        allocations = self._generate_realistic_allocations(data)
        
        final_data = pd.concat([data, allocations], axis=1)
        
        return final_data
    
    def _generate_realistic_allocations(self, data):
        n_samples = len(data)
        
        debt_allocation = np.zeros(n_samples)
        equity_allocation = np.zeros(n_samples)
        mutual_fund_allocation = np.zeros(n_samples)
        
        for i in range(n_samples):
            age = data.iloc[i]['age']
            risk = data.iloc[i]['risk_appetite']
            goal = data.iloc[i]['investment_goal']
            income = data.iloc[i]['annual_income']
            
            base_equity = max(30, min(70, 100 - age))
            
            if risk == 'Low':
                equity_pct = base_equity * 0.7
                debt_pct = min(60, 70 - equity_pct)
            elif risk == 'Medium':
                equity_pct = base_equity * 0.9
                debt_pct = min(50, 60 - equity_pct)
            else:
                equity_pct = min(75, base_equity * 1.3)
                debt_pct = max(15, 40 - equity_pct)
            
            if goal == 'Short-Term':
                debt_pct = min(70, debt_pct * 1.6)
                equity_pct = max(15, equity_pct * 0.6)
            elif goal == 'Retirement':
                if age < 35:
                    equity_pct = min(70, equity_pct * 1.1)
                else:
                    debt_pct = min(55, debt_pct * 1.2)
            elif goal == 'Education':
                debt_pct = min(55, debt_pct * 1.3)
                equity_pct = max(20, equity_pct * 0.8)
            elif goal == 'Wealth Creation':
                equity_pct = min(75, equity_pct * 1.2)
                debt_pct = max(15, debt_pct * 0.8)
            elif goal == 'Tax Saving':
                mutual_fund_pct = 40
                remaining = 60
                debt_pct = debt_pct * (remaining / (debt_pct + equity_pct))
                equity_pct = equity_pct * (remaining / (debt_pct + equity_pct))
            
            if goal != 'Tax Saving':
                mutual_fund_pct = 100 - debt_pct - equity_pct
            
            if mutual_fund_pct < 20:
                mutual_fund_pct = 20
                remaining = 80
                total_de = debt_pct + equity_pct
                if total_de > 0:
                    debt_pct = debt_pct * (remaining / total_de)
                    equity_pct = equity_pct * (remaining / total_de)
            
            noise = np.random.normal(0, 1.5, 3)
            debt_pct = max(5, debt_pct + noise[0])
            equity_pct = max(10, equity_pct + noise[1])
            mutual_fund_pct = max(15, mutual_fund_pct + noise[2])
            
            total = debt_pct + equity_pct + mutual_fund_pct
            debt_pct = (debt_pct / total) * 100
            equity_pct = (equity_pct / total) * 100
            mutual_fund_pct = (mutual_fund_pct / total) * 100
            
            debt_allocation[i] = round(debt_pct, 2)
            equity_allocation[i] = round(equity_pct, 2)
            mutual_fund_allocation[i] = round(mutual_fund_pct, 2)
        
        return pd.DataFrame({
            'debt_allocation': debt_allocation,
            'equity_allocation': equity_allocation,
            'mutual_fund_allocation': mutual_fund_allocation
        })

class MarketDataFetcher:
    def __init__(self):
        # Major Nifty 50 stocks 
        self.nifty50_stocks = [
            'RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS',
            'BAJFINANCE.NS', 'BHARTIARTL.NS', 'HINDUNILVR.NS', 'KOTAKBANK.NS', 
            'LT.NS', 'TITAN.NS', 'ADANIENT.NS', 'ULTRACEMCO.NS', 'AXISBANK.NS',
            'WIPRO.NS', 'MARUTI.NS', 'SUNPHARMA.NS', 'ASIANPAINT.NS', 'DMART.NS',
            'BAJAJFINSV.NS', 'NESTLEIND.NS', 'TECHM.NS', 'HCLTECH.NS', 'SBIN.NS',
            'INDUSINDBK.NS', 'ONGC.NS', 'NTPC.NS', 'POWERGRID.NS', 'M&M.NS'
        ]
        
        # Well-established ETFs
        self.index_etfs = [
            'NIFTYBEES.NS',     # Nippon India ETF Nifty BeES
            'BANKBEES.NS',      # Nippon India ETF Bank BeES
            'JUNIORBEES.NS',    # Nippon India ETF Junior BeES
            'SETFNIF50.NS',     # SBI ETF Nifty 50
            'SETFNIFBK.NS',     # SBI ETF Nifty Bank
            'SETF10GILT.NS',    # SBI ETF 10 year Gilt
            'LIQUIDBEES.NS',    # Nippon India ETF Liquid BeES
            'GOLDBEES.NS'       # Nippon India ETF Gold BeES
        ]
        
        # Debt instruments - using more reliable tickers
        self.debt_instruments = [
            'LIQUIDBEES.NS',    # Liquid Fund ETF
            'SETF10GILT.NS',    # 10 Year Gilt ETF
            'ABSLBANETF.NS',    # Aditya Birla Sun Life Banking & PSU Debt ETF
            'GILT5YBEES.NS'     # Nippon India ETF Long Term Gilt
        ]
        
        # Index funds and large cap mutual fund proxies (using ETFs as they're more reliable)
        self.mutual_fund_alternatives = [
            'SETFNIF50.NS',     # SBI Nifty 50 ETF
            'HDFCNIFETF.NS',    # HDFC Nifty ETF
            'ICICIB22.NS',      # ICICI Bharat 22 ETF
            'ABSLNN50ET.NS',    # Aditya Birla Nifty 50 ETF
            'KOTAKNIFTY.NS'     # Kotak Nifty ETF
        ]
    
    def safe_fetch_data(self, ticker, fetch_function, period='1y'):
        """Safely fetch data with error handling"""
        try:
            return fetch_function(ticker, period)
        except Exception as e:
            return None
    
    def get_stock_data(self, ticker, period='1y'):
        try:
            stock = yf.Ticker(ticker)
            hist = stock.history(period=period)
            
            if len(hist) == 0:
                return None
                
            info = stock.info
            current_price = hist['Close'][-1]
            
            # Calculate returns
            if len(hist) > 1:
                year_return = ((hist['Close'][-1] - hist['Close'][0]) / hist['Close'][0] * 100)
                volatility = hist['Close'].pct_change().std() * np.sqrt(252) * 100
            else:
                year_return = 0
                volatility = 0
            
            # Get name with fallback
            name = info.get('longName') or info.get('shortName') or ticker.replace('.NS', '').replace('.BO', '')
            
            return {
                'ticker': ticker,
                'name': name,
                'current_price': round(current_price, 2),
                'year_return': round(year_return, 2),
                'volatility': round(volatility, 2),
                'category': 'Equity'
            }
        except Exception as e:
            return None
    
    def get_etf_data(self, ticker, period='1y'):
        """Specifically for ETFs which might have different data structure"""
        try:
            etf = yf.Ticker(ticker)
            hist = etf.history(period=period)
            
            if len(hist) == 0:
                return None
                
            info = etf.info
            current_nav = hist['Close'][-1]
            
            # Calculate returns
            if len(hist) > 1:
                year_return = ((hist['Close'][-1] - hist['Close'][0]) / hist['Close'][0] * 100)
                volatility = hist['Close'].pct_change().std() * np.sqrt(252) * 100
            else:
                year_return = 0
                volatility = 0
            
            # Get name with fallback
            name = info.get('longName') or info.get('shortName') or ticker.replace('.NS', '').replace('.BO', '')
            
            return {
                'ticker': ticker,
                'name': name,
                'current_nav': round(current_nav, 2),
                'year_return': round(year_return, 2),
                'volatility': round(volatility, 2),
                'category': 'ETF/Index Fund'
            }
        except Exception as e:
            return None
    
    def get_top_performers(self, instruments, category, risk_appetite):
        """Fetch and sort instruments by returns with error handling"""
        performances = []
        
        for ticker in instruments:
            if category in ['ETF', 'Mutual Fund']:
                data = self.safe_fetch_data(ticker, self.get_etf_data)
            else:
                data = self.safe_fetch_data(ticker, self.get_stock_data)
                
            if data and data['year_return'] > -10:  # Include if not too negative
                performances.append(data)
        
        # Sort by year return in descending order
        performances.sort(key=lambda x: x['year_return'], reverse=True)
        
        # Filter based on risk appetite
        if risk_appetite == 'Low':
            # For low risk, prefer lower volatility and positive returns
            performances = [p for p in performances if p['volatility'] < 20 and p['year_return'] > 0]
        elif risk_appetite == 'Medium':
            # For medium risk, moderate volatility
            performances = [p for p in performances if p['volatility'] < 30]
        # High risk can take any volatility
        
        return performances[:5]  # Return top 5 to have more options
    
    def get_recommendations(self, risk_appetite, investment_horizon_months, amount, allocation):
        recommendations = {
            'debt': [],
            'equity': [],
            'mutual_fund': []
        }
        
        # Debt recommendations
        if allocation[0] > 10:
            # Always add FD as a safe option
            recommendations['debt'].append({
                'ticker': 'FD',
                'name': 'Bank Fixed Deposit (7-8% p.a.)',
                'current_price': amount,
                'year_return': 7.5,
                'volatility': 0,
                'category': 'Debt'
            })
            
            # Add PPF for long term
            if investment_horizon_months >= 36:
                recommendations['debt'].append({
                    'ticker': 'PPF',
                    'name': 'Public Provident Fund',
                    'current_price': amount,
                    'year_return': 7.1,
                    'volatility': 0,
                    'category': 'Debt'
                })
            
            # Try to fetch debt ETFs
            debt_etfs = ['LIQUIDBEES.NS', 'SETF10GILT.NS']
            for ticker in debt_etfs:
                data = self.safe_fetch_data(ticker, self.get_etf_data)
                if data:
                    data['category'] = 'Debt ETF'
                    recommendations['debt'].append(data)
                    if len(recommendations['debt']) >= 3:
                        break
        
        # Equity recommendations
        if allocation[1] > 10 and investment_horizon_months > 12:
            # Get top performing stocks
            top_stocks = self.get_top_performers(
                self.nifty50_stocks[:15],  # Check top 15 stocks
                'Equity', 
                risk_appetite
            )
            recommendations['equity'].extend(top_stocks[:3])
        
        # Mutual Fund/ETF recommendations
        if allocation[2] > 10:
            # First try ETFs as they're more reliable
            top_etfs = self.get_top_performers(
                self.index_etfs[:8],  # Check various ETFs
                'ETF',
                risk_appetite
            )
            
            for etf in top_etfs[:3]:
                etf['category'] = 'ETF/Index Fund'
                recommendations['mutual_fund'].append(etf)
        
        # Compile final recommendations based on allocation
        all_recommendations = []
        
        # Prioritize based on allocation percentages
        categories_sorted = sorted([
            ('debt', allocation[0], recommendations['debt']),
            ('equity', allocation[1], recommendations['equity']),
            ('mutual_fund', allocation[2], recommendations['mutual_fund'])
        ], key=lambda x: x[1], reverse=True)
        
        # Add recommendations from each category
        for category_name, alloc_pct, category_recs in categories_sorted:
            if category_recs and alloc_pct > 10:
                # Add at least one from this category
                all_recommendations.append(category_recs[0])
        
        # Fill remaining slots
        for category_name, alloc_pct, category_recs in categories_sorted:
            for rec in category_recs[1:]:
                if len(all_recommendations) < 3:
                    all_recommendations.append(rec)
        
        # Ensure we have at least 3 recommendations
        if len(all_recommendations) < 3:
            # Add safe government options
            safe_options = [
                {
                    'ticker': 'NSC',
                    'name': 'National Savings Certificate',
                    'current_price': amount,
                    'year_return': 7.7,
                    'volatility': 0,
                    'category': 'Government Scheme'
                },
                {
                    'ticker': 'SCSS',
                    'name': 'Senior Citizens Savings Scheme',
                    'current_price': amount,
                    'year_return': 8.2,
                    'volatility': 0,
                    'category': 'Government Scheme'
                }
            ]
            
            for option in safe_options:
                if len(all_recommendations) < 3:
                    all_recommendations.append(option)
        
        return all_recommendations[:3]

class FinVerseModel:
    def __init__(self):
        self.model = None
        self.market_fetcher = MarketDataFetcher()
        
    def prepare_features(self, data):
        feature_cols = ['age', 'annual_income', 'monthly_savings', 'emergency_fund', 
                       'risk_appetite', 'investment_goal', 'existing_investment_pct']
        target_cols = ['debt_allocation', 'equity_allocation', 'mutual_fund_allocation']
        
        X = data[feature_cols]
        y = data[target_cols]
        
        numerical_features = ['age', 'annual_income', 'monthly_savings', 
                            'emergency_fund', 'existing_investment_pct']
        categorical_features = ['risk_appetite', 'investment_goal']
        
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numerical_features),
                ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_features)
            ]
        )
        
        return X, y, preprocessor
    
    def train_model(self, data):
        X, y, preprocessor = self.prepare_features(data)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        
        self.model = Pipeline([
            ('preprocessor', preprocessor),
            ('regressor', RandomForestRegressor(
                n_estimators=200,
                max_depth=20,
                min_samples_split=5,
                min_samples_leaf=2,
                random_state=42,
                n_jobs=-1
            ))
        ])
        
        self.model.fit(X_train, y_train)
        
        y_pred = self.model.predict(X_test)
        print("\nModel Performance:")
        print("-" * 40)
        
        for i, target in enumerate(['Debt', 'Equity', 'Mutual Fund']):
            r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])
            mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
            print(f"{target} Allocation - R²: {r2:.4f}, MAE: {mae:.4f}")
        
        return self.model
    
    def save_model(self, filename='finverse_model.pkl'):
        joblib.dump(self.model, filename)
        print(f"Model saved to {filename}")

def parse_goal_context(message):
    extracted = {}
    
    # Improved amount parsing patterns
    amount_patterns = [
        r'(?:₹|Rs\.?|INR)?\s?(\d+(?:,\d+)*(?:\.\d+)?)\s*(L|l|lakhs?|lacs?|K|k|thousands?|Cr|cr|crores?)?',
        r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(L|l|lakhs?|lacs?|K|k|thousands?|Cr|cr|crores?)?(?:\s*(?:rupees?|Rs\.?|INR))?',
        r'(\d+(?:,\d+)*(?:\.\d+)?)(?:\s|$)'  # Plain numbers
    ]
    
    for pattern in amount_patterns:
        amount_match = re.search(pattern, message, re.IGNORECASE)
        if amount_match:
            # Remove commas and convert to float
            value_str = amount_match.group(1).replace(',', '')
            value = float(value_str)
            
            # Check for suffix if present
            suffix = amount_match.group(2) if len(amount_match.groups()) > 1 else None
            
            if suffix:
                suffix_lower = suffix.lower()
                if suffix_lower.startswith('l') or suffix_lower.startswith('lac'):
                    value *= 100000
                elif suffix_lower.startswith('k'):
                    value *= 1000
                elif suffix_lower.startswith('cr'):
                    value *= 10000000
            
            # Only accept reasonable amounts (at least 1000 Rs)
            if value >= 1000:
                extracted["goal_amount"] = int(value)
                break
    
    # Time patterns
    time_patterns = [
        r'(\d+)\s*(years?|yrs?)',
        r'(\d+)\s*(months?|mths?)',
        r'in\s*(\d+)\s*(years?|months?)',
        r'after\s*(\d+)\s*(years?|months?)',
        r'(\d+)\s*(year|month)'  # Handle singular without 's'
    ]
    
    for pattern in time_patterns:
        time_match = re.search(pattern, message, re.IGNORECASE)
        if time_match:
            value = int(time_match.group(1))
            unit = time_match.group(2).lower()
            if 'year' in unit or 'yr' in unit:
                extracted["timeline_months"] = value * 12
            else:
                extracted["timeline_months"] = value
            break
    
    # Goal keywords
    goal_keywords = {
        'vacation': 'vacation',
        'holiday': 'vacation',
        'trip': 'vacation',
        'car': 'car purchase',
        'vehicle': 'car purchase',
        'wedding': 'wedding',
        'marriage': 'wedding',
        'house': 'property',
        'home': 'property',
        'education': 'education',
        'study': 'education',
        'retire': 'retirement',
        'retirement': 'retirement'
    }
    
    message_lower = message.lower()
    for keyword, goal_type in goal_keywords.items():
        if keyword in message_lower:
            extracted["goal_type"] = goal_type
            break
    
    # Risk appetite
    if "low risk" in message_lower:
        extracted["risk_appetite"] = "Low"
    elif "high risk" in message_lower:
        extracted["risk_appetite"] = "High"
    elif "medium risk" in message_lower or "moderate risk" in message_lower:
        extracted["risk_appetite"] = "Medium"
    
    # Age
    age_match = re.search(r'\b(\d{2})\b(?:\s*years?\s*old)?', message)
    if age_match:
        extracted["age"] = int(age_match.group(1))
    
    # Annual income
    income_match = re.search(r'earn\s*(?:₹|Rs\.?|INR)?\s?(\d+(?:,\d+)*(?:\.\d+)?)\s*(L|l|lakhs?|K|k)?', message, re.IGNORECASE)
    if income_match:
        value_str = income_match.group(1).replace(',', '')
        value = float(value_str)
        suffix = income_match.group(2)
        if suffix and suffix.lower().startswith('l'):
            value *= 100000
        elif suffix and suffix.lower().startswith('k'):
            value *= 1000
        extracted["annual_income"] = int(value)
    
    return extracted

def format_investment_recommendations(recommendations, goal_amount, timeline_months, allocation):
    if not recommendations:
        return "Unable to fetch market data at this time. Please try again later."
    
    monthly_investment = goal_amount / timeline_months if timeline_months > 0 else 0
    
    output = f"\n📈 Recommended Portfolio Allocation:\n"
    output += f"   • Debt Instruments: {allocation[0]:.1f}%\n"
    output += f"   • Equity: {allocation[1]:.1f}%\n"
    output += f"   • Mutual Funds: {allocation[2]:.1f}%\n"
    
    output += f"\n💰 Target Amount: ₹{goal_amount:,} in {timeline_months} months\n"
    output += f"   Monthly Investment Required: ₹{monthly_investment:,.0f}\n"
    output += f"   • Debt portion: ₹{monthly_investment * allocation[0] / 100:,.0f}\n"
    output += f"   • Equity portion: ₹{monthly_investment * allocation[1] / 100:,.0f}\n"
    output += f"   • Mutual Fund portion: ₹{monthly_investment * allocation[2] / 100:,.0f}\n"
    
    output += f"\n📊 Top Investment Options:\n"
    output += "=" * 70 + "\n"
    
    for i, rec in enumerate(recommendations, 1):
        output += f"\n{i}. {rec['name']} ({rec['ticker']})\n"
        output += f"   Category: {rec['category']}\n"
        
        if rec['category'] == 'Debt' and rec['ticker'] == 'FD':
            output += f"   Expected Annual Return: {rec['year_return']:.2f}%\n"
        elif rec['category'] == 'Equity':
            output += f"   Current Price: ₹{rec['current_price']:.2f}\n"
            output += f"   1-Year Return: {rec['year_return']:.2f}%\n"
            output += f"   Volatility: {rec['volatility']:.2f}%\n"
        elif rec['category'] == 'Mutual Fund':
            output += f"   Current NAV: ₹{rec.get('current_nav', rec.get('current_price', 0)):.2f}\n"
            output += f"   1-Year Return: {rec['year_return']:.2f}%\n"
            output += f"   Volatility: {rec['volatility']:.2f}%\n"
        
        # Calculate expected value
        expected_return = rec['year_return'] * (timeline_months / 12)
        expected_value = goal_amount * (1 + expected_return / 100)
        output += f"   Expected Value: ₹{expected_value:,.0f}\n"
    
    output += "\n⚠️ Disclaimer: Past performance does not guarantee future results. "
    output += "This is an AI generated financial advice based on market data. Please consult a financial advisor before investing.\n"
    
    return output

def run_enhanced_chat(model_pipeline):
    market_fetcher = MarketDataFetcher()
    user_context = {}
    
    print("\n💬 Welcome to FinVerse AI - Enhanced Investment Advisor!")
    print("\nI can help you with:")
    print("  • Portfolio allocation recommendations")
    print("  • Short-term goal planning (vacation, car, wedding, etc.)")
    print("  • Real-time investment suggestions from Indian markets\n")
    print("Sample Prompts to ask:")
    print("  • 'I want to save 5L for a vacation in 6 months'")
    print("  • 'Need 20L for wedding in 2 years, medium risk'")
    print("  • 'I'm 30, earn 10L annually, want to invest for retirement'\n")
    print("Type 'exit' to quit.\n")
    
    while True:
        user_input = input("🧑 You: ").strip()
        
        if not user_input:
            continue
        if user_input.lower() in ['exit', 'quit']:
            print("👋 Thank you for using FinVerse AI. Goodbye!")
            break
        
        # Parse new context from user input
        context = parse_goal_context(user_input)
        
        # Update user context without overwriting existing values with empty ones
        for key, value in context.items():
            user_context[key] = value
        
        # Check if this is a non-financial query
        non_financial_keywords = ['weather', 'recipe', 'movie', 'game', 'food', 'sports', 'cricket', 'football']
        if any(word in user_input.lower() for word in non_financial_keywords) and \
           not any(word in user_input.lower() for word in ['invest', 'save', 'money', 'fund', 'portfolio']):
            print("\n🤖 I'm a financial advisor AI. I can help you with investment planning,")
            print("   portfolio recommendations, and financial goal planning.")
            print("   What financial goals would you like to discuss?\n")
            continue
        
        # Handle retirement planning
        if 'retirement' in user_input.lower() and 'goal_amount' not in user_context:
            if 'age' in user_context:
                retirement_age = 60
                years_to_retirement = retirement_age - user_context['age']
                user_context['timeline_months'] = years_to_retirement * 12
                user_context['goal_type'] = 'retirement'
                user_context['investment_goal'] = 'Retirement'
        
        # Check for missing information
        missing_info = []
        if 'goal_type' in user_context or 'goal_amount' in user_context:
            if 'goal_amount' not in user_context and 'retirement' not in user_input.lower():
                missing_info.append("goal amount")
            if 'timeline_months' not in user_context and 'retirement' not in user_input.lower():
                missing_info.append("timeline")
        
        # Ask for missing information
        if missing_info:
            missing_str = " and ".join(missing_info)
            print(f"\n🤖 I need to know your {missing_str} to provide specific recommendations.")
            
            if "goal amount" in missing_info:
                amount_input = input("   How much do you want to save? (e.g., '5L', '20 lakhs'): ")
                amount_context = parse_goal_context(amount_input)
                if 'goal_amount' in amount_context:
                    user_context['goal_amount'] = amount_context['goal_amount']
            
            if "timeline" in missing_info:
                timeline_input = input("   When do you need this amount? (e.g., '6 months', '2 years'): ")
                timeline_context = parse_goal_context(timeline_input)
                if 'timeline_months' in timeline_context:
                    user_context['timeline_months'] = timeline_context['timeline_months']
                else:
                    user_context['timeline_months'] = 12
                    print("   Assuming 1 year timeline.")
        
        # Get risk appetite and investment goal
        risk = user_context.get('risk_appetite', 'Medium')
        
        if 'timeline_months' in user_context:
            if user_context['timeline_months'] <= 12:
                investment_goal = 'Short-Term'
            else:
                investment_goal = user_context.get('investment_goal', 'Wealth Creation')
        else:
            investment_goal = user_context.get('investment_goal', 'Wealth Creation')
        
        # Create profile for model prediction
        profile = {
            'age': user_context.get('age', 30),
            'annual_income': user_context.get('annual_income', 800000),
            'monthly_savings': user_context.get('goal_amount', 100000) / user_context.get('timeline_months', 12) if user_context.get('timeline_months', 12) > 0 else 20000,
            'emergency_fund': 100000,
            'risk_appetite': risk,
            'investment_goal': investment_goal,
            'existing_investment_pct': 0.1
        }
        
        profile_df = pd.DataFrame([profile])
        allocation = model_pipeline.predict(profile_df)[0]
        
        # If we have both goal amount and timeline, provide full recommendations
        if 'goal_amount' in user_context and 'timeline_months' in user_context:
            # Validate goal amount
            if user_context['goal_amount'] < 1000:
                print("\n🤖 The goal amount seems too low. Please enter an amount of at least ₹1,000.")
                user_context.pop('goal_amount', None)
                continue
            
            # Get market recommendations
            recommendations = market_fetcher.get_recommendations(
                risk, user_context['timeline_months'], user_context['goal_amount'], allocation
            )
            
            # Display recommendations
            print(format_investment_recommendations(
                recommendations, user_context['goal_amount'], user_context['timeline_months'], allocation
            ))
            
            # Clear context for next query but keep user profile info
            keep_keys = ['age', 'annual_income', 'risk_appetite']
            user_context = {k: v for k, v in user_context.items() if k in keep_keys}
            
        else:
            # Just show allocation if we don't have complete goal information
            print(f"\n📈 Based on your profile, here's my portfolio recommendation:")
            print(f"   • Debt Instruments: {allocation[0]:.1f}%")
            print(f"   • Equity: {allocation[1]:.1f}%")
            print(f"   • Mutual Funds: {allocation[2]:.1f}%")
            print(f"\n💡 To get specific investment options and monthly contribution amounts,")
            print(f"   please provide your financial goal and timeline.\n")

def main():
    print("Initializing FinVerse AI...")
    generator = FinVerseDataGenerator()
    synthetic_data = generator.generate_synthetic_data(n_samples=8000)
    
    print("\nTraining Random Forest model...")
    finverse_model = FinVerseModel()
    model_pipeline = finverse_model.train_model(synthetic_data)
    finverse_model.save_model()
    
    print("\n✅ Model training complete. Launching enhanced chat interface...")
    run_enhanced_chat(model_pipeline)

if __name__ == "__main__":
    main()

Initializing FinVerse AI...

Training Random Forest model...

Model Performance:
----------------------------------------
Debt Allocation - R²: 0.9846, MAE: 0.8397
Equity Allocation - R²: 0.9864, MAE: 1.0830
Mutual Fund Allocation - R²: 0.9882, MAE: 0.9933
Model saved to finverse_model.pkl

✅ Model training complete. Launching enhanced chat interface...

💬 Welcome to FinVerse AI - Enhanced Investment Advisor!

I can help you with:
  • Portfolio allocation recommendations
  • Short-term goal planning (vacation, car, wedding, etc.)
  • Real-time investment suggestions from Indian markets

Examples:
  • 'I want to save 5L for a vacation in 6 months'
  • 'Need 20L for wedding in 2 years, medium risk'
  • 'I'm 30, earn 10L annually, want to invest for retirement'

Type 'exit' to quit.



🧑 You:  I'm 30, earn 10L annually, want to invest for retirement



📈 Based on your profile, here's my portfolio recommendation:
   • Debt Instruments: 4.6%
   • Equity: 64.1%
   • Mutual Funds: 31.3%

💡 To get specific investment options and monthly contribution amounts,
   please provide your financial goal and timeline.



🧑 You:  50 lakhs in 20 years



📈 Recommended Portfolio Allocation:
   • Debt Instruments: 18.1%
   • Equity: 45.3%
   • Mutual Funds: 36.6%

💰 Target Amount: ₹5,000,000 in 240 months
   Monthly Investment Required: ₹20,833
   • Debt portion: ₹3,763
   • Equity portion: ₹9,441
   • Mutual Fund portion: ₹7,629

📊 Top Investment Options:

1. Bajaj Finance Limited (BAJFINANCE.NS)
   Category: Equity
   Current Price: ₹926.25
   1-Year Return: 30.27%
   Volatility: 25.65%
   Expected Value: ₹35,270,000

2. Nippon India ETF Gold BeES (GOLDBEES.NS)
   Category: ETF/Index Fund
   Expected Value: ₹34,550,000

3. Bank Fixed Deposit (7-8% p.a.) (FD)
   Category: Debt
   Expected Annual Return: 7.50%
   Expected Value: ₹12,500,000

⚠️ Disclaimer: Past performance does not guarantee future results. This is an AI generated financial advice based on market data. Please consult a financial advisor before investing.



🧑 You:  exit


👋 Thank you for using FinVerse AI. Goodbye!
