In [8]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib
import warnings
warnings.filterwarnings('ignore')
import re
import yfinance as yf
from datetime import datetime, timedelta
import json
from typing import Dict, List, Any, Optional

# LangChain imports - Updated for compatibility
try:
    from langchain.llms import OpenAI
    from langchain.chat_models import ChatOpenAI
except ImportError:
    from langchain_openai import OpenAI, ChatOpenAI

from langchain.memory import ConversationBufferWindowMemory
from langchain.agents import Tool, AgentExecutor, initialize_agent, AgentType
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, ConversationChain
from langchain.schema import BaseOutputParser
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory.chat_message_histories import ChatMessageHistory
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import os

# Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = ""  # Add your API key here

class FinVerseDataGenerator:
    """Generates synthetic financial data for training the ML model"""
    
    def __init__(self):
        pass
    
    def generate_synthetic_data(self, n_samples=10000):
        """Generate synthetic financial profiles and portfolio allocations"""
        np.random.seed(42)
        
        # Generate age distribution
        age = np.random.normal(32, 8, n_samples).clip(18, 50)
        
        # Generate income based on age
        base_income = 400000 + (age - 18) * 25000
        income_variation = np.random.lognormal(0, 0.6, n_samples)
        annual_income = (base_income * income_variation).clip(300000, 3000000)
        
        # Calculate savings
        savings_rate = np.random.beta(2, 3, n_samples) * 0.3 + 0.1
        monthly_savings = (annual_income * savings_rate) / 12
        
        # Emergency fund
        emergency_months = np.random.gamma(2, 2, n_samples).clip(2, 15)
        emergency_fund = monthly_savings * emergency_months
        
        # Risk appetite based on age
        risk_probs = np.zeros((n_samples, 3))
        for i in range(n_samples):
            if age[i] < 30:
                risk_probs[i] = [0.2, 0.4, 0.4]  # Low, Medium, High
            elif age[i] < 40:
                risk_probs[i] = [0.3, 0.5, 0.2]
            else:
                risk_probs[i] = [0.5, 0.4, 0.1]
        
        risk_appetite = []
        for i in range(n_samples):
            risk_appetite.append(np.random.choice(['Low', 'Medium', 'High'], p=risk_probs[i]))
        
        # Investment goals based on age
        goal_probs = np.zeros((n_samples, 7))
        goal_names = ['Wealth Creation', 'Retirement', 'Short-Term', 'Education', 
                     'Emergency Fund', 'Tax Saving', 'Property Purchase']
        
        for i in range(n_samples):
            if age[i] < 30:
                goal_probs[i] = [0.35, 0.1, 0.2, 0.15, 0.05, 0.1, 0.05]
            elif age[i] < 40:
                goal_probs[i] = [0.25, 0.2, 0.15, 0.1, 0.1, 0.15, 0.05]
            else:
                goal_probs[i] = [0.2, 0.35, 0.1, 0.05, 0.15, 0.1, 0.05]
        
        investment_goals = []
        for i in range(n_samples):
            investment_goals.append(np.random.choice(goal_names, p=goal_probs[i]))
        
        # Existing investments
        existing_investment = np.random.beta(2, 5, n_samples) * 0.4
        
        # Create dataframe
        data = pd.DataFrame({
            'age': age.astype(int),
            'annual_income': annual_income.astype(int),
            'monthly_savings': monthly_savings.astype(int),
            'emergency_fund': emergency_fund.astype(int),
            'risk_appetite': risk_appetite,
            'investment_goal': investment_goals,
            'existing_investment_pct': existing_investment.round(3)
        })
        
        # Generate realistic allocations
        allocations = self._generate_realistic_allocations(data)
        
        return pd.concat([data, allocations], axis=1)
    
    def _generate_realistic_allocations(self, data):
        """Generate realistic portfolio allocations based on profile"""
        n_samples = len(data)
        
        debt_allocation = np.zeros(n_samples)
        equity_allocation = np.zeros(n_samples)
        mutual_fund_allocation = np.zeros(n_samples)
        
        for i in range(n_samples):
            age = data.iloc[i]['age']
            risk = data.iloc[i]['risk_appetite']
            goal = data.iloc[i]['investment_goal']
            income = data.iloc[i]['annual_income']
            
            # Base equity allocation (100 - age rule)
            base_equity = max(30, min(70, 100 - age))
            
            # Adjust based on risk appetite
            if risk == 'Low':
                equity_pct = base_equity * 0.7
                debt_pct = min(60, 70 - equity_pct)
            elif risk == 'Medium':
                equity_pct = base_equity * 0.9
                debt_pct = min(50, 60 - equity_pct)
            else:  # High risk
                equity_pct = min(75, base_equity * 1.3)
                debt_pct = max(15, 40 - equity_pct)
            
            # Adjust based on goal
            if goal == 'Short-Term':
                debt_pct = min(70, debt_pct * 1.6)
                equity_pct = max(15, equity_pct * 0.6)
            elif goal == 'Retirement':
                if age < 35:
                    equity_pct = min(70, equity_pct * 1.1)
                else:
                    debt_pct = min(55, debt_pct * 1.2)
            elif goal == 'Education':
                debt_pct = min(55, debt_pct * 1.3)
                equity_pct = max(20, equity_pct * 0.8)
            elif goal == 'Wealth Creation':
                equity_pct = min(75, equity_pct * 1.2)
                debt_pct = max(15, debt_pct * 0.8)
            elif goal == 'Tax Saving':
                mutual_fund_pct = 40
                remaining = 60
                debt_pct = debt_pct * (remaining / (debt_pct + equity_pct))
                equity_pct = equity_pct * (remaining / (debt_pct + equity_pct))
            
            # Calculate mutual fund allocation
            if goal != 'Tax Saving':
                mutual_fund_pct = 100 - debt_pct - equity_pct
            
            # Ensure minimum allocation to mutual funds
            if mutual_fund_pct < 20:
                mutual_fund_pct = 20
                remaining = 80
                total_de = debt_pct + equity_pct
                if total_de > 0:
                    debt_pct = debt_pct * (remaining / total_de)
                    equity_pct = equity_pct * (remaining / total_de)
            
            # Add some noise for realism
            noise = np.random.normal(0, 1.5, 3)
            debt_pct = max(5, debt_pct + noise[0])
            equity_pct = max(10, equity_pct + noise[1])
            mutual_fund_pct = max(15, mutual_fund_pct + noise[2])
            
            # Normalize to 100%
            total = debt_pct + equity_pct + mutual_fund_pct
            debt_pct = (debt_pct / total) * 100
            equity_pct = (equity_pct / total) * 100
            mutual_fund_pct = (mutual_fund_pct / total) * 100
            
            debt_allocation[i] = round(debt_pct, 2)
            equity_allocation[i] = round(equity_pct, 2)
            mutual_fund_allocation[i] = round(mutual_fund_pct, 2)
        
        return pd.DataFrame({
            'debt_allocation': debt_allocation,
            'equity_allocation': equity_allocation,
            'mutual_fund_allocation': mutual_fund_allocation
        })

class MarketDataFetcher:
    """Fetches real-time market data from Yahoo Finance"""
    
    def __init__(self):
        # Popular Nifty 50 stocks
        self.nifty50_stocks = [
            'RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS',
            'BAJFINANCE.NS', 'BHARTIARTL.NS', 'HINDUNILVR.NS', 'KOTAKBANK.NS', 
            'LT.NS', 'TITAN.NS', 'ADANIENT.NS', 'ULTRACEMCO.NS', 'AXISBANK.NS',
            'WIPRO.NS', 'MARUTI.NS', 'SUNPHARMA.NS', 'ASIANPAINT.NS',
            'BAJAJFINSV.NS', 'NESTLEIND.NS', 'TECHM.NS', 'HCLTECH.NS', 'SBIN.NS'
        ]
        
        # Popular ETFs
        self.index_etfs = [
            'NIFTYBEES.NS', 'BANKBEES.NS', 'JUNIORBEES.NS', 'SETFNIF50.NS',
            'SETFNIFBK.NS', 'SETF10GILT.NS', 'LIQUIDBEES.NS', 'GOLDBEES.NS'
        ]
        
        # Debt instruments
        self.debt_instruments = [
            'LIQUIDBEES.NS', 'SETF10GILT.NS', 'ABSLBANETF.NS', 'GILT5YBEES.NS'
        ]
    
    def safe_fetch_data(self, ticker, fetch_function, period='1y'):
        """Safely fetch data with error handling"""
        try:
            return fetch_function(ticker, period)
        except Exception as e:
            return None
    
    def get_stock_data(self, ticker, period='1y'):
        """Fetch stock data from Yahoo Finance"""
        try:
            stock = yf.Ticker(ticker)
            hist = stock.history(period=period)
            
            if len(hist) == 0:
                return None
                
            info = stock.info
            current_price = hist['Close'][-1]
            
            # Calculate returns
            if len(hist) > 1:
                year_return = ((hist['Close'][-1] - hist['Close'][0]) / hist['Close'][0] * 100)
                volatility = hist['Close'].pct_change().std() * np.sqrt(252) * 100
            else:
                year_return = 0
                volatility = 0
            
            # Get name
            name = info.get('longName') or info.get('shortName') or ticker.replace('.NS', '')
            
            return {
                'ticker': ticker,
                'name': name,
                'current_price': round(current_price, 2),
                'year_return': round(year_return, 2),
                'volatility': round(volatility, 2),
                'category': 'Equity'
            }
        except Exception as e:
            return None
    
    def get_etf_data(self, ticker, period='1y'):
        """Fetch ETF data from Yahoo Finance"""
        try:
            etf = yf.Ticker(ticker)
            hist = etf.history(period=period)
            
            if len(hist) == 0:
                return None
                
            info = etf.info
            current_nav = hist['Close'][-1]
            
            # Calculate returns
            if len(hist) > 1:
                year_return = ((hist['Close'][-1] - hist['Close'][0]) / hist['Close'][0] * 100)
                volatility = hist['Close'].pct_change().std() * np.sqrt(252) * 100
            else:
                year_return = 0
                volatility = 0
            
            # Get name
            name = info.get('longName') or info.get('shortName') or ticker.replace('.NS', '')
            
            return {
                'ticker': ticker,
                'name': name,
                'current_nav': round(current_nav, 2),
                'year_return': round(year_return, 2),
                'volatility': round(volatility, 2),
                'category': 'ETF/Index Fund'
            }
        except Exception as e:
            return None
    
    def get_top_performers(self, instruments, category, risk_appetite):
        """Fetch and sort instruments by returns with error handling"""
        performances = []
        
        for ticker in instruments:
            if category in ['ETF', 'Mutual Fund']:
                data = self.safe_fetch_data(ticker, self.get_etf_data)
            else:
                data = self.safe_fetch_data(ticker, self.get_stock_data)
                
            if data and data['year_return'] > -10:
                performances.append(data)
        
        # Sort by year return
        performances.sort(key=lambda x: x['year_return'], reverse=True)
        
        # Filter based on risk appetite
        if risk_appetite == 'Low':
            performances = [p for p in performances if p['volatility'] < 20 and p['year_return'] > 0]
        elif risk_appetite == 'Medium':
            performances = [p for p in performances if p['volatility'] < 30]
        
        return performances[:5]
    
    def get_recommendations(self, risk_appetite, investment_horizon_months, amount, allocation):
        """Get investment recommendations based on allocation"""
        recommendations = {
            'debt': [],
            'equity': [],
            'mutual_fund': []
        }
        
        # Debt recommendations
        if allocation[0] > 10:
            # Always add FD as a safe option
            recommendations['debt'].append({
                'ticker': 'FD',
                'name': 'Bank Fixed Deposit (7-8% p.a.)',
                'current_price': amount,
                'year_return': 7.5,
                'volatility': 0,
                'category': 'Debt'
            })
            
            # Add PPF for long term
            if investment_horizon_months >= 36:
                recommendations['debt'].append({
                    'ticker': 'PPF',
                    'name': 'Public Provident Fund',
                    'current_price': amount,
                    'year_return': 7.1,
                    'volatility': 0,
                    'category': 'Debt'
                })
            
            # Try to fetch debt ETFs
            debt_etfs = ['LIQUIDBEES.NS', 'SETF10GILT.NS']
            for ticker in debt_etfs:
                data = self.safe_fetch_data(ticker, self.get_etf_data)
                if data:
                    data['category'] = 'Debt ETF'
                    recommendations['debt'].append(data)
                    if len(recommendations['debt']) >= 3:
                        break
        
        # Equity recommendations
        if allocation[1] > 10 and investment_horizon_months > 12:
            top_stocks = self.get_top_performers(
                self.nifty50_stocks[:15],
                'Equity', 
                risk_appetite
            )
            recommendations['equity'].extend(top_stocks[:3])
        
        # Mutual Fund/ETF recommendations
        if allocation[2] > 10:
            top_etfs = self.get_top_performers(
                self.index_etfs[:8],
                'ETF',
                risk_appetite
            )
            
            for etf in top_etfs[:3]:
                etf['category'] = 'ETF/Index Fund'
                recommendations['mutual_fund'].append(etf)
        
        # Compile final recommendations
        all_recommendations = []
        
        # Prioritize based on allocation percentages
        categories_sorted = sorted([
            ('debt', allocation[0], recommendations['debt']),
            ('equity', allocation[1], recommendations['equity']),
            ('mutual_fund', allocation[2], recommendations['mutual_fund'])
        ], key=lambda x: x[1], reverse=True)
        
        # Add recommendations from each category
        for category_name, alloc_pct, category_recs in categories_sorted:
            if category_recs and alloc_pct > 10:
                all_recommendations.append(category_recs[0])
        
        # Fill remaining slots
        for category_name, alloc_pct, category_recs in categories_sorted:
            for rec in category_recs[1:]:
                if len(all_recommendations) < 3:
                    all_recommendations.append(rec)
        
        # Ensure we have at least 3 recommendations
        if len(all_recommendations) < 3:
            safe_options = [
                {
                    'ticker': 'NSC',
                    'name': 'National Savings Certificate',
                    'current_price': amount,
                    'year_return': 7.7,
                    'volatility': 0,
                    'category': 'Government Scheme'
                },
                {
                    'ticker': 'SCSS',
                    'name': 'Senior Citizens Savings Scheme',
                    'current_price': amount,
                    'year_return': 8.2,
                    'volatility': 0,
                    'category': 'Government Scheme'
                }
            ]
            
            for option in safe_options:
                if len(all_recommendations) < 3:
                    all_recommendations.append(option)
        
        return all_recommendations[:3]

class FinVerseModel:
    """Machine Learning model for portfolio allocation"""
    
    def __init__(self):
        self.model = None
        self.market_fetcher = MarketDataFetcher()
        
    def prepare_features(self, data):
        """Prepare features for training"""
        feature_cols = ['age', 'annual_income', 'monthly_savings', 'emergency_fund', 
                       'risk_appetite', 'investment_goal', 'existing_investment_pct']
        target_cols = ['debt_allocation', 'equity_allocation', 'mutual_fund_allocation']
        
        X = data[feature_cols]
        y = data[target_cols]
        
        numerical_features = ['age', 'annual_income', 'monthly_savings', 
                            'emergency_fund', 'existing_investment_pct']
        categorical_features = ['risk_appetite', 'investment_goal']
        
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numerical_features),
                ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_features)
            ]
        )
        
        return X, y, preprocessor
    
    def train_model(self, data):
        """Train the Random Forest model"""
        X, y, preprocessor = self.prepare_features(data)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        
        self.model = Pipeline([
            ('preprocessor', preprocessor),
            ('regressor', RandomForestRegressor(
                n_estimators=200,
                max_depth=20,
                min_samples_split=5,
                min_samples_leaf=2,
                random_state=42,
                n_jobs=-1
            ))
        ])
        
        self.model.fit(X_train, y_train)
        
        # Evaluate model
        y_pred = self.model.predict(X_test)
        print("\nModel Performance:")
        print("-" * 40)
        
        for i, target in enumerate(['Debt', 'Equity', 'Mutual Fund']):
            r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])
            mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
            print(f"{target} Allocation - R²: {r2:.4f}, MAE: {mae:.4f}")
        
        return self.model
    
    def save_model(self, filename='finverse_model.pkl'):
        """Save the trained model"""
        joblib.dump(self.model, filename)
        print(f"Model saved to {filename}")

# LangChain Integration Classes

class FinancialDataExtractor:
    """Extracts financial information from user input using LangChain"""
    
    def __init__(self, llm):
        self.llm = llm
        self.extraction_prompt = PromptTemplate(
            input_variables=["user_input"],
            template="""
You are a financial data extraction expert. Extract the following information from the user's message:

User Input: {user_input}

Please extract and return ONLY a JSON object with the following keys (use null if not found):
- "goal_amount": numeric value in rupees (convert lakhs/crores to actual numbers)
- "timeline_months": numeric value in months
- "risk_appetite": one of "Low", "Medium", "High"
- "age": numeric age
- "annual_income": numeric value in rupees
- "goal_type": short description of the goal (e.g., "vacation", "wedding", "retirement")
- "investment_goal": one of "Wealth Creation", "Retirement", "Short-Term", "Education", "Emergency Fund", "Tax Saving", "Property Purchase"

Examples:
"I want to save 5L for vacation in 6 months" -> {{"goal_amount": 500000, "timeline_months": 6, "goal_type": "vacation", "investment_goal": "Short-Term"}}
"I'm 30, earn 10L annually, medium risk" -> {{"age": 30, "annual_income": 1000000, "risk_appetite": "Medium"}}

Return only the JSON object, nothing else:
"""
        )
        
        self.extraction_chain = LLMChain(
            llm=self.llm,
            prompt=self.extraction_prompt
        )
    
    def extract_financial_data(self, user_input: str) -> Dict[str, Any]:
        """Extract financial data from user input"""
        try:
            result = self.extraction_chain.run(user_input=user_input)
            # Clean the result to extract JSON
            result = result.strip()
            if result.startswith('```json'):
                result = result[7:-3]
            elif result.startswith('```'):
                result = result[3:-3]
            
            extracted_data = json.loads(result)
            # Filter out null values
            return {k: v for k, v in extracted_data.items() if v is not None}
        except Exception as e:
            print(f"Error extracting data: {e}")
            return {}

class FinancialRecommendationGenerator:
    """Generates financial recommendations using LangChain"""
    
    def __init__(self, model_pipeline, market_fetcher, llm):
        self.model_pipeline = model_pipeline
        self.market_fetcher = market_fetcher
        self.llm = llm
        
        self.recommendation_prompt = PromptTemplate(
            input_variables=["user_profile", "allocation", "recommendations", "goal_amount", "timeline_months"],
            template="""
You are a professional financial advisor. Based on the following information, provide a comprehensive investment recommendation:

User Profile: {user_profile}

Recommended Portfolio Allocation:
- Debt: {allocation[0]:.1f}%
- Equity: {allocation[1]:.1f}%
- Mutual Funds: {allocation[2]:.1f}%

Top Investment Options:
{recommendations}

Goal Amount: ₹{goal_amount:,}
Timeline: {timeline_months} months

Please provide:
1. A brief explanation of why this allocation suits the user
2. Monthly investment amount needed
3. Key benefits and risks
4. Any additional advice

Keep the response conversational and easy to understand.
"""
        )
        
        self.recommendation_chain = LLMChain(
            llm=self.llm,
            prompt=self.recommendation_prompt
        )
    
    def generate_recommendation(self, user_context: Dict[str, Any]) -> str:
        """Generate recommendation based on user context"""
        # Create profile for model prediction
        profile = {
            'age': user_context.get('age', 30),
            'annual_income': user_context.get('annual_income', 800000),
            'monthly_savings': user_context.get('goal_amount', 100000) / user_context.get('timeline_months', 12) if user_context.get('timeline_months', 12) > 0 else 20000,
            'emergency_fund': 100000,
            'risk_appetite': user_context.get('risk_appetite', 'Medium'),
            'investment_goal': user_context.get('investment_goal', 'Wealth Creation'),
            'existing_investment_pct': 0.1
        }
        
        profile_df = pd.DataFrame([profile])
        allocation = self.model_pipeline.predict(profile_df)[0]
        
        # Get market recommendations
        recommendations = self.market_fetcher.get_recommendations(
            user_context.get('risk_appetite', 'Medium'),
            user_context.get('timeline_months', 12),
            user_context.get('goal_amount', 100000),
            allocation
        )
        
        # Format recommendations for the prompt
        rec_text = []
        for i, rec in enumerate(recommendations, 1):
            rec_text.append(f"{i}. {rec['name']} - Expected Return: {rec['year_return']:.2f}%")
        recommendations_str = "\n".join(rec_text)
        
        # Generate LLM response
        response = self.recommendation_chain.run(
            user_profile=str(profile),
            allocation=allocation,
            recommendations=recommendations_str,
            goal_amount=user_context.get('goal_amount', 100000),
            timeline_months=user_context.get('timeline_months', 12)
        )
        
        return response

class FinVerseAgent:
    """Main agent that orchestrates the financial advisory process"""
    
    def __init__(self, model_pipeline, market_fetcher, llm_model="gpt-3.5-turbo"):
        self.model_pipeline = model_pipeline
        self.market_fetcher = market_fetcher
        
        # Initialize LLM
        try:
            self.llm = ChatOpenAI(model=llm_model, temperature=0.3)
        except Exception as e:
            print(f"Warning: Could not initialize OpenAI LLM: {e}")
            print("Using fallback mode without LLM features")
            self.llm = None
        
        # Initialize components
        if self.llm:
            self.data_extractor = FinancialDataExtractor(self.llm)
            self.recommendation_generator = FinancialRecommendationGenerator(
                model_pipeline, market_fetcher, self.llm
            )
        
        # Initialize memory
        self.memory = ConversationBufferWindowMemory(
            k=10,
            return_messages=True,
            memory_key="chat_history"
        )
        
        # User context storage
        self.user_context = {}
        
        # Create tools
        self.tools = self._create_tools()
        
        # Initialize agent if LLM is available
        if self.llm:
            try:
                self.agent = initialize_agent(
                    self.tools,
                    self.llm,
                    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
                    memory=self.memory,
                    verbose=False
                )
            except Exception as e:
                print(f"Warning: Could not initialize agent: {e}")
                self.agent = None
    
    def _create_tools(self) -> List[Tool]:
        """Create tools for the agent"""
        tools = [
            Tool(
                name="get_portfolio_allocation",
                description="Get recommended portfolio allocation based on user profile",
                func=self._get_portfolio_allocation
            ),
            Tool(
                name="get_market_recommendations",
                description="Get current market investment recommendations",
                func=self._get_market_recommendations
            ),
            Tool(
                name="calculate_monthly_investment",
                description="Calculate monthly investment required for a goal",
                func=self._calculate_monthly_investment
            )
        ]
        return tools
    
    def _get_portfolio_allocation(self, user_profile_str: str) -> str:
        """Tool function to get portfolio allocation"""
        try:
            # Parse user profile from string
            profile = json.loads(user_profile_str)
            profile_df = pd.DataFrame([profile])
            allocation = self.model_pipeline.predict(profile_df)[0]
            
            return f"Recommended allocation: Debt {allocation[0]:.1f}%, Equity {allocation[1]:.1f}%, Mutual Funds {allocation[2]:.1f}%"
        except Exception as e:
            return f"Error calculating allocation: {e}"
    
    def _get_market_recommendations(self, params_str: str) -> str:
        """Tool function to get market recommendations"""
        try:
            params = json.loads(params_str)
            recommendations = self.market_fetcher.get_recommendations(
                params.get('risk_appetite', 'Medium'),
                params.get('timeline_months', 12),
                params.get('goal_amount', 100000),
                params.get('allocation', [30, 40, 30])
            )
            
            result = "Top investment options:\n"
            for i, rec in enumerate(recommendations, 1):
                result += f"{i}. {rec['name']} - Expected return: {rec['year_return']:.2f}%\n"
            
            return result
        except Exception as e:
            return f"Error fetching recommendations: {e}"
    
    def _calculate_monthly_investment(self, goal_params_str: str) -> str:
        """Tool function to calculate monthly investment"""
        try:
            params = json.loads(goal_params_str)
            goal_amount = params.get('goal_amount', 100000)
            timeline_months = params.get('timeline_months', 12)
            
            if timeline_months > 0:
                monthly_investment = goal_amount / timeline_months
                return f"Monthly investment required: ₹{monthly_investment:,.0f}"
            else:
                return "Invalid timeline provided"
        except Exception as e:
            return f"Error calculating monthly investment: {e}"
    
    def process_user_input(self, user_input: str) -> str:
        """Process user input and provide financial advice"""
        
        # Extract financial data from user input
        if self.llm:
            extracted_data = self.data_extractor.extract_financial_data(user_input)
            # Update user context
            self.user_context.update(extracted_data)
        else:
            # Fallback to regex-based extraction
            extracted_data = self._parse_goal_context_fallback(user_input)
            self.user_context.update(extracted_data)
        
        # Handle non-financial queries
        non_financial_keywords = ['weather', 'recipe', 'movie', 'game', 'food', 'sports', 'cricket', 'football']
        if any(word in user_input.lower() for word in non_financial_keywords) and \
           not any(word in user_input.lower() for word in ['invest', 'save', 'money', 'fund', 'portfolio']):
            return "I'm a financial advisor AI. I can help you with investment planning, portfolio recommendations, and financial goal planning. What financial goals would you like to discuss?"
        
        # Check if we have enough information for recommendations
        if 'goal_amount' in self.user_context and 'timeline_months' in self.user_context:
            if self.llm:
                # Use LangChain for sophisticated response
                try:
                    response = self.recommendation_generator.generate_recommendation(self.user_context)
                    return response
                except Exception as e:
                    print(f"LLM error: {e}")
                    # Fall back to traditional method
                    return self._generate_traditional_recommendation()
            else:
                return self._generate_traditional_recommendation()
        else:
            # Ask for missing information
            missing_info = []
            if 'goal_amount' not in self.user_context:
                missing_info.append("goal amount")
            if 'timeline_months' not in self.user_context:
                missing_info.append("timeline")
            
            if missing_info:
                missing_str = " and ".join(missing_info)
                return f"I need to know your {missing_str} to provide specific recommendations. Could you please provide this information?"
            else:
                # Provide general portfolio advice
                return self._generate_general_advice()
    
    def _generate_traditional_recommendation(self) -> str:
        """Generate recommendation using traditional method"""
        # Create profile for model prediction
        profile = {
            'age': self.user_context.get('age', 30),
            'annual_income': self.user_context.get('annual_income', 800000),
            'monthly_savings': self.user_context.get('goal_amount', 100000) / self.user_context.get('timeline_months', 12) if self.user_context.get('timeline_months', 12) > 0 else 20000,
            'emergency_fund': 100000,
            'risk_appetite': self.user_context.get('risk_appetite', 'Medium'),
            'investment_goal': self.user_context.get('investment_goal', 'Wealth Creation'),
            'existing_investment_pct': 0.1
        }
        
        profile_df = pd.DataFrame([profile])
        allocation = self.model_pipeline.predict(profile_df)[0]
        
        # Get market recommendations
        recommendations = self.market_fetcher.get_recommendations(
            self.user_context.get('risk_appetite', 'Medium'),
            self.user_context.get('timeline_months', 12),
            self.user_context.get('goal_amount', 100000),
            allocation
        )
        
        return self._format_traditional_recommendation(recommendations, allocation)
    
    def _format_traditional_recommendation(self, recommendations, allocation) -> str:
        """Format recommendation in traditional format"""
        goal_amount = self.user_context.get('goal_amount', 100000)
        timeline_months = self.user_context.get('timeline_months', 12)
        monthly_investment = goal_amount / timeline_months if timeline_months > 0 else 0
        
        output = f"\n📈 Recommended Portfolio Allocation:\n"
        output += f"   • Debt Instruments: {allocation[0]:.1f}%\n"
        output += f"   • Equity: {allocation[1]:.1f}%\n"
        output += f"   • Mutual Funds: {allocation[2]:.1f}%\n"
        
        output += f"\n💰 Target Amount: ₹{goal_amount:,} in {timeline_months} months\n"
        output += f"   Monthly Investment Required: ₹{monthly_investment:,.0f}\n"
        
        if recommendations:
            output += f"\n📊 Top Investment Options:\n"
            for i, rec in enumerate(recommendations, 1):
                output += f"\n{i}. {rec['name']} ({rec['ticker']})\n"
                output += f"   Expected Return: {rec['year_return']:.2f}%\n"
                output += f"   Category: {rec['category']}\n"
        
        # Add allocation amounts
        output += f"\n💸 Allocation Breakdown:\n"
        output += f"   • Debt: ₹{monthly_investment * allocation[0] / 100:,.0f} per month\n"
        output += f"   • Equity: ₹{monthly_investment * allocation[1] / 100:,.0f} per month\n"
        output += f"   • Mutual Funds: ₹{monthly_investment * allocation[2] / 100:,.0f} per month\n"
        
        return output
    
    def _generate_general_advice(self) -> str:
        """Generate general portfolio advice"""
        profile = {
            'age': self.user_context.get('age', 30),
            'annual_income': self.user_context.get('annual_income', 800000),
            'monthly_savings': 20000,
            'emergency_fund': 100000,
            'risk_appetite': self.user_context.get('risk_appetite', 'Medium'),
            'investment_goal': self.user_context.get('investment_goal', 'Wealth Creation'),
            'existing_investment_pct': 0.1
        }
        
        profile_df = pd.DataFrame([profile])
        allocation = self.model_pipeline.predict(profile_df)[0]
        
        output = f"📈 Based on your profile, here's my portfolio recommendation:\n"
        output += f"   • Debt Instruments: {allocation[0]:.1f}%\n"
        output += f"   • Equity: {allocation[1]:.1f}%\n"
        output += f"   • Mutual Funds: {allocation[2]:.1f}%\n"
        output += f"\n💡 To get specific investment options and monthly contribution amounts,\n"
        output += f"   please provide your financial goal and timeline.\n"
        
        return output
    
    def _parse_goal_context_fallback(self, message: str) -> Dict[str, Any]:
        """Fallback parsing method using regex when LLM is not available"""
        extracted = {}
        
        # Amount parsing patterns
        amount_patterns = [
            r'(?:₹|Rs\.?|INR)?\s?(\d+(?:,\d+)*(?:\.\d+)?)\s*(L|l|lakhs?|lacs?|K|k|thousands?|Cr|cr|crores?)?',
            r'(\d+(?:,\d+)*(?:\.\d+)?)\s*(L|l|lakhs?|lacs?|K|k|thousands?|Cr|cr|crores?)?(?:\s*(?:rupees?|Rs\.?|INR))?',
            r'(\d+(?:,\d+)*(?:\.\d+)?)(?:\s|$)'
        ]
        
        for pattern in amount_patterns:
            amount_match = re.search(pattern, message, re.IGNORECASE)
            if amount_match:
                value_str = amount_match.group(1).replace(',', '')
                value = float(value_str)
                
                suffix = amount_match.group(2) if len(amount_match.groups()) > 1 else None
                
                if suffix:
                    suffix_lower = suffix.lower()
                    if suffix_lower.startswith('l') or suffix_lower.startswith('lac'):
                        value *= 100000
                    elif suffix_lower.startswith('k'):
                        value *= 1000
                    elif suffix_lower.startswith('cr'):
                        value *= 10000000
                
                if value >= 1000:
                    extracted["goal_amount"] = int(value)
                    break
        
        # Time patterns
        time_patterns = [
            r'(\d+)\s*(years?|yrs?)',
            r'(\d+)\s*(months?|mths?)',
            r'in\s*(\d+)\s*(years?|months?)',
            r'after\s*(\d+)\s*(years?|months?)'
        ]
        
        for pattern in time_patterns:
            time_match = re.search(pattern, message, re.IGNORECASE)
            if time_match:
                value = int(time_match.group(1))
                unit = time_match.group(2).lower()
                if 'year' in unit or 'yr' in unit:
                    extracted["timeline_months"] = value * 12
                else:
                    extracted["timeline_months"] = value
                break
        
        # Goal keywords
        goal_keywords = {
            'vacation': 'Short-Term',
            'holiday': 'Short-Term',
            'trip': 'Short-Term',
            'car': 'Short-Term',
            'vehicle': 'Short-Term',
            'wedding': 'Short-Term',
            'marriage': 'Short-Term',
            'house': 'Property Purchase',
            'home': 'Property Purchase',
            'education': 'Education',
            'study': 'Education',
            'retire': 'Retirement',
            'retirement': 'Retirement'
        }
        
        message_lower = message.lower()
        for keyword, goal_type in goal_keywords.items():
            if keyword in message_lower:
                extracted["investment_goal"] = goal_type
                break
        
        # Risk appetite
        if "low risk" in message_lower:
            extracted["risk_appetite"] = "Low"
        elif "high risk" in message_lower:
            extracted["risk_appetite"] = "High"
        elif "medium risk" in message_lower or "moderate risk" in message_lower:
            extracted["risk_appetite"] = "Medium"
        
        # Age
        age_match = re.search(r'\b(\d{2})\b(?:\s*years?\s*old)?', message)
        if age_match:
            extracted["age"] = int(age_match.group(1))
        
        # Annual income
        income_match = re.search(r'earn\s*(?:₹|Rs\.?|INR)?\s?(\d+(?:,\d+)*(?:\.\d+)?)\s*(L|l|lakhs?|K|k)?', message, re.IGNORECASE)
        if income_match:
            value_str = income_match.group(1).replace(',', '')
            value = float(value_str)
            suffix = income_match.group(2)
            if suffix and suffix.lower().startswith('l'):
                value *= 100000
            elif suffix and suffix.lower().startswith('k'):
                value *= 1000
            extracted["annual_income"] = int(value)
        
        return extracted
    
    def reset_context(self):
        """Reset user context for new conversation"""
        self.user_context = {}
        self.memory.clear()
    
    def chat(self, user_input: str) -> str:
        """Main chat interface"""
        if user_input.lower() in ['reset', 'clear', 'new']:
            self.reset_context()
            return "Context cleared. How can I help you with your financial planning?"
        
        return self.process_user_input(user_input)

class FinancialEducationTool:
    """Provides financial education and explanations"""
    
    def __init__(self, llm=None):
        self.llm = llm
        self.topics = {
            'mutual_fund': """
A mutual fund is a pool of money collected from many investors to invest in securities such as stocks, bonds, and other assets. Professional fund managers allocate the fund's assets and attempt to produce capital gains or income for the fund's investors.

Benefits:
- Professional management
- Diversification
- Liquidity
- Low minimum investment

Types:
- Equity funds
- Debt funds
- Hybrid funds
- Index funds
""",
            'sip': """
SIP (Systematic Investment Plan) is a method of investing a fixed sum regularly in a mutual fund scheme. It's similar to a recurring deposit but in mutual funds.

Benefits:
- Rupee cost averaging
- Power of compounding
- Disciplined investment
- Flexibility

Example: Investing ₹5,000 monthly in an equity fund for 10 years
""",
            'portfolio_diversification': """
Portfolio diversification is the practice of spreading investments across various financial instruments, industries, and other categories to reduce risk.

Key principles:
- Don't put all eggs in one basket
- Mix different asset classes
- Balance risk and return
- Regular rebalancing

Ideal mix depends on age, risk tolerance, and goals
"""
        }
    
    def get_explanation(self, topic: str) -> str:
        """Get explanation for a financial topic"""
        topic_lower = topic.lower()
        
        # Check predefined topics
        for key, explanation in self.topics.items():
            if key in topic_lower:
                return explanation
        
        # If LLM is available, generate custom explanation
        if self.llm:
            prompt = PromptTemplate(
                input_variables=["topic"],
                template="""
Explain the following financial concept in simple terms for an Indian investor:
Topic: {topic}

Include:
1. Basic definition
2. How it works
3. Benefits
4. Example (if applicable)

Keep it concise and easy to understand.
"""
            )
            chain = LLMChain(llm=self.llm, prompt=prompt)
            try:
                return chain.run(topic=topic)
            except:
                pass
        
        return f"I don't have specific information about '{topic}'. Please ask about mutual funds, SIP, portfolio diversification, or other investment topics."

class EnhancedFinVerseChat:
    """Enhanced chat interface with LangChain integration"""
    
    def __init__(self, model_pipeline, market_fetcher):
        self.model_pipeline = model_pipeline
        self.market_fetcher = market_fetcher
        self.agent = FinVerseAgent(model_pipeline, market_fetcher)
        self.education_tool = FinancialEducationTool(self.agent.llm)
    
    def run(self):
        """Run the enhanced chat interface"""
        print("\n" + "="*60)
        print("💬 Welcome to FinVerse AI - Enhanced Investment Advisor!")
        print("="*60)
        
        print("\n📋 I can help you with:")
        print("  • Portfolio allocation recommendations")
        print("  • Investment planning for your goals")
        print("  • Real-time market suggestions")
        print("  • Financial education and explanations")
        
        print("\n💡 Example queries:")
        print("  • 'I want to save 5L for a vacation in 6 months'")
        print("  • 'Need 20L for wedding in 2 years, medium risk'")
        print("  • 'What is a mutual fund?'")
        print("  • 'Explain SIP'")
        
        print("\n⌨️  Commands:")
        print("  • 'reset' - Clear conversation history")
        print("  • 'exit' - Quit the application")
        print("="*60)
        
        if not self.agent.llm:
            print("\n⚠️  Note: OpenAI API not configured. Running in basic mode.")
            print("   Set OPENAI_API_KEY environment variable for enhanced features.")
        
        print()
        
        while True:
            try:
                user_input = input("🧑 You: ").strip()
                
                if not user_input:
                    continue
                
                if user_input.lower() in ['exit', 'quit', 'bye']:
                    print("\n👋 Thank you for using FinVerse AI. Happy investing!")
                    break
                
                # Check for education queries
                education_keywords = ['what is', 'explain', 'tell me about', 'how does', 'what are']
                if any(keyword in user_input.lower() for keyword in education_keywords):
                    # Extract topic
                    for keyword in education_keywords:
                        if keyword in user_input.lower():
                            topic = user_input.lower().split(keyword)[-1].strip()
                            response = self.education_tool.get_explanation(topic)
                            print(f"\n🤖 FinVerse AI:\n{response}\n")
                            break
                else:
                    # Process as investment query
                    response = self.agent.chat(user_input)
                    print(f"\n🤖 FinVerse AI: {response}\n")
                
            except KeyboardInterrupt:
                print("\n\n👋 Thank you for using FinVerse AI. Happy investing!")
                break
            except Exception as e:
                print(f"\n❌ Error: {e}")
                print("Please try again or type 'reset' to clear the conversation.\n")

# Main execution functions

def initialize_finverse_system():
    """Initialize the FinVerse system with all components"""
    print("🚀 Initializing FinVerse AI with LangChain...")
    
    # Generate synthetic data
    print("📊 Generating synthetic financial data...")
    generator = FinVerseDataGenerator()
    synthetic_data = generator.generate_synthetic_data(n_samples=8000)
    
    # Train model
    print("🧠 Training Random Forest model...")
    finverse_model = FinVerseModel()
    model_pipeline = finverse_model.train_model(synthetic_data)
    finverse_model.save_model()
    
    # Initialize market fetcher
    print("📈 Initializing market data fetcher...")
    market_fetcher = MarketDataFetcher()
    
    print("✅ FinVerse AI system initialized successfully!")
    
    return model_pipeline, market_fetcher

def test_system():
    """Test the system with sample queries"""
    print("\n" + "="*60)
    print("🧪 Running System Tests")
    print("="*60)
    
    # Initialize system
    model_pipeline, market_fetcher = initialize_finverse_system()
    
    # Test cases
    test_queries = [
        "I want to save 5 lakhs for vacation in 6 months",
        "Need 20L for wedding in 2 years, I'm 28 years old with medium risk appetite",
        "I earn 10L annually and want to invest for retirement"
    ]
    
    agent = FinVerseAgent(model_pipeline, market_fetcher)
    
    for i, query in enumerate(test_queries, 1):
        print(f"\n📝 Test {i}: {query}")
        response = agent.chat(query)
        print(f"Response: {response[:200]}...")
        agent.reset_context()
    
    print("\n✅ Tests completed!")

def main():
    """Main function to run the enhanced FinVerse AI system"""
    try:
        # Check for API key
        if not os.environ.get("OPENAI_API_KEY"):
            print("\n⚠️  Warning: OPENAI_API_KEY not set in environment variables.")
            print("The system will run in basic mode without LLM features.")
            print("To enable full features, set your OpenAI API key:")
            print('  export OPENAI_API_KEY="your-api-key-here"\n')
        
        # Initialize the system
        model_pipeline, market_fetcher = initialize_finverse_system()
        
        # Launch enhanced chat interface
        chat_interface = EnhancedFinVerseChat(model_pipeline, market_fetcher)
        chat_interface.run()
        
    except KeyboardInterrupt:
        print("\n👋 System shutdown requested. Goodbye!")
    except Exception as e:
        print(f"\n❌ System error: {e}")
        import traceback
        traceback.print_exc()
        print("\nPlease check your configuration and try again.")

if __name__ == "__main__":
    # Uncomment to run tests
    # test_system()
    # Run main application
    main()

🚀 Initializing FinVerse AI with LangChain...
📊 Generating synthetic financial data...
🧠 Training Random Forest model...

Model Performance:
----------------------------------------
Debt Allocation - R²: 0.9846, MAE: 0.8397
Equity Allocation - R²: 0.9864, MAE: 1.0830
Mutual Fund Allocation - R²: 0.9882, MAE: 0.9933
Model saved to finverse_model.pkl
📈 Initializing market data fetcher...
✅ FinVerse AI system initialized successfully!

❌ System error: "FinancialRecommendationChain" object has no field "model_pipeline"
Please check your configuration and try again.
