In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib
import warnings
warnings.filterwarnings('ignore')
import re
import yfinance as yf
from datetime import datetime, timedelta

class FinVerseDataGenerator:
    def __init__(self):
        pass
    
    def generate_synthetic_data(self, n_samples=10000):
        np.random.seed(42)
        
        age = np.random.normal(32, 8, n_samples).clip(18, 50)
        
        base_income = 400000 + (age - 18) * 25000
        income_variation = np.random.lognormal(0, 0.6, n_samples)
        annual_income = (base_income * income_variation).clip(300000, 3000000)
        
        savings_rate = np.random.beta(2, 3, n_samples) * 0.3 + 0.1
        monthly_savings = (annual_income * savings_rate) / 12
        
        emergency_months = np.random.gamma(2, 2, n_samples).clip(2, 15)
        emergency_fund = monthly_savings * emergency_months
        
        risk_probs = np.zeros((n_samples, 3))
        for i in range(n_samples):
            if age[i] < 30:
                risk_probs[i] = [0.2, 0.4, 0.4]
            elif age[i] < 40:
                risk_probs[i] = [0.3, 0.5, 0.2]
            else:
                risk_probs[i] = [0.5, 0.4, 0.1]
        
        risk_appetite = []
        for i in range(n_samples):
            risk_appetite.append(np.random.choice(['Low', 'Medium', 'High'], p=risk_probs[i]))
        
        goal_probs = np.zeros((n_samples, 7))
        goal_names = ['Wealth Creation', 'Retirement', 'Short-Term', 'Education', 
                     'Emergency Fund', 'Tax Saving', 'Property Purchase']
        
        for i in range(n_samples):
            if age[i] < 30:
                goal_probs[i] = [0.35, 0.1, 0.2, 0.15, 0.05, 0.1, 0.05]
            elif age[i] < 40:
                goal_probs[i] = [0.25, 0.2, 0.15, 0.1, 0.1, 0.15, 0.05]
            else:
                goal_probs[i] = [0.2, 0.35, 0.1, 0.05, 0.15, 0.1, 0.05]
        
        investment_goals = []
        for i in range(n_samples):
            investment_goals.append(np.random.choice(goal_names, p=goal_probs[i]))
        
        existing_investment = np.random.beta(2, 5, n_samples) * 0.4
        
        data = pd.DataFrame({
            'age': age.astype(int),
            'annual_income': annual_income.astype(int),
            'monthly_savings': monthly_savings.astype(int),
            'emergency_fund': emergency_fund.astype(int),
            'risk_appetite': risk_appetite,
            'investment_goal': investment_goals,
            'existing_investment_pct': existing_investment.round(3)
        })
        
        allocations = self._generate_realistic_allocations(data)
        
        final_data = pd.concat([data, allocations], axis=1)
        
        return final_data
    
    def _generate_realistic_allocations(self, data):
        n_samples = len(data)
        
        debt_allocation = np.zeros(n_samples)
        equity_allocation = np.zeros(n_samples)
        mutual_fund_allocation = np.zeros(n_samples)
        
        for i in range(n_samples):
            age = data.iloc[i]['age']
            risk = data.iloc[i]['risk_appetite']
            goal = data.iloc[i]['investment_goal']
            income = data.iloc[i]['annual_income']
            
            base_equity = max(30, min(70, 100 - age))
            
            if risk == 'Low':
                equity_pct = base_equity * 0.7
                debt_pct = min(60, 70 - equity_pct)
            elif risk == 'Medium':
                equity_pct = base_equity * 0.9
                debt_pct = min(50, 60 - equity_pct)
            else:
                equity_pct = min(75, base_equity * 1.3)
                debt_pct = max(15, 40 - equity_pct)
            
            if goal == 'Short-Term':
                debt_pct = min(70, debt_pct * 1.6)
                equity_pct = max(15, equity_pct * 0.6)
            elif goal == 'Retirement':
                if age < 35:
                    equity_pct = min(70, equity_pct * 1.1)
                else:
                    debt_pct = min(55, debt_pct * 1.2)
            elif goal == 'Education':
                debt_pct = min(55, debt_pct * 1.3)
                equity_pct = max(20, equity_pct * 0.8)
            elif goal == 'Wealth Creation':
                equity_pct = min(75, equity_pct * 1.2)
                debt_pct = max(15, debt_pct * 0.8)
            elif goal == 'Tax Saving':
                mutual_fund_pct = 40
                remaining = 60
                debt_pct = debt_pct * (remaining / (debt_pct + equity_pct))
                equity_pct = equity_pct * (remaining / (debt_pct + equity_pct))
            
            if goal != 'Tax Saving':
                mutual_fund_pct = 100 - debt_pct - equity_pct
            
            if mutual_fund_pct < 20:
                mutual_fund_pct = 20
                remaining = 80
                total_de = debt_pct + equity_pct
                if total_de > 0:
                    debt_pct = debt_pct * (remaining / total_de)
                    equity_pct = equity_pct * (remaining / total_de)
            
            noise = np.random.normal(0, 1.5, 3)
            debt_pct = max(5, debt_pct + noise[0])
            equity_pct = max(10, equity_pct + noise[1])
            mutual_fund_pct = max(15, mutual_fund_pct + noise[2])
            
            total = debt_pct + equity_pct + mutual_fund_pct
            debt_pct = (debt_pct / total) * 100
            equity_pct = (equity_pct / total) * 100
            mutual_fund_pct = (mutual_fund_pct / total) * 100
            
            debt_allocation[i] = round(debt_pct, 2)
            equity_allocation[i] = round(equity_pct, 2)
            mutual_fund_allocation[i] = round(mutual_fund_pct, 2)
        
        return pd.DataFrame({
            'debt_allocation': debt_allocation,
            'equity_allocation': equity_allocation,
            'mutual_fund_allocation': mutual_fund_allocation
        })
    
    def save_to_csv(self, data, filename='finverse_training_data.csv'):
        data.to_csv(filename, index=False)
        print(f"Data saved to {filename}")
        return filename

class MarketDataFetcher:
    def __init__(self):
        self.nifty50_stocks = ['RELIANCE.NS', 'TCS.NS', 'HDFCBANK.NS', 'INFY.NS', 'ICICIBANK.NS',
                               'HDFC.NS', 'ITC.NS', 'SBIN.NS', 'BHARTIARTL.NS', 'KOTAKBANK.NS']
        self.mutual_funds_etf = ['0P0000XVJR.BO', '0P0000XW5S.BO', '0P0000XVJT.BO']
        self.debt_instruments = ['LIQUIDBEES.NS', 'GOLDBEES.NS']
        
    def get_stock_data(self, ticker, period='1y'):
        try:
            stock = yf.Ticker(ticker)
            hist = stock.history(period=period)
            info = stock.info
            return {
                'ticker': ticker,
                'name': info.get('longName', ticker),
                'current_price': info.get('currentPrice', hist['Close'][-1] if len(hist) > 0 else 0),
                'year_return': ((hist['Close'][-1] - hist['Close'][0]) / hist['Close'][0] * 100) if len(hist) > 1 else 0,
                'volatility': hist['Close'].pct_change().std() * np.sqrt(252) * 100 if len(hist) > 1 else 0,
                'category': 'Equity'
            }
        except:
            return None
    
    def get_mutual_fund_data(self, ticker):
        try:
            fund = yf.Ticker(ticker)
            hist = fund.history(period='1y')
            info = fund.info
            return {
                'ticker': ticker,
                'name': info.get('longName', ticker),
                'current_nav': hist['Close'][-1] if len(hist) > 0 else 0,
                'year_return': ((hist['Close'][-1] - hist['Close'][0]) / hist['Close'][0] * 100) if len(hist) > 1 else 0,
                'volatility': hist['Close'].pct_change().std() * np.sqrt(252) * 100 if len(hist) > 1 else 0,
                'category': 'Mutual Fund'
            }
        except:
            return None
    
    def get_recommendations(self, risk_appetite, investment_horizon_months, amount):
        recommendations = []
        
        if investment_horizon_months <= 6:
            debt_data = self.get_stock_data('LIQUIDBEES.NS', period='6mo')
            if debt_data:
                recommendations.append(debt_data)
            
            fd_option = {
                'ticker': 'FD',
                'name': 'Fixed Deposit',
                'current_price': amount,
                'year_return': 6.5,
                'volatility': 0,
                'category': 'Debt'
            }
            recommendations.append(fd_option)
            
        elif investment_horizon_months <= 12:
            if risk_appetite == 'Low':
                for ticker in self.debt_instruments:
                    data = self.get_stock_data(ticker, period='1y')
                    if data:
                        recommendations.append(data)
            elif risk_appetite == 'Medium':
                balanced_funds = ['0P0000XVJR.BO']
                for ticker in balanced_funds:
                    data = self.get_mutual_fund_data(ticker)
                    if data:
                        recommendations.append(data)
            else:
                for ticker in ['TCS.NS', 'INFY.NS']:
                    data = self.get_stock_data(ticker, period='1y')
                    if data:
                        recommendations.append(data)
        else:
            if risk_appetite == 'High':
                for ticker in self.nifty50_stocks[:5]:
                    data = self.get_stock_data(ticker, period='2y')
                    if data:
                        recommendations.append(data)
            else:
                for ticker in self.mutual_funds_etf:
                    data = self.get_mutual_fund_data(ticker)
                    if data:
                        recommendations.append(data)
        
        return recommendations[:3]

class FinVerseModel:
    def __init__(self):
        self.model = None
        self.market_fetcher = MarketDataFetcher()
        
    def prepare_features(self, data):
        feature_cols = ['age', 'annual_income', 'monthly_savings', 'emergency_fund', 
                       'risk_appetite', 'investment_goal', 'existing_investment_pct']
        target_cols = ['debt_allocation', 'equity_allocation', 'mutual_fund_allocation']
        
        X = data[feature_cols]
        y = data[target_cols]
        
        numerical_features = ['age', 'annual_income', 'monthly_savings', 
                            'emergency_fund', 'existing_investment_pct']
        categorical_features = ['risk_appetite', 'investment_goal']
        
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numerical_features),
                ('cat', OneHotEncoder(drop='first', sparse_output=False), categorical_features)
            ]
        )
        
        return X, y, preprocessor
    
    def train_model(self, data):
        X, y, preprocessor = self.prepare_features(data)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        
        self.model = Pipeline([
            ('preprocessor', preprocessor),
            ('regressor', RandomForestRegressor(
                n_estimators=200,
                max_depth=20,
                min_samples_split=5,
                min_samples_leaf=2,
                random_state=42,
                n_jobs=-1
            ))
        ])
        
        self.model.fit(X_train, y_train)
        
        y_pred = self.model.predict(X_test)
        
        print("\nModel Performance:")
        print("-" * 40)
        
        for i, target in enumerate(['Debt', 'Equity', 'Mutual Fund']):
            r2 = r2_score(y_test.iloc[:, i], y_pred[:, i])
            mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
            print(f"{target} Allocation - R²: {r2:.4f}, MAE: {mae:.4f}")
        
        return self.model
    
    def save_model(self, filename='finverse_model.pkl'):
        joblib.dump(self.model, filename)
        print(f"Model saved to {filename}")

def parse_goal_context(message):
    extracted = {}
    
    amount_patterns = [
        r'(?:₹|Rs\.?|INR)?\s?(\d+\.?\d*)\s?(L|l|lakhs?|K|k|thousands?|Cr|cr|crores?)?',
        r'(\d+\.?\d*)\s?(L|l|lakhs?|K|k|thousands?|Cr|cr|crores?)\s*(?:rupees?|Rs\.?|INR)?'
    ]
    
    for pattern in amount_patterns:
        amount_match = re.search(pattern, message, re.IGNORECASE)
        if amount_match:
            value = float(amount_match.group(1))
            suffix = amount_match.group(2)
            if suffix:
                if suffix.lower().startswith('l'):
                    value *= 100000
                elif suffix.lower().startswith('k'):
                    value *= 1000
                elif suffix.lower().startswith('cr'):
                    value *= 10000000
            extracted["goal_amount"] = int(value)
            break
    
    time_patterns = [
        r'(\d+)\s*(years?|yrs?)',
        r'(\d+)\s*(months?|mths?)',
        r'in\s*(\d+)\s*(years?|months?)',
        r'after\s*(\d+)\s*(years?|months?)'
    ]
    
    for pattern in time_patterns:
        time_match = re.search(pattern, message, re.IGNORECASE)
        if time_match:
            value = int(time_match.group(1))
            unit = time_match.group(2).lower()
            if 'year' in unit:
                extracted["timeline_months"] = value * 12
            else:
                extracted["timeline_months"] = value
            break
    
    goal_keywords = {
        'vacation': 'vacation',
        'holiday': 'vacation',
        'trip': 'vacation',
        'car': 'car purchase',
        'vehicle': 'car purchase',
        'wedding': 'wedding',
        'marriage': 'wedding',
        'house': 'property',
        'home': 'property',
        'education': 'education',
        'study': 'education',
        'retire': 'retirement',
        'retirement': 'retirement'
    }
    
    message_lower = message.lower()
    for keyword, goal_type in goal_keywords.items():
        if keyword in message_lower:
            extracted["goal_type"] = goal_type
            break
    
    if "low risk" in message_lower:
        extracted["risk_appetite"] = "Low"
    elif "high risk" in message_lower:
        extracted["risk_appetite"] = "High"
    elif "medium risk" in message_lower or "moderate risk" in message_lower:
        extracted["risk_appetite"] = "Medium"
    
    age_match = re.search(r'\b(\d{2})\b(?:\s*years?\s*old)?', message)
    if age_match:
        extracted["age"] = int(age_match.group(1))
    
    income_match = re.search(r'earn\s*(?:₹|Rs\.?|INR)?\s?(\d+\.?\d*)\s?(L|l|lakhs?|K|k)?', message, re.IGNORECASE)
    if income_match:
        value = float(income_match.group(1))
        suffix = income_match.group(2)
        if suffix and suffix.lower().startswith('l'):
            value *= 100000
        elif suffix and suffix.lower().startswith('k'):
            value *= 1000
        extracted["annual_income"] = int(value)
    
    return extracted

def format_investment_recommendations(recommendations, goal_amount, timeline_months):
    if not recommendations:
        return "Unable to fetch market data at this time."
    
    output = f"\n📊 Investment Recommendations for ₹{goal_amount:,} in {timeline_months} months:\n"
    output += "=" * 70 + "\n\n"
    
    for i, rec in enumerate(recommendations, 1):
        output += f"{i}. {rec['name']} ({rec['ticker']})\n"
        output += f"   Category: {rec['category']}\n"
        if rec['category'] == 'Equity':
            output += f"   Current Price: ₹{rec['current_price']:.2f}\n"
        elif rec['category'] == 'Mutual Fund':
            output += f"   Current NAV: ₹{rec['current_nav']:.2f}\n"
        output += f"   1-Year Return: {rec['year_return']:.2f}%\n"
        output += f"   Volatility: {rec['volatility']:.2f}%\n"
        
        expected_return = rec['year_return'] * (timeline_months / 12)
        expected_value = goal_amount * (1 + expected_return / 100)
        output += f"   Expected Value: ₹{expected_value:,.0f}\n\n"
    
    output += "\n⚠️ Disclaimer: Past performance does not guarantee future results. "
    output += "This is not personalized financial advice. Please consult a financial advisor.\n"
    
    return output

def run_enhanced_chat(model_pipeline):
    market_fetcher = MarketDataFetcher()
    
    print("\n💬 Welcome to FinVerse AI - Enhanced Investment Advisor!")
    print("\nI can help you with:")
    print("  • Portfolio allocation recommendations")
    print("  • Short-term goal planning (vacation, car, wedding, etc.)")
    print("  • Real-time investment suggestions from Indian markets\n")
    print("Examples:")
    print("  • 'I want to save 5L for a vacation in 6 months'")
    print("  • 'Need 20L for wedding in 2 years, medium risk'")
    print("  • 'I'm 30, earn 10L annually, want to invest for retirement'\n")
    print("Type 'exit' to quit.\n")
    
    while True:
        user_input = input("🧑 You: ").strip()
        
        if not user_input:
            continue
        if user_input.lower() in ['exit', 'quit']:
            print("👋 Thank you for using FinVerse AI. Goodbye!")
            break
        
        context = parse_goal_context(user_input)
        
        if not any(key in ['finance', 'invest', 'save', 'money', 'portfolio', 'stock', 
                           'mutual fund', 'goal_type', 'goal_amount'] for key in 
                  list(context.keys()) + user_input.lower().split()):
            if 'weather' in user_input.lower() or 'recipe' in user_input.lower() or \
               'movie' in user_input.lower() or 'game' in user_input.lower():
                print("\n🤖 I'm a financial advisor AI. I can help you with investment planning,")
                print("   portfolio recommendations, and financial goal planning.")
                print("   What financial goals would you like to discuss?\n")
                continue
        
        if 'goal_amount' in context and 'goal_type' in context:
            if 'timeline_months' not in context:
                print("\n🤖 I see you want to save ₹{:,} for {}. ".format(
                    context['goal_amount'], context['goal_type']))
                timeline_input = input("   When do you need this amount? (e.g., '6 months', '2 years'): ")
                timeline_context = parse_goal_context(timeline_input)
                if 'timeline_months' in timeline_context:
                    context['timeline_months'] = timeline_context['timeline_months']
                else:
                    context['timeline_months'] = 12
                    print("   Assuming 1 year timeline.")
            
            risk = context.get('risk_appetite', 'Medium')
            recommendations = market_fetcher.get_recommendations(
                risk, context['timeline_months'], context['goal_amount']
            )
            
            print(format_investment_recommendations(
                recommendations, context['goal_amount'], context['timeline_months']
            ))
            
            profile = {
                'age': context.get('age', 30),
                'annual_income': context.get('annual_income', 800000),
                'monthly_savings': int(context['goal_amount'] / context['timeline_months']),
                'emergency_fund': 100000,
                'risk_appetite': risk,
                'investment_goal': 'Short-Term',
                'existing_investment_pct': 0.1
            }
            
            profile_df = pd.DataFrame([profile])
            allocation = model_pipeline.predict(profile_df)[0]
            
            print(f"\n📈 Suggested Portfolio Allocation:")
            print(f"   • Debt Instruments: {allocation[0]:.1f}%")
            print(f"   • Equity: {allocation[1]:.1f}%")
            print(f"   • Mutual Funds: {allocation[2]:.1f}%\n")
            
        else:
            profile = {
                'age': context.get('age', 30),
                'annual_income': context.get('annual_income', 800000),
                'monthly_savings': 20000,
                'emergency_fund': 100000,
                'risk_appetite': context.get('risk_appetite', 'Medium'),
                'investment_goal': 'Wealth Creation',
                'existing_investment_pct': 0.1
            }
            
            profile_df = pd.DataFrame([profile])
            prediction = model_pipeline.predict(profile_df)[0]
            
            print(f"\n🤖 Based on your profile, here's my recommendation:")
            print(f"   • Debt Instruments: {prediction[0]:.1f}%")
            print(f"   • Equity: {prediction[1]:.1f}%")
            print(f"   • Mutual Funds: {prediction[2]:.1f}%")
            print(f"   • Total: {sum(prediction):.1f}%\n")

def main():
    print("Initializing FinVerse AI...")
    generator = FinVerseDataGenerator()
    synthetic_data = generator.generate_synthetic_data(n_samples=8000)
    
    print("\nTraining Random Forest model...")
    finverse_model = FinVerseModel()
    model_pipeline = finverse_model.train_model(synthetic_data)
    finverse_model.save_model()
    
    print("\n✅ Model training complete. Launching enhanced chat interface...")
    run_enhanced_chat(model_pipeline)

if __name__ == "__main__":
    main()

Initializing FinVerse AI...

Training Random Forest model...

Model Performance:
----------------------------------------
Debt Allocation - R²: 0.9846, MAE: 0.8397
Equity Allocation - R²: 0.9864, MAE: 1.0830
Mutual Fund Allocation - R²: 0.9882, MAE: 0.9933
Model saved to finverse_model.pkl

✅ Model training complete. Launching enhanced chat interface...

💬 Welcome to FinVerse AI - Enhanced Investment Advisor!

I can help you with:
  • Portfolio allocation recommendations
  • Short-term goal planning (vacation, car, wedding, etc.)
  • Real-time investment suggestions from Indian markets

Examples:
  • 'I want to save 5L for a vacation in 6 months'
  • 'Need 20L for wedding in 2 years, medium risk'
  • 'I'm 30, earn 10L annually, want to invest for retirement'

Type 'exit' to quit.



🧑 You:  Need 20L for wedding in 2 years, medium risk



📊 Investment Recommendations for ₹2,000,000 in 24 months:

1. SBI Contra Dir Gr (0P0000XVJR.BO)
   Category: Mutual Fund
   Current NAV: ₹424.06
   1-Year Return: 0.97%
   Volatility: 12.33%
   Expected Value: ₹2,038,783

2. Franklin India Eq Hybrid Dir IDCW-P (0P0000XW5S.BO)
   Category: Mutual Fund
   Current NAV: ₹34.75
   1-Year Return: 15.02%
   Volatility: 13.53%
   Expected Value: ₹2,600,940

3. SBI Focused Equity Fund Dir Gr (0P0000XVJT.BO)
   Category: Mutual Fund
   Current NAV: ₹396.29
   1-Year Return: 7.28%
   Volatility: 12.44%
   Expected Value: ₹2,291,237


⚠️ Disclaimer: Past performance does not guarantee future results. This is not personalized financial advice. Please consult a financial advisor.


📈 Suggested Portfolio Allocation:
   • Debt Instruments: 4.6%
   • Equity: 34.4%
   • Mutual Funds: 61.0%



🧑 You:  I'm 30, earn 10L annually, want to invest for retirement



🤖 I see you want to save ₹30 for retirement. 


   When do you need this amount? (e.g., '6 months', '2 years'):  retirement


   Assuming 1 year timeline.

📊 Investment Recommendations for ₹30 in 12 months:

1. SBI Contra Dir Gr (0P0000XVJR.BO)
   Category: Mutual Fund
   Current NAV: ₹424.06
   1-Year Return: 0.97%
   Volatility: 12.33%
   Expected Value: ₹30


⚠️ Disclaimer: Past performance does not guarantee future results. This is not personalized financial advice. Please consult a financial advisor.


📈 Suggested Portfolio Allocation:
   • Debt Instruments: 4.6%
   • Equity: 34.2%
   • Mutual Funds: 61.2%



🧑 You:  I want to save 5L for a vacation in 6 months



📊 Investment Recommendations for ₹500,000 in 6 months:

1. Nippon India ETF Nifty 1D Rate Liquid BeES (LIQUIDBEES.NS)
   Category: Equity
   Current Price: ₹999.99
   1-Year Return: -0.00%
   Volatility: 0.01%
   Expected Value: ₹499,997

2. Fixed Deposit (FD)
   Category: Debt
   1-Year Return: 6.50%
   Volatility: 0.00%
   Expected Value: ₹516,250


⚠️ Disclaimer: Past performance does not guarantee future results. This is not personalized financial advice. Please consult a financial advisor.


📈 Suggested Portfolio Allocation:
   • Debt Instruments: 4.6%
   • Equity: 34.4%
   • Mutual Funds: 61.0%



🧑 You:  exit


👋 Thank you for using FinVerse AI. Goodbye!
