In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings('ignore')

# Create synthetic dataset.
# Seed NumPy's global random state so every run draws the same values.
np.random.seed(42)

def generate_financial_data(n_samples=20):
    """Build a synthetic table of per-user financial attributes.

    Values are drawn from NumPy's global random state, so seeding that
    state beforehand makes the output reproducible.

    Args:
        n_samples: Number of user rows to generate.

    Returns:
        A ``pandas.DataFrame`` with one row per user: the eight sampled
        columns followed by the derived ``disposable_income`` and
        ``savings_potential`` columns.
    """
    uniform = np.random.uniform
    randint = np.random.randint

    # The order of these draws determines the values produced under a
    # fixed seed; keep it in sync with the column order below.
    income = uniform(40000, 200000, n_samples)
    expenses = uniform(2000, 8000, n_samples)
    rate = uniform(0.05, 0.4, n_samples)
    ages = randint(25, 65, n_samples)
    debt_ratio = uniform(0.1, 0.6, n_samples)
    tolerance = randint(1, 6, n_samples)  # 1-5 scale
    horizon = randint(2, 21, n_samples)  # years

    # Derived features: income left after a year of expenses, and the
    # share of it the user is expected to save.
    leftover = income - (expenses * 12)
    potential = leftover * rate

    columns = {
        'user_id': range(1, n_samples + 1),
        'annual_income': income,
        'monthly_expenses': expenses,
        'savings_rate': rate,
        'age': ages,
        'debt_to_income': debt_ratio,
        'risk_tolerance': tolerance,
        'investment_horizon': horizon,
        'disposable_income': leftover,
        'savings_potential': potential,
    }
    return pd.DataFrame(columns)

def determine_risk_profile(age, risk_tolerance, investment_horizon):
    """Classify an investor into one of five risk profiles.

    The score is a weighted blend (30% / 40% / 30%) of three components
    that are each expressed on roughly a 0-5 scale:

    * age: ``6 - age / 10`` (younger investors score higher),
    * self-reported risk tolerance (expected on a 1-5 scale),
    * investment horizon in years, capped at 20 and rescaled to 0-5.

    Args:
        age: Investor age in years.
        risk_tolerance: Self-reported tolerance, expected in 1-5.
        investment_horizon: Planned investment horizon in years.

    Returns:
        One of "Conservative", "Moderate-Conservative", "Moderate",
        "Moderate-Aggressive" or "Aggressive".
    """
    age_score = 6 - age / 10  # Younger age allows more risk

    # Longer horizon allows more risk. The horizon is rescaled to 0-5 so
    # it sits on the same scale as the other components; using the raw
    # 0-1 fraction would keep the total below 4 for any adult with a 1-5
    # tolerance, leaving the two most aggressive tiers unreachable.
    # Horizons beyond 20 years are capped so they cannot dominate the score.
    horizon_score = 5 * min(investment_horizon, 20) / 20

    risk_score = (
        age_score * 0.3 +
        risk_tolerance * 0.4 +  # Self-reported risk tolerance
        horizon_score * 0.3
    )

    # Exclusive upper bounds, checked in ascending order.
    tiers = (
        (2, "Conservative"),
        (3, "Moderate-Conservative"),
        (4, "Moderate"),
        (4.5, "Moderate-Aggressive"),
    )
    for upper_bound, label in tiers:
        if risk_score < upper_bound:
            return label
    return "Aggressive"

def suggest_portfolio_allocation(risk_profile):
    """Look up the target asset mix for a risk profile.

    Args:
        risk_profile: One of "Conservative", "Moderate-Conservative",
            "Moderate", "Moderate-Aggressive" or "Aggressive".

    Returns:
        A new dict mapping each asset class name to its fractional
        weight; the weights of every profile sum to 1.

    Raises:
        KeyError: If ``risk_profile`` is not one of the known profiles.
    """
    asset_classes = (
        "Bonds",
        "Large Cap Stocks",
        "Mid Cap Stocks",
        "International Stocks",
        "Cash",
    )
    # Weights are listed in the same order as ``asset_classes``.
    weights_by_profile = {
        "Conservative": (0.60, 0.20, 0.10, 0.05, 0.05),
        "Moderate-Conservative": (0.45, 0.25, 0.15, 0.10, 0.05),
        "Moderate": (0.30, 0.30, 0.20, 0.15, 0.05),
        "Moderate-Aggressive": (0.20, 0.35, 0.25, 0.15, 0.05),
        "Aggressive": (0.10, 0.40, 0.25, 0.20, 0.05),
    }
    weights = weights_by_profile[risk_profile]
    return dict(zip(asset_classes, weights))

def calculate_investment_amount(income, expenses, savings_rate):
    disposable_income = income - (expenses * 12)
    recommended_investment = disposable_income * savings_rate
    return round(recommended_investment, 2)

def generate_investment_recommendation(user_data):
    """Assemble a personalized investment recommendation for one user.

    Args:
        user_data: Mapping that provides the keys ``age``,
            ``risk_tolerance``, ``investment_horizon``, ``annual_income``,
            ``monthly_expenses`` and ``savings_rate``.

    Returns:
        A dict with ``risk_profile`` (the profile label),
        ``recommended_annual_investment`` (the total amount) and
        ``portfolio_allocation`` (asset class -> amount, i.e. the total
        split by the profile's weights and rounded to two decimals).
    """
    profile = determine_risk_profile(
        age=user_data['age'],
        risk_tolerance=user_data['risk_tolerance'],
        investment_horizon=user_data['investment_horizon'],
    )
    total = calculate_investment_amount(
        income=user_data['annual_income'],
        expenses=user_data['monthly_expenses'],
        savings_rate=user_data['savings_rate'],
    )

    # Convert the profile's fractional weights into currency amounts.
    amount_by_asset = {}
    for asset_name, weight in suggest_portfolio_allocation(profile).items():
        amount_by_asset[asset_name] = round(weight * total, 2)

    return {
        'risk_profile': profile,
        'recommended_annual_investment': total,
        'portfolio_allocation': amount_by_asset,
    }

# Generate dataset
df = generate_financial_data(20)

# Example usage: show the first rows, then print the recommendation for
# the first generated user.
print("Generated Dataset:")
print(df.head())
print("\nSample Investment Recommendation:")
sample_user = df.iloc[0].to_dict()
recommendation = generate_investment_recommendation(sample_user)
total_investment = recommendation['recommended_annual_investment']
print("\nRisk Profile:", recommendation['risk_profile'])
print("Recommended Annual Investment: $", total_investment)
print("\nRecommended Portfolio Allocation:")
for asset, amount in recommendation['portfolio_allocation'].items():
    # A recommended investment of 0 would otherwise raise
    # ZeroDivisionError when computing each asset's share.
    share = amount / total_investment * 100 if total_investment else 0.0
    print(f"{asset}: ${amount:,.2f} ({share:.1f}%)")

Generated Dataset:
   user_id  annual_income  monthly_expenses  savings_rate  age  \
0        1   99926.419016       5671.117368      0.092713   40   
1        2  192114.289026       2836.963164      0.223312   42   
2        3  157119.030690       3752.867891      0.062036   48   
3        4  135785.357472       4198.171060      0.368262   50   
4        5   64962.982471       4736.419905      0.140573   49   

   debt_to_income  risk_tolerance  investment_horizon  disposable_income  \
0        0.485635               1                  11       31873.010596   
1        0.137022               2                  14      158070.731059   
2        0.279233               2                   7      112084.615995   
3        0.157935               4                  13       85407.304754   
4        0.531552               5                  13        8125.943607   

   savings_potential  
0        2955.054613  
1       35299.078218  
2        6953.279264  
3       31452.276883  
4        114