In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [4]:
# Load the data
data = pd.read_csv('financial_data.csv')

In [5]:
data.head()

Unnamed: 0,Age,Gender,Marital Status,Number of Dependents,Occupation,Income,Employment Type,Monthly Expenses,Debt,Savings,Short-term Goals,Medium-term Goals,Long-term Goals,Risk Tolerance,Preferred Investments,Healthcare Expenses,Tax Status
0,62,Female,Divorced,1,Doctor,35013,Part-time,48024,30339,0,9751,41657,40810,Moderate,Stocks,2877,Taxpayer
1,18,Male,Divorced,3,Doctor,45804,Self-employed,37894,6433,7910,8669,26818,17563,Moderate,Diversified,4047,Taxpayer
2,21,Female,Single,1,Accountant,74092,Self-employed,48220,9159,25872,7181,19177,11701,Moderate,Stocks,6137,Taxpayer
3,21,Female,Married,1,Sales Representative,63111,Full-time,38533,20888,24578,7469,47734,40147,Aggressive,Stocks,2206,Tax-Exempt
4,57,Male,Divorced,2,Lawyer,78735,Full-time,40577,11296,38158,5549,47332,30633,Moderate,Bonds,1468,Taxpayer


In [6]:

# Define a function to determine risk tolerance based on age, number of dependents, and investment
def determine_risk_tolerance_modified(row):
    age = row['Age']
    num_dependents = row['Number of Dependents']
    investment = row['Preferred Investments']

    # Modify the risk tolerance based on the criteria you want
    if age < 30 and num_dependents == 0:
        if investment == 'Stocks':
            return 'Aggressive'
        elif investment == 'Bonds':
            return 'Moderate'
        else:
            return 'Conservative'
    elif age >= 30 and num_dependents > 0:
        if investment == 'Stocks':
            return 'Moderate'
        elif investment == 'Bonds':
            return 'Moderate'
        else:
            return 'Conservative'
    else:
        return 'Conservative'

# Apply the function to determine risk tolerance
data['Risk Tolerance'] = data.apply(determine_risk_tolerance_modified, axis=1)


# Select numerical features for scaling
numerical_features = ['Age', 'Number of Dependents', 'Income', 'Monthly Expenses', 'Debt', 'Savings',
                      'Short-term Goals', 'Medium-term Goals', 'Long-term Goals', 'Healthcare Expenses']

# Scale numerical features
scaler = StandardScaler()
data[numerical_features] = scaler.fit_transform(data[numerical_features])





In [7]:

# Define a function to determine saving tolerance based on risk tolerance and income
def determine_saving_tolerance(row):
    risk_tolerance = row['Risk Tolerance']
    income = row['Income']

    if risk_tolerance == 'Aggressive':
        return income * 0.2 
    elif risk_tolerance == 'Moderate':
        return income * 0.15 
    else:
        return income * 0.1 
    
# Apply the function to determine saving tolerance
data['Saving Tolerance'] = data.apply(determine_saving_tolerance, axis=1)
    

In [8]:



# Define features (X) and target variable (y) for risk tolerance
X_risk = data[numerical_features]
y_risk = data['Risk Tolerance']  # Assuming 'Risk Tolerance' is the target variable

# Define features (X) and target variable (y) for saving tolerance
X_saving = data[numerical_features]
y_saving = data['Saving Tolerance']  # Use 'Saving Tolerance' as the new target variable

# Split the data into training and testing sets for risk tolerance
X_train_risk, X_test_risk, y_train_risk, y_test_risk = train_test_split(X_risk, y_risk, test_size=0.2, random_state=42)

# Split the data into training and testing sets for saving tolerance
X_train_saving, X_test_saving, y_train_saving, y_test_saving = train_test_split(X_saving, y_saving, test_size=0.2, random_state=42)

# Initialize and train a machine learning model for risk tolerance (Random Forest Classifier as an example)
model_risk = RandomForestClassifier(random_state=42)
model_risk.fit(X_train_risk, y_train_risk)

# Initialize and train a machine learning model for saving tolerance (Random Forest Regressor as an example)
model_saving = RandomForestRegressor(random_state=42)
model_saving.fit(X_train_saving, y_train_saving)

# Evaluate the model for risk tolerance
accuracy_risk = model_risk.score(X_test_risk, y_test_risk) * 100
print('Model accuracy for risk tolerance:', accuracy_risk)

# Evaluate the model for saving tolerance
r_squared_saving = model_saving.score(X_test_saving, y_test_saving) * 100 
print('Model R-squared for saving tolerance: {:.2f}%'.format(r_squared_saving))



Model accuracy for risk tolerance: 66.5
Model R-squared for saving tolerance: 93.45%
