In [1]:
import pandas as pd
import numpy as np

# Set seed for reproducibility
np.random.seed(42)

# Number of records
n_records = 1000

# Create synthetic data for BudgetWise
data = {
    'User_ID': np.arange(1, n_records + 1),
    'Income': np.random.normal(5000, 1000, n_records).round(2),  # Monthly income in dollars
    'Rent': np.random.normal(1500, 300, n_records).round(2),
    'Groceries': np.random.normal(300, 100, n_records).round(2),
    'Utilities': np.random.normal(200, 50, n_records).round(2),
    'Entertainment': np.random.normal(200, 80, n_records).round(2),
    'Savings': np.random.normal(500, 200, n_records).round(2),
    'Loan_Payment': np.random.choice([0, 300, 500], n_records),  # Loan payments, if any
    'Monthly_Budget_Limit': np.random.normal(3000, 500, n_records).round(2),  # Custom budget limit
}

# Calculate total monthly expenses
data['Total_Expenses'] = (
    data['Rent'] + data['Groceries'] + data['Utilities'] +
    data['Entertainment'] + data['Loan_Payment'] + data['Savings']
).round(2)

# Determine if the user exceeded their budget
data['Exceeded_Budget'] = np.where(data['Total_Expenses'] > data['Monthly_Budget_Limit'], 1, 0)

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv("budgetwise_synthetic_data.csv", index=False)

print("Synthetic BudgetWise dataset created!")
df.head()


Synthetic BudgetWise dataset created!


Unnamed: 0,User_ID,Income,Rent,Groceries,Utilities,Entertainment,Savings,Loan_Payment,Monthly_Budget_Limit,Total_Expenses,Exceeded_Budget
0,1,5496.71,1919.81,232.48,104.61,130.92,415.25,300,2388.39,3103.07,1
1,2,4861.74,1777.39,285.55,156.98,197.5,409.32,0,3712.31,2826.74,0
2,3,5647.69,1517.89,220.76,179.32,201.44,140.87,500,3775.62,2760.28,0
3,4,6523.03,1305.92,269.2,294.38,237.81,433.98,300,3441.14,2841.29,0
4,5,4765.85,1709.47,110.64,227.83,90.65,646.57,500,2619.21,3285.16,1


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
df = pd.read_csv("budgetwise_synthetic_data.csv")

# Features and target variable
X = df[['Income', 'Rent', 'Groceries', 'Utilities', 'Entertainment', 'Savings', 'Loan_Payment']]
y = df['Exceeded_Budget']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train a RandomForest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Model Accuracy:", accuracy)
print("Classification Report:\n", report)

# Add a message for each user based on the budget outcome
X_test = X_test.reset_index(drop=True)
budget_test = df[['Monthly_Budget_Limit', 'Total_Expenses']].iloc[y_test.index].reset_index(drop=True)
predictions = pd.DataFrame({'Exceeded_Budget': y_pred, 'Budget_Limit': budget_test['Monthly_Budget_Limit'], 'Total_Expenses': budget_test['Total_Expenses']})

# Add custom messages
def budget_message(row):
    if row['Exceeded_Budget'] == 1:
        excess_amount = row['Total_Expenses'] - row['Budget_Limit']
        return f"You have exceeded your budget by ${excess_amount:.2f}. Consider reviewing your expenses."
    else:
        remaining_amount = row['Budget_Limit'] - row['Total_Expenses']
        return f"You are within your budget! You have ${remaining_amount:.2f} left for savings or other expenses."

predictions['Message'] = predictions.apply(budget_message, axis=1)

# Display a sample of custom messages
print("\nSample Budget Messages:")
print(predictions[['Total_Expenses', 'Budget_Limit', 'Message']].head(10))


Model Accuracy: 0.7033333333333334
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.78      0.71       139
           1       0.77      0.64      0.70       161

    accuracy                           0.70       300
   macro avg       0.71      0.71      0.70       300
weighted avg       0.71      0.70      0.70       300


Sample Budget Messages:
   Total_Expenses  Budget_Limit  \
0         3302.14       3161.25   
1         2991.05       2685.48   
2         3083.61       2041.33   
3         3567.17       3542.61   
4         3190.90       2984.24   
5         3209.47       1967.35   
6         3176.60       2804.80   
7         2004.06       2187.25   
8         3150.13       2765.33   
9         3206.04       2510.85   

                                             Message  
0  You have exceeded your budget by $140.89. Cons...  
1  You are within your budget! You have $-305.57 ...  
2  You have exceeded your budget by $1