In [53]:
!pip install scikit-learn pandas joblib



Import Libraries

In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
import joblib

Load Dataset

In [55]:
df = pd.read_csv("DatasetFinalCSV.csv")
print("Columns in dataset:", df.columns.tolist())
df.head()

Columns in dataset: ['User', 'Month', 'Monthly_Budget', 'Grocery_Ini1', 'Rent_Ini2', 'Transport_Ini3', 'InitialExpense', 'Have_Balance', 'OtherExpense_name', 'AmountOfProduct', 'Remaining_Balance', 'Purchase_Allowed']


Unnamed: 0,User,Month,Monthly_Budget,Grocery_Ini1,Rent_Ini2,Transport_Ini3,InitialExpense,Have_Balance,OtherExpense_name,AmountOfProduct,Remaining_Balance,Purchase_Allowed
0,User1,January,10000,2000,2500,1000,5500,4500,Shoes,3000,1500,"Yes, But check 'Have Balance'."
1,User1,February,15000,2500,2500,1000,6000,9000,Laptop,15000,-6000,"No,You have not enough Balance."
2,User1,March,13000,2500,2500,1200,6200,6800,Watch,7000,-200,"No,You have not enough Balance."
3,User1,April,15666,2100,2500,1100,5700,9966,Dress,2000,7966,"Yes, But check 'Have Balance'."
4,User1,May,17166,2350,2500,1200,6050,11116,Vacation,5000,6116,"Yes, But check 'Have Balance'."


Column Identification

In [56]:
budget_col ="Monthly_Budget"
product_price_col ="AmountOfProduct"
month_col = "Month"
target_col = df.columns[-1]

In [57]:
ignore_cols = [budget_col, product_price_col, month_col, target_col]
expense_cols = ['Grocery_Ini1', 'Rent_Ini2', 'Transport_Ini3']

In [58]:
print("Budget column:", budget_col)
print("Product price column:", product_price_col)
print("Month column:", month_col)
print("Expense columns:", expense_cols)
print("Target column:", target_col)

Budget column: Monthly_Budget
Product price column: AmountOfProduct
Month column: Month
Expense columns: ['Grocery_Ini1', 'Rent_Ini2', 'Transport_Ini3']
Target column: Purchase_Allowed


Total Initial

In [59]:
for c in expense_cols:
    df[c] = pd.to_numeric(df[c].astype(str).str.replace(r'[^\d.\-]','',regex=True), errors='coerce').fillna(0)

df['initial_total'] = df[expense_cols].sum(axis=1)


In [60]:
print("Final expense columns:", expense_cols)
df[['Monthly_Budget'] + expense_cols + ['initial_total']].head()

Final expense columns: ['Grocery_Ini1', 'Rent_Ini2', 'Transport_Ini3']


Unnamed: 0,Monthly_Budget,Grocery_Ini1,Rent_Ini2,Transport_Ini3,initial_total
0,10000,2000,2500,1000,5500
1,15000,2500,2500,1000,6000
2,13000,2500,2500,1200,6200
3,15666,2100,2500,1100,5700
4,17166,2350,2500,1200,6050


Prepare Features and Target

In [61]:
X = df[[budget_col] + expense_cols + [product_price_col, month_col, "initial_total"]]
y = df[target_col]

In [62]:
#encoding Target labels with 0/1
label_encoder = LabelEncoder()
y_enc = label_encoder.fit_transform(y)

In [63]:
numeric_features = [budget_col, product_price_col, "initial_total"] + expense_cols
categorical_features = [month_col]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

Model Building

In [64]:
pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("clf", RandomForestClassifier(n_estimators=200, random_state=42))
])

X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

print("Training complete")
print("Train Accuracy:", pipeline.score(X_train, y_train))
print("Test Accuracy:", pipeline.score(X_test, y_test))

Training complete
Train Accuracy: 1.0
Test Accuracy: 0.9166666666666666


In [65]:
#Model Saving
joblib.dump({
    "pipeline": pipeline,
    "label_encoder": label_encoder,
    "feature_cols": X.columns.tolist(),
    "budget_col": budget_col,
    "product_price_col": product_price_col,
    "expense_cols": expense_cols,
    "month_col": month_col
}, "rf_pipeline.pkl")

print("Model saved as rf_pipeline.pkl")

Model saved as rf_pipeline.pkl


Prediction

In [68]:

def predict_user_input(month, budget, expenses_dict, product_name, product_price, top_n_suggestions=5):
    # Build input row
    row = {}
    row[budget_col] = budget

    # Fill in expense values
    for c in expense_cols:
        row[c] = float(expenses_dict.get(c, 0))

    # Compute initial total
    row["initial_total"] = sum(float(expenses_dict.get(c, 0)) for c in expense_cols)

    row[product_price_col] = product_price
    row[month_col] = month

    # Ensure correct feature order
    input_df = pd.DataFrame([row], columns=X.columns)

    # Make prediction
    pred_enc = pipeline.predict(input_df)[0]
    pred_label = label_encoder.inverse_transform([pred_enc])[0]

    # Balance calculation
    balance = budget - row["initial_total"]

    # Suggestions
    suggestions = []
    if str(pred_label).strip().lower() not in ["yes", "y", "true", "1"]:
        cand = df[df[product_price_col] <= balance]
        if not cand.empty:
            suggestions = cand.sort_values(by=product_price_col, ascending=False).head(top_n_suggestions)

    return {
        "initial_total": row["initial_total"],
        "balance": balance,
        "prediction": str(pred_label),
        "suggestions": suggestions
    }


Test

In [69]:
month = input("Enter Month (e.g., January): ")
budget = float(input("Enter your total budget: "))

expenses_dict = {}
for col in expense_cols:
    val = input(f"Enter expense for {col} (leave blank for 0): ")
    expenses_dict[col] = float(val) if val.strip() != "" else 0.0

product_name = input("Enter the product name you want to buy: ")
product_price = float(input("Enter approximate price of the product: "))

2
result = predict_user_input(month, budget, expenses_dict, product_name, product_price, top_n_suggestions=5)

# Show results
print("\n===== RESULT =====")
print(f"Initial Expense Total: {result['initial_total']}")
print(f"Remaining Balance: {result['balance']}")
print(f"Prediction (Can Buy?): {result['prediction']}")

if result['prediction'].strip().lower() == "no":
    print("\nSuggested Products within your balance:")
    if result['suggestions'] is not None and not result['suggestions'].empty:
        print(result['suggestions'][[product_price_col, month_col]].to_string(index=False))
    else:
        print("No alternative products found within balance.")

Enter Month (e.g., January): January
Enter your total budget: 20000
Enter expense for Grocery_Ini1 (leave blank for 0): 2000
Enter expense for Rent_Ini2 (leave blank for 0): 2500
Enter expense for Transport_Ini3 (leave blank for 0): 1350
Enter the product name you want to buy: Bed
Enter approximate price of the product: 35000

===== RESULT =====
Initial Expense Total: 5850.0
Remaining Balance: 14150.0
Prediction (Can Buy?): No,You have not enough Balance.
