In [1]:
# FinAssist: Investment Advisor

In [2]:
# SECTION 1: Imports 
import tkinter as tk
from tkinter import messagebox, ttk

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

In [3]:
# SECTION 2: Full-Feature Model
# Fits an XGBoost classifier for 'Risk Tolerance' on the first 2,500 rows of
# finance_data.csv, using five raw columns plus three engineered ratios.
# .copy() detaches the slice so the column assignments below act on an
# independent frame.
full_df = pd.read_csv("finance_data.csv").iloc[:2500].copy()
features = ['Age', 'Income Level', 'Investment Goals', 'Loan Status', 'Employment Status']
target = 'Risk Tolerance'

# Handle missing values: most frequent value for text columns, median otherwise
for col in features + [target]:
    if full_df[col].dtype == 'object':
        full_df[col] = full_df[col].fillna(full_df[col].mode()[0])
    else:
        full_df[col] = full_df[col].fillna(full_df[col].median())

# Creating additional features
engineered_cols = ['Net Savings', 'Loan to Income Ratio', 'Investment Ratio']
# Zero balances are turned into NaN before the +1 offset, so rows with a zero
# balance get NaN ratios, which are set to 0 below.
balance_plus_one = full_df['Account Balance'].replace(0, np.nan) + 1
full_df['Net Savings'] = full_df['Deposits'] - full_df['Withdrawals']
# NOTE(review): despite its name, this ratio divides by the account balance,
# not by income - confirm that this is intended.
full_df['Loan to Income Ratio'] = full_df['Loan Amount'] / balance_plus_one
full_df['Investment Ratio'] = full_df['Investments'] / balance_plus_one
# A balance of exactly -1 makes the denominator 0 and the ratio +/-inf, which
# fillna() alone would not catch; treat it like any other missing ratio so no
# infinite value reaches the model.
full_df[engineered_cols] = (
    full_df[engineered_cols].replace([np.inf, -np.inf], np.nan).fillna(0)
)
features += engineered_cols

# Label encode categorical variables (the fitted encoders are kept so the
# integer codes can be mapped back to the original labels later)
categorical_cols = ['Occupation', 'Income Level', 'Loan Status', 'Employment Status', 'Risk Tolerance', 'Investment Goals']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    full_df[col] = le.fit_transform(full_df[col])
    label_encoders[col] = le

# Train full model on an 80/20 split
X_full = full_df[features]
y_full = full_df[target]
X_train_f, X_test_f, y_train_f, y_test_f = train_test_split(X_full, y_full, test_size=0.2, random_state=42)
# use_label_encoder is intentionally not passed: the XGBoost version this
# notebook was run with ignores it and only emits a
# 'Parameters: { "use_label_encoder" } are not used.' warning.
full_model = XGBClassifier(
    eval_metric='mlogloss',
    learning_rate=0.1, max_depth=6, n_estimators=200,
    subsample=0.8, colsample_bytree=0.8, random_state=42
)
full_model.fit(X_train_f, y_train_f)
full_pred = full_model.predict(X_test_f)
# Held-out accuracy of the full model, in percent
full_accuracy = 100 * (full_pred == y_test_f).mean()

# SECTION 3: GUI Model Training
# Trains the smaller model used by the GUI. It predicts risk tolerance from
# the four inputs the form collects: age band, income band, employment status
# and loan status.
df = pd.read_csv('finance_data.csv')
df = df[['Risk Tolerance', 'Age', 'Income Level', 'Employment Status', 'Loan Status']].copy()
df.columns = ['RiskTolerance', 'Age', 'IncomeLevel', 'EmploymentStatus', 'LoanStatus']

# Clean and impute income values: strip '?' and thousands separators, coerce
# anything still non-numeric to NaN, then fill NaN with the median.
df['IncomeLevel'] = df['IncomeLevel'].astype(str).str.replace('?', '', regex=False).str.replace(',', '', regex=False).str.strip()
df['IncomeLevel'] = pd.to_numeric(df['IncomeLevel'], errors='coerce')
# fit_transform returns an (n, 1) array; flatten it so a plain 1-D column is assigned.
df['IncomeLevel'] = SimpleImputer(strategy='median').fit_transform(df[['IncomeLevel']]).ravel()

# Clean age values and remove incomplete rows
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
df = df.dropna(subset=['Age', 'RiskTolerance', 'EmploymentStatus', 'LoanStatus'])

# Categorize age and income into groups. Bins are right-closed, so values
# outside (17, 100] for age or at/below 0 for income get no group (NaN).
df['AgeGroup'] = pd.cut(df['Age'], bins=[17, 25, 35, 50, 65, 100],
                        labels=['18-25', '26-35', '36-50', '51-65', '66+'])
df['IncomeGroup'] = pd.cut(df['IncomeLevel'], bins=[0, 30000, 70000, float('inf')],
                           labels=['Below ₹30,000', '₹30,000 to ₹70,000', 'Above ₹70,000'])

# Encode target labels. Whitespace and letter case are normalised first, and
# rows whose label is still unknown are dropped: an unmapped label would
# otherwise become a NaN target and break the stratified split and the fit.
risk_map = {'Low': 0, 'Medium': 1, 'High': 2}
df['RiskToleranceNum'] = (
    df['RiskTolerance'].astype(str).str.strip().str.capitalize().map(risk_map)
)
unmapped_rows = df['RiskToleranceNum'].isna()
if unmapped_rows.any():
    print(f"Dropping {int(unmapped_rows.sum())} row(s) with an unrecognised Risk Tolerance label")
    df = df.loc[~unmapped_rows].copy()
df['RiskToleranceNum'] = df['RiskToleranceNum'].astype(int)

# One-hot encode features for GUI model. drop_first removes the first level of
# each feature, so an all-zero group of columns means "first level".
# dtype=int keeps the training columns the same type as the 0/1 row that the
# GUI builds at prediction time.
features_gui = ['AgeGroup', 'IncomeGroup', 'EmploymentStatus', 'LoanStatus']
df_features = pd.get_dummies(df[features_gui], drop_first=True, dtype=int)
X_gui = df_features
y_gui = df['RiskToleranceNum']
X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(X_gui, y_gui, test_size=0.2, random_state=42, stratify=y_gui)

# Train GUI model. use_label_encoder is intentionally not passed: the XGBoost
# version this notebook was run with ignores it and only emits a
# 'Parameters: { "use_label_encoder" } are not used.' warning.
model_gui = XGBClassifier(
    eval_metric='mlogloss',
    random_state=42, max_depth=4, learning_rate=0.1,
    n_estimators=200, subsample=0.8, colsample_bytree=0.8
)
model_gui.fit(X_train_g, y_train_g)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [4]:
# SECTION 4: GUI Launch Function
def _read_investment_amount(raw_text):
    """Parse the investment amount typed into the form.

    Args:
        raw_text: Contents of the amount entry box. Thousands separators
            (',') and a leading '₹' are tolerated.

    Returns:
        The amount as a positive, finite float.

    Raises:
        ValueError: If the text is empty, not a number, not finite, or not
            greater than zero. The message is suitable for showing to the user.
    """
    cleaned = raw_text.replace(',', '').replace('₹', '').strip()
    if not cleaned:
        raise ValueError("Please enter an investment amount.")
    try:
        amount = float(cleaned)
    except ValueError:
        raise ValueError(
            f"'{raw_text.strip()}' is not a valid amount. Enter a number such as 50000."
        ) from None
    # float() accepts 'nan' and 'inf'; neither is a usable amount.
    if not np.isfinite(amount) or amount <= 0:
        raise ValueError("Investment amount must be a positive number.")
    return amount


def _category_options(column, fallback):
    """Return the values of `column` that the GUI model was trained on.

    Reads the notebook-level training frame `df`. Offering exactly these
    values in the form guarantees that every selection maps onto a one-hot
    column of `df_features` (or onto the dropped first level).

    Args:
        column: Column name in `df`.
        fallback: Options to use if the column holds no values.

    Returns:
        A list of option strings.
    """
    values = df[column]
    if isinstance(values.dtype, pd.CategoricalDtype):
        # Keep the declared category order (e.g. age bands from young to old).
        options = [str(level) for level in values.cat.categories]
    else:
        options = sorted(values.dropna().astype(str).unique())
    return options or list(fallback)


def _project_portfolio_growth(invest_amount, allocation, asset_rates, years, volatility):
    """Compound `invest_amount` once per year at the allocation-weighted return.

    Only the 'Stocks' return is perturbed, by a uniform draw in
    [-volatility, +volatility] each year; other assets use their fixed rate.

    Args:
        invest_amount: Starting value.
        allocation: Mapping of asset name to percentage weight.
        asset_rates: Mapping of asset name to annual return (0.12 == 12%).
        years: Iterable with one entry per projected year.
        volatility: Half-width of the yearly random shift of the stock return.

    Returns:
        A list with the projected portfolio value at the end of each year.
    """
    projected, current_value = [], invest_amount
    for _ in years:
        yearly_rates = dict(asset_rates)
        yearly_rates['Stocks'] += np.random.uniform(-volatility, volatility)
        blended_rate = sum(
            (percent / 100) * yearly_rates[asset] for asset, percent in allocation.items()
        )
        current_value *= 1 + blended_rate
        projected.append(current_value)
    return projected


def launch_gui():
    """Open the FinAssist window and block until the user closes it.

    The form collects age band, income band, employment status, loan status,
    an investment goal and an amount. On submit it one-hot encodes the four
    model inputs, predicts a risk profile with `model_gui`, shows a suggested
    Stocks/Gold/FD split in a message box, and draws a pie chart of the split
    plus a 5-year projection next to FD-only and gold-only baselines.

    Relies on the notebook-level names `df`, `df_features` and `model_gui`.
    """
    investment_goals_options = ["Wealth preservation", "Speculation", "Income generation", "Growth"]
    risk_labels = {0: 'Low', 1: 'Medium', 2: 'High'}
    # Suggested percentage split per predicted risk class
    allocations = {
        2: {'Stocks': 70, 'Gold': 10, 'FD': 20},
        1: {'Stocks': 50, 'Gold': 20, 'FD': 30},
        0: {'Stocks': 20, 'Gold': 20, 'FD': 60},
    }
    # NOTE(review): assumes the 12% base growth rate is the equity return;
    # gold (8%) and FD (6%) match the baseline curves - confirm.
    asset_rates = {'Stocks': 0.12, 'Gold': 0.08, 'FD': 0.06}
    volatility = 0.05
    years = list(range(1, 6))

    def on_submit():
        """Validate the form, predict the risk profile and refresh the charts."""
        try:
            invest_amount = _read_investment_amount(invest_var.get())
        except ValueError as exc:
            messagebox.showerror("Invalid Input", str(exc))
            return

        try:
            # Gather user inputs
            selections = {
                'AgeGroup': age_var.get(),
                'IncomeGroup': income_var.get(),
                'EmploymentStatus': employment_var.get(),
                'LoanStatus': loan_var.get(),
            }
            investment_goal = invest_goal_var.get()

            # Prepare input for model: one row with the training columns, all
            # 0 except the column of each selected level. A selection with no
            # column is the first level dropped by drop_first and stays all-zero.
            input_dict = dict.fromkeys(df_features.columns, 0)
            for cat, val in selections.items():
                key = cat + '_' + val
                if key in input_dict:
                    input_dict[key] = 1
            input_df = pd.DataFrame([input_dict])

            # Predict risk tolerance
            risk_pred = int(model_gui.predict(input_df)[0])
            risk_label = risk_labels[risk_pred]

            # Suggest portfolio allocation
            allocation = allocations[risk_pred]

            result = f"Risk Profile: {risk_label}\nInvestment Goal: {investment_goal}\n\nSuggested Allocation:\n"
            for asset, percent in allocation.items():
                amount = round((percent / 100) * invest_amount, 2)
                result += f"{asset}: {percent}% (₹{amount})\n"
            messagebox.showinfo("Investment Suggestion", result)

            # Clear previous plots
            for widget in chart_frame.winfo_children():
                widget.destroy()

            # Portfolio pie chart. Figure is used directly instead of
            # plt.subplots so the figure is not registered with pyplot, which
            # would keep every figure from every click alive.
            fig1 = Figure(figsize=(4, 4))
            ax1 = fig1.add_subplot(111)
            ax1.pie(list(allocation.values()), labels=list(allocation.keys()), autopct='%1.1f%%',
                    colors=['skyblue', 'gold', 'lightgreen'])
            ax1.set_title("Portfolio Split")
            pie_canvas = FigureCanvasTkAgg(fig1, master=chart_frame)
            pie_canvas.draw()
            pie_canvas.get_tk_widget().pack(side='left', padx=10)

            # Projected growth line chart: the recommended curve follows the
            # suggested allocation, so each risk profile gets its own projection.
            portfolio_growth = _project_portfolio_growth(
                invest_amount, allocation, asset_rates, years, volatility
            )
            fd_growth = [invest_amount * ((1 + asset_rates['FD']) ** y) for y in years]
            gold_growth = [invest_amount * ((1 + asset_rates['Gold']) ** y) for y in years]

            fig2 = Figure(figsize=(5, 4))
            ax2 = fig2.add_subplot(111)
            ax2.plot(years, portfolio_growth, marker='o', label='Recommended Portfolio', color='blue')
            ax2.plot(years, fd_growth, '--o', label='FD Only (6%)', color='green')
            ax2.plot(years, gold_growth, '--o', label='Gold Only (8%)', color='orange')
            ax2.set(title="Projected Growth Over 5 Years", xlabel="Year", ylabel="Value in ₹")
            ax2.legend()
            ax2.grid(True)
            growth_canvas = FigureCanvasTkAgg(fig2, master=chart_frame)
            growth_canvas.draw()
            growth_canvas.get_tk_widget().pack(side='right', padx=10)

        except Exception as e:
            # Last-resort handler so a failure inside the callback is shown to
            # the user instead of disappearing into Tk's callback machinery.
            messagebox.showerror("Error", str(e))

    # SECTION 5: GUI Layout
    win = tk.Tk()
    win.title("FinAssist - Investment Advisor")
    win.geometry("950x750")

    input_frame = ttk.LabelFrame(win, text="User Inputs")
    input_frame.pack(padx=10, pady=10, fill="x")

    chart_frame = ttk.LabelFrame(win, text="Analysis Output")
    chart_frame.pack(padx=10, pady=10, fill="both", expand=True)

    def add_choice(row, label, options, preferred):
        """Add a labelled drop-down on `row` and return its StringVar.

        `preferred` is pre-selected when it is one of `options`; otherwise the
        first option is.
        """
        ttk.Label(input_frame, text=label).grid(row=row, column=0, padx=5, pady=5, sticky='e')
        variable = tk.StringVar()
        initial = preferred if preferred in options else options[0]
        ttk.OptionMenu(input_frame, variable, initial, *options).grid(row=row, column=1)
        return variable

    # Input fields. Options for the four model inputs come from the training
    # data so they cannot drift away from the categories the model knows; the
    # literal lists are only used if the training column is empty.
    age_var = add_choice(
        0, "Age Group",
        _category_options('AgeGroup', ["18-25", "26-35", "36-50", "51-65", "66+"]),
        "18-25")
    income_var = add_choice(
        1, "Monthly Income",
        _category_options('IncomeGroup', ["Below ₹30,000", "₹30,000 to ₹70,000", "Above ₹70,000"]),
        "Below ₹30,000")
    employment_var = add_choice(
        2, "Employment Status",
        _category_options('EmploymentStatus', ["Salaried", "Self Employed", "Student", "Retired", "Unemployed"]),
        "Salaried")
    loan_var = add_choice(
        3, "Loan Status",
        _category_options('LoanStatus', ["No", "Yes"]),
        "No")
    invest_goal_var = add_choice(
        4, "Investment Goal", investment_goals_options, investment_goals_options[0])

    ttk.Label(input_frame, text="Investment Amount (₹)").grid(row=5, column=0, padx=5, pady=5, sticky='e')
    invest_var = tk.StringVar()
    ttk.Entry(input_frame, textvariable=invest_var).grid(row=5, column=1)

    ttk.Button(input_frame, text="Get Recommendation", command=on_submit).grid(row=6, column=0, columnspan=2, pady=10)

    win.mainloop()

# Run the GUI. launch_gui() ends in win.mainloop(), so this cell keeps running
# until the FinAssist window is closed.
launch_gui()