In [None]:
import pandas as pd
import psycopg2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tkinter import Tk, Label, Entry, Button, StringVar, messagebox
import pickle
import numpy as np
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

# Step 1: Connect to the PostgreSQL database with error handling
def connect_to_db():
    """Open a connection to the project's PostgreSQL database.

    Each connection setting is read from an environment variable
    (``LOAN_DB_HOST``, ``LOAN_DB_PORT``, ``LOAN_DB_USER``,
    ``LOAN_DB_PASSWORD``, ``LOAN_DB_NAME``). When a variable is unset the
    value that used to be hard-coded here is used, so existing setups keep
    working unchanged.

    Returns:
        An open ``psycopg2`` connection, or ``None`` when the connection
        attempt failed (the error text is shown in a Tk message box).
    """
    import os

    # SECURITY: the fallback values below are credentials committed to source.
    # They are kept only for backward compatibility; prefer the environment
    # variables, and rotate and remove the embedded password when possible.
    settings = {
        "host": os.environ.get(
            "LOAN_DB_HOST",
            "branchhomeworkdb.cv8nj4hg6yra.ap-south-1.rds.amazonaws.com",
        ),
        "port": os.environ.get("LOAN_DB_PORT", 5432),
        "user": os.environ.get("LOAN_DB_USER", "datascientist"),
        "password": os.environ.get(
            "LOAN_DB_PASSWORD", "47eyYBLT0laW5j9U24Uuy8gLcrN"
        ),
        "database": os.environ.get("LOAN_DB_NAME", "branchdsprojectgps"),
    }

    try:
        return psycopg2.connect(**settings)
    except psycopg2.Error as e:
        messagebox.showerror("Database Connection Error", str(e))
        return None

# Step 2: Fetch combined user data (features and loan outcome)
def fetch_data():
    """Load user features together with their loan outcome.

    Joins ``user_attributes`` with ``loan_outcomes`` on ``user_id`` and
    returns the columns ``user_id``, ``age``, ``cash_incoming_30days`` and
    ``loan_outcome``.

    Returns:
        A ``pandas.DataFrame`` with the query result, or an empty DataFrame
        when no database connection could be opened.

    Raises:
        Whatever ``pandas.read_sql`` raises if the query itself fails; the
        connection is closed before the exception propagates.
    """
    conn = connect_to_db()
    if conn is None:
        return pd.DataFrame()

    # try/finally guarantees the connection is released even when the query
    # fails; previously a failing read_sql leaked the open connection.
    try:
        query = """
        SELECT ua.user_id, ua.age, ua.cash_incoming_30days, lo.loan_outcome
        FROM user_attributes ua
        JOIN loan_outcomes lo ON ua.user_id = lo.user_id
        """
        return pd.read_sql(query, conn)
    finally:
        conn.close()

# Load the saved model
def load_model():
    """Return the object unpickled from ``loan_model.pkl``.

    The file is looked up relative to the current working directory.
    Only load pickle files you created yourself: unpickling can execute
    arbitrary code.
    """
    with open('loan_model.pkl', 'rb') as model_file:
        restored = pickle.load(model_file)
    return restored

# Step 3: Prepare the dataset and split it
user_data = fetch_data()

if not user_data.empty:
    # Split the features and target
    X = user_data[['age', 'cash_incoming_30days']]  # Features

    # Target (binary): 1 = loan repaid, 0 = anything else.
    # The earlier test `x == 'yes'` yielded only one class in the recorded
    # run (predict_proba returned a single column), so the stored labels are
    # evidently not 'yes'/'no'.
    # NOTE(review): presumably the column holds 'repaid' / 'defaulted' -
    # confirm against the loan_outcomes table. 'yes' is still accepted so the
    # old encoding keeps working.
    outcome_text = user_data['loan_outcome'].astype(str).str.strip().str.lower()
    y = outcome_text.isin(['repaid', 'yes']).astype(int)

    if y.nunique() < 2:
        # A one-class target makes every prediction identical; say so loudly
        # instead of failing later inside the GUI.
        print("Warning: loan_outcome maps to a single class; "
              "the model cannot distinguish repaid from defaulted loans.")

    # Split into train and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model (seeded, like the split, so runs are reproducible)
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Save the trained model
    with open('loan_model.pkl', 'wb') as f:
        pickle.dump(model, f)

    # Load the model back, so the GUI uses exactly what was persisted
    model = load_model()

    # Step 4: Make a prediction for a specific user
    def predict_user_outcome(user_id):
        """Predict the loan outcome for one user already present in ``user_data``.

        Args:
            user_id: Value compared against the ``user_id`` column.

        Returns:
            A ``(message, probabilities)`` tuple. ``probabilities`` is a
            length-2 array ordered ``[P(defaulted), P(repaid)]``, or ``None``
            (with the message ``"User ID not found."``) when no row matches.
        """
        user_row = user_data[user_data['user_id'] == user_id]
        if user_row.empty:
            return "User ID not found.", None

        # Only the first matching row was ever reported, so predict on that
        # row alone (the join may return more than one row per user).
        features = user_row[['age', 'cash_incoming_30days']].iloc[[0]]
        prediction = model.predict(features)
        class_probs = model.predict_proba(features)[0]

        # predict_proba yields one column per class seen during training, in
        # model.classes_ order. Spread them over a fixed two-slot vector so
        # index 0 is always "defaulted" and index 1 always "repaid", even
        # when the model was trained on a single class.
        probabilities = np.zeros(2)
        for label, prob in zip(model.classes_, class_probs):
            probabilities[int(label)] = prob

        outcome = "Loan Outcome: Repaid" if prediction[0] == 1 else "Loan Outcome: Defaulted"
        return outcome, probabilities

    # Step 5: Plot model performance
    def plot_model_performance(X_test, y_test, model):
        """Print the test-set accuracy and display the confusion matrix.

        Args:
            X_test: Feature rows passed to ``model.predict``.
            y_test: True labels for ``X_test``.
            model: Fitted estimator exposing ``predict``.
        """
        predicted = model.predict(X_test)

        # Report accuracy as a percentage with two decimals
        score = accuracy_score(y_test, predicted)
        print(f"Model Accuracy: {score * 100:.2f}%")

        # Heatmap of true vs. predicted label counts
        matrix = confusion_matrix(y_test, predicted)
        plt.figure(figsize=(6, 6))
        sns.heatmap(
            matrix,
            annot=True,
            fmt="d",
            cmap="Blues",
            cbar=False,
        )
        plt.title("Confusion Matrix")
        plt.xlabel("Predicted Label")
        plt.ylabel("True Label")
        plt.show()

    # GUI Implementation using Tkinter
    def predict_outcome():
        """Tk callback for the Predict button.

        Reads the user ID from the entry box, runs the prediction and fills
        the result, age, income and probability labels, then draws the
        probability pie chart. Non-numeric input only updates the result
        label with an error message.
        """
        raw_id = entry_user_id.get()
        # isdecimal() (unlike isdigit()) only accepts characters int() can parse
        if not raw_id.isdecimal():
            result.set("Invalid user ID. Please enter a numeric user ID.")
            return
        user_id = int(raw_id)

        outcome, probabilities = predict_user_outcome(user_id)
        result.set(outcome)
        if probabilities is None:
            return

        if len(probabilities) < 2:
            # A model trained on one class returns a single probability,
            # which made probabilities[1] raise IndexError. That single class
            # is necessarily the predicted one, so expand to the fixed
            # [defaulted, repaid] layout.
            if outcome == "Loan Outcome: Repaid":
                probabilities = [0.0, probabilities[0]]
            else:
                probabilities = [probabilities[0], 0.0]

        # Filter the frame once and reuse the match for both labels
        user_rows = user_data[user_data['user_id'] == user_id]
        age_label.set(f"Age: {user_rows['age'].iloc[0]}")
        income_label.set(f"Cash Incoming (30 days): {user_rows['cash_incoming_30days'].iloc[0]}")
        prob_repaid.set(f"Repaid Probability: {probabilities[1] * 100:.2f}%")
        prob_defaulted.set(f"Defaulted Probability: {probabilities[0] * 100:.2f}%")

        # Plot pie chart for prediction probabilities
        plot_probabilities(probabilities)

    def plot_probabilities(probabilities):
        """Embed a pie chart of ``[P(defaulted), P(repaid)]`` in the main window.

        predict_outcome calls this name, but this cell did not define it, so
        the call would have raised NameError.
        """
        # Remove the chart from a previous prediction so charts do not stack
        for widget in root.pack_slaves():
            if widget.winfo_class() == "Canvas":
                widget.destroy()

        # A bare Figure (not plt.figure) stays out of pyplot's window
        # management, which suits embedding in Tk.
        figure = plt.Figure(figsize=(4, 4))
        axes = figure.add_subplot(111)
        axes.pie(probabilities, labels=["Defaulted", "Repaid"], autopct="%1.1f%%")
        axes.set_title("Prediction Probabilities")

        canvas = FigureCanvasTkAgg(figure, master=root)
        canvas.draw()
        canvas.get_tk_widget().pack()

    # Function to clear fields
    def clear_fields():
        """Tk callback for the Clear button: reset the form to its empty state."""
        # Empty the user-ID entry box
        entry_user_id.delete(0, 'end')

        # Blank every text label bound to a StringVar
        for text_var in (result, age_label, income_label, prob_repaid, prob_defaulted):
            text_var.set("")

        # Finally remove any chart that is being displayed
        clear_plot()

  

    # Clear plot canvas
    def clear_plot():
        """Destroy every chart canvas packed directly into the main window.

        ``root.pack_slaves()`` returns Tk widgets, never ``FigureCanvasTkAgg``
        objects, so the earlier ``isinstance`` test never matched and no
        chart was removed. The Tk widget behind a ``FigureCanvasTkAgg`` is a
        ``Canvas``, and the window packs no other canvases, so matching on
        the Tk widget class removes exactly the embedded charts.
        """
        # NOTE(review): assumes charts are packed into root itself - confirm
        # against the plotting code.
        for widget in root.pack_slaves():
            if widget.winfo_class() == "Canvas":
                widget.destroy()

    # Initialize the GUI
    root = Tk()
    root.title("Loan Outcome Predictor")

    Label(root, text="Enter User ID:").pack()
    entry_user_id = Entry(root)
    entry_user_id.pack()

    result = StringVar()
    age_label = StringVar()
    income_label = StringVar()
    prob_repaid = StringVar()
    prob_defaulted = StringVar()

    Label(root, textvariable=age_label).pack()
    Label(root, textvariable=income_label).pack()
    Label(root, textvariable=prob_repaid).pack()
    Label(root, textvariable=prob_defaulted).pack()
    Label(root, textvariable=result).pack()

    Button(root, text="Predict", command=predict_outcome).pack()
    Button(root, text="Clear", command=clear_fields).pack()

    # Run the GUI main loop
    root.mainloop()

else:
    print("No data available for training.")


  data = pd.read_sql(query, conn)
Exception in Tkinter callback
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/r7/_qf_l8597dx_rlldg3l3l2_40000gn/T/ipykernel_16562/2543643889.py", line 113, in predict_outcome
    prob_repaid.set(f"Repaid Probability: {probabilities[1] * 100:.2f}%")
IndexError: index 1 is out of bounds for axis 0 with size 1
Exception in Tkinter callback
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/r7/_qf_l8597dx_rlldg3l3l2_40000gn/T/ipykernel_16562/2543643889.py", line 113, in predict_outcome
    prob_repaid.set(f"Repaid Probability: {probabilities[1] * 100:.2f}%")
IndexError: index 1 is out of bounds for axis 0 with size 1
Exception in Tkinter callback
Traceback (m