# Creating dummy data for testing

In [6]:
from pathlib import Path

import pandas as pd

# Ten hand-written sample records used to exercise the donor-finder GUI.
# Every list has exactly 10 entries (one per row of the resulting DataFrame).
# NOTE(review): several category strings look truncated (e.g. "United-Stat",
# "Married-civ", "Exec-mana"). If the trained model's one-hot columns use the
# full category names, these values will not match any of them — confirm
# against model_columns.pkl.
data = {
    "age": [30, 50, 38, 53, 28, 37, 49, 52, 31, 42],
    "workclass": ["Self-emp-inc", "Private", "Private", "Private", "Private", "Private", "Private", "Self-emp-inc", "Private", "Private"],
    # Row 9 previously held "14", which is the education-num value rather than
    # a level name; row 5 pairs education-num 14 with "Masters".
    "education_level": ["Bachelors", "11th", "Bachelors", "HS-grad", "Masters", "Some-colle", "9th", "HS-grad", "Masters", "Bachelors"],
    "education-num": [13, 7, 13, 9, 14, 10, 5, 9, 14, 13],
    "marital-stat": ["Married-civ", "Married-civ", "Married-civ", "Married-civ", "Married-civ", "Married-civ", "Married-sp", "Married-civ", "Never-married", "Married-civ"],
    "occupation": ["Exec-mana", "Handlers-cl", "Prof-specia", "Handlers-cl", "Exec-mana", "Exec-mana", "Other-servi", "Exec-mana", "Prof-specia", "Exec-mana"],
    "relationship": ["Husband", "Husband", "Wife", "Husband", "Wife", "Husband", "Not-in-fam", "Husband", "Not-in-fam", "Husband"],
    "race": ["White", "Black", "White", "Black", "Black", "White", "Black", "White", "White", "White"],
    "sex": ["Male", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male"],
    "capital-gain": [2174, 0, 0, 0, 0, 0, 0, 0, 14084, 5178],
    "capital-loss": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    "hours-per-week": [13, 40, 40, 40, 40, 40, 16, 45, 50, 40],
    "native-country": ["United-Stat", "United-Stat", "Cuba", "United-Stat", "United-Stat", "United-Stat", "Jamaica", "United-Stat", "United-Stat", "United-Stat"],
    "income": [">50K", "<=50K", "<=50K", "<=50K", "<=50K", ">50K", "<=50K", ">50K", ">50K", ">50K"],
    "Name": ["John Smith", "David Johnson", "Emily Williams", "Michael Brown", "Ashley Taylor", "James Anderson", "Sarah Martinez", "Thomas Wilson", "Jennifer Lee", "Robert Davis"]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file in the current user's Downloads folder.
# The folder is derived from the home directory (and created if absent) so the
# cell also runs on machines other than the one it was written on.
output_dir = Path.home() / "Downloads"
output_dir.mkdir(parents=True, exist_ok=True)
file_path = str(output_dir / "dummy_data.csv")
df.to_csv(file_path, index=False)

file_path


'C:\\Users\\hp\\Downloads\\dummy_data.csv'

# Frontend code using Tkinter

In [12]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib

# Persisted artifacts read from the working directory: the fitted classifier
# and the ordered column list that preprocess_data() aligns uploads to.
model = joblib.load('final_model.pkl')
model_columns = joblib.load('model_columns.pkl')

# State shared between the button callbacks; both stay None until the
# corresponding step (upload / analyze) has succeeded.
original_data = preprocessed_data = None

# Callback for the "Upload File" button
def upload_file():
    """Ask the user for a CSV/Excel file and load it into ``original_data``.

    On success the global ``original_data`` is replaced with the loaded
    DataFrame, and everything derived from the previous upload
    (``preprocessed_data``, the preprocessing status and the donor output)
    is cleared so that results of an older file can never be shown against
    the new one. On failure, or if the dialog is cancelled, the previously
    loaded data is left untouched.
    """
    global original_data, preprocessed_data
    file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")])
    if not file_path:
        # Dialog was cancelled; nothing to do.
        return

    try:
        # Compare the extension case-insensitively so "DATA.CSV" is not
        # handed to the Excel reader.
        if file_path.lower().endswith('.csv'):
            loaded = pd.read_csv(file_path)
        else:
            loaded = pd.read_excel(file_path)
    except Exception as e:
        upload_status_label.config(text=f"Error: {e}", fg="red")
        return

    original_data = loaded
    # Features computed from an earlier file no longer describe these rows.
    preprocessed_data = None
    preprocess_status_label.config(text="")
    output_label.config(text="")
    upload_status_label.config(text="File uploaded successfully!", fg="green")

# Callback for the "Analyze Data" button
def preprocess_data():
    """Turn ``original_data`` into the feature matrix expected by ``model``.

    Steps, applied to a copy of the uploaded frame:
      1. log(x + 1) on the skewed columns 'capital-gain' and 'capital-loss';
      2. min-max scaling of the five numerical columns;
      3. one-hot encoding of the remaining (non-numeric) columns;
      4. alignment to ``model_columns``: columns the model expects but the
         upload lacks are added as zeros, extra columns are dropped, and the
         order follows ``model_columns``.

    The result is stored in the global ``preprocessed_data`` and the outcome
    is reported through ``preprocess_status_label``. Any exception (for
    example a required column missing from the upload) is shown in the label
    and leaves ``preprocessed_data`` set to ``None``.
    """
    global preprocessed_data
    if original_data is None:
        preprocess_status_label.config(text="Please upload a file first!", fg="red")
        return

    # Discard any earlier result first: if this run fails, features from a
    # previous run must not remain available to show_donors().
    preprocessed_data = None

    try:
        data = original_data.copy()

        # Apply log transformation for skewed features
        skewed = ['capital-gain', 'capital-loss']
        data[skewed] = data[skewed].apply(lambda x: np.log(x + 1))

        # MinMax scaling for numerical features
        # NOTE(review): the scaler is fitted on the uploaded rows, so every
        # file is scaled to its own min/max. If the model was trained with a
        # scaler fitted on the training set, that scaler should be persisted
        # and loaded here instead — confirm against the training code.
        numerical = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
        scaler = MinMaxScaler()
        data[numerical] = scaler.fit_transform(data[numerical])

        # One-hot encoding for categorical features
        data = pd.get_dummies(data)

        # Align to the training layout in one step: missing columns are
        # created filled with 0, unknown columns are dropped, order matches.
        preprocessed_data = data.reindex(columns=model_columns, fill_value=0)
        preprocess_status_label.config(text="Data preprocessed successfully!", fg="green")
    except Exception as e:
        preprocess_status_label.config(text=f"Error: {e}", fg="red")

# Callback for the "Show Capable Donors" button
def show_donors():
    """Predict on ``preprocessed_data`` and list the names of capable donors.

    Rows of ``original_data`` whose prediction equals 1 are treated as
    capable donors (assumes the model encodes the positive class as 1) and
    their 'Name' values are written to ``output_label``, one per line.
    Missing names are shown as "Unknown" and non-string names are converted
    to text, so a single bad cell does not abort the listing.

    Problems are reported through ``messagebox.showerror``: no preprocessed
    data yet, no 'Name' column in the upload, or any exception raised while
    predicting.
    """
    if preprocessed_data is None:
        messagebox.showerror("Error", "Please preprocess the data first!")
        return

    try:
        predictions = model.predict(preprocessed_data)
        capable_donors = original_data[predictions == 1]  # Assuming 1 represents capable donors

        if capable_donors.empty:
            output_label.config(text="No capable donors found.")
            return

        if 'Name' not in capable_donors.columns:
            # Report the real problem instead of a bare KeyError text.
            messagebox.showerror("Error", "The uploaded data has no 'Name' column to display.")
            return

        # str.join() only accepts strings; normalise NaN and numeric cells.
        donor_names = ["Unknown" if pd.isna(name) else str(name) for name in capable_donors['Name']]
        output_label.config(text="Capable donors:\n\n" + "\n".join(donor_names))
    except Exception as e:
        messagebox.showerror("Error", f"Error during prediction: {e}")

# Look-and-feel values shared by several widgets.
_PAGE_BG = "#f0f4c3"  # Light background color
_BUTTON_STYLE = {"font": ("Helvetica", 14), "fg": "white"}
_STATUS_STYLE = {"text": "", "font": ("Helvetica", 12), "bg": _PAGE_BG}

# Main window
root = tk.Tk()
root.title("Find Capable Donor")
root.geometry("800x600")
root.configure(bg=_PAGE_BG)

# Widgets are packed top to bottom in the order they are created below.

# Heading
heading_label = tk.Label(
    root,
    text="Find Capable Donor",
    font=("Helvetica", 18, "bold"),
    bg="#689f38",
    fg="white",
)
heading_label.pack(pady=20)

# Step 1: upload button with its status line
upload_button = tk.Button(root, text="Upload File", command=upload_file, bg="#8bc34a", width=15, **_BUTTON_STYLE)
upload_button.pack(pady=10)
upload_status_label = tk.Label(root, **_STATUS_STYLE)
upload_status_label.pack(pady=5)

# Step 2: analyze button with its status line
analyze_button = tk.Button(root, text="Analyze Data", command=preprocess_data, bg="#009688", width=15, **_BUTTON_STYLE)
analyze_button.pack(pady=10)
preprocess_status_label = tk.Label(root, **_STATUS_STYLE)
preprocess_status_label.pack(pady=5)

# Step 3: button that runs the prediction and lists the donors
show_button = tk.Button(root, text="Show Capable Donors", command=show_donors, bg="#00796b", width=20, **_BUTTON_STYLE)
show_button.pack(pady=20)

# Area where show_donors() writes its result
output_label = tk.Label(root, wraplength=600, justify="left", **_STATUS_STYLE)
output_label.pack(pady=40)

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()
