# In [None]:
import tkinter as tk
from tkinter import messagebox, filedialog
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
import pickle
import os

# ---------------------
# Load dataset
# ---------------------
def load_data():
    """Ask the user for the stress-survey CSV and load it.

    Opens a file-selection dialog restricted to ``*.csv`` files, reads the
    chosen file with pandas and drops every row that contains a missing
    value.

    Returns:
        tuple: ``(df, path)`` -- the cleaned DataFrame and the selected path.

    Raises:
        SystemExit: If the dialog is dismissed without choosing a file.
    """
    # This dialog runs before the application window is created. Without an
    # explicit parent, tkinter creates a visible default root on demand,
    # which would linger as a stray empty window next to the real GUI.
    # A hidden, short-lived root avoids that.
    dialog_root = tk.Tk()
    dialog_root.withdraw()
    try:
        path = filedialog.askopenfilename(
            parent=dialog_root,
            title="Select Student Stress CSV",
            filetypes=[("CSV files", "*.csv")],
        )
        if not path:
            messagebox.showerror("Error", "No file selected!", parent=dialog_root)
            # `exit()` is a helper injected by the `site` module and is not
            # guaranteed to exist; raising SystemExit is the portable form.
            raise SystemExit(1)
    finally:
        # Always release the temporary root so the main window created
        # later becomes the only Tk root.
        dialog_root.destroy()
    df = pd.read_csv(path).dropna()
    return df, path

data, DATA_PATH = load_data()

# Identify target column: the first header whose lower-cased text contains
# "stress levels".
target_col = next((c for c in data.columns if 'stress levels' in c.lower()), None)
if target_col is None:
    # Fail with an explanatory message instead of an opaque IndexError when
    # the selected CSV does not have the expected target column.
    messagebox.showerror(
        "Error",
        f"No column containing 'stress levels' was found in:\n{DATA_PATH}",
    )
    raise SystemExit(1)
X = data.drop(target_col, axis=1)
y = data[target_col]

# Map stress levels to 0,1,2:
#   values <= 2 -> 0, value == 3 -> 1, anything else -> 2
y = y.apply(lambda x: 0 if x <= 2 else (1 if x == 3 else 2))

# Features: every remaining column, in CSV order. The GUI builds one entry
# box per name, so this order also defines the order of model inputs.
FEATURE_NAMES = X.columns.tolist()

# Column groups that need special treatment. The strings must match the CSV
# headers exactly, because they are compared against FEATURE_NAMES entries.

# Columns that format_label() annotates with "(1=Poor, 5=Excellent)".
QUALITY_FEATURES = [
    "Kindly Rate your Sleep Quality",
    "How would you rate you academic performance ?",
]

# Columns that format_label() annotates with "(1=High Stress, 5=Low Stress)"
# and whose values invert_features() / show_comparison() flip with 6 - value.
LOAD_OR_FREQUENCY_FEATURES = [
    "How many times a week do you suffer headaches ?",
    "how would you rate your study load?",
    "How many times a week you practice extracurricular activities ?",
]

# Split data (80 % train / 20 % test, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------------
# Model handling
# ---------------------
MODEL_FILE = 'student_stress_model.pkl'


def _load_cached_model(path, expected_features):
    """Return the pickled model at *path* if it fits *expected_features*.

    Returns None when the file is missing, cannot be unpickled, or was
    trained on a different set of columns, so the caller can retrain instead
    of predicting with a model that does not match the selected CSV.
    """
    if not os.path.exists(path):
        return None
    try:
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # model files that this application wrote itself.
        with open(path, 'rb') as f:
            cached = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError):
        return None

    expected = list(expected_features)
    cached_names = getattr(cached, 'feature_names_in_', None)
    if cached_names is not None:
        return cached if list(cached_names) == expected else None
    # Fall back to a column-count check when the estimator does not expose
    # the feature names it was fitted with.
    if getattr(cached, 'n_features_in_', None) == len(expected):
        return cached
    return None


model = _load_cached_model(MODEL_FILE, X_train.columns)
if model is None:
    # NOTE(review): newer scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version before upgrading.
    model = make_pipeline(StandardScaler(),
                          LogisticRegression(multi_class='multinomial', max_iter=1000))
    model.fit(X_train, y_train)
    with open(MODEL_FILE, 'wb') as f:
        pickle.dump(model, f)

# ---------------------
# Helper functions
# ---------------------
def format_label(col):
    """Build the prompt text shown beside a feature's entry box.

    The column name is stripped of question marks, has underscores turned
    into spaces, is trimmed and capitalised, and is followed by a hint that
    explains what the ends of the 1-5 scale mean for that feature group.

    Args:
        col: Column name of the feature.

    Returns:
        str: ``"<cleaned name> <scale hint>:"``.
    """
    cleaned = col.replace('?', '').replace('_', ' ')
    title = cleaned.strip().capitalize()

    # Start from the generic hint and override it for the special groups.
    scale_hint = "(1=Poor, 5=Good)"
    if col in QUALITY_FEATURES:
        scale_hint = "(1=Poor, 5=Excellent)"
    elif col in LOAD_OR_FREQUENCY_FEATURES:
        scale_hint = "(1=High Stress, 5=Low Stress)"

    return title + " " + scale_hint + ":"

def validate_input(value):
    """Decide whether an entry box may hold *value*.

    Used as a tkinter key-validation callback that receives the text the
    entry would contain if the edit were accepted.

    Args:
        value: Prospective content of the entry box.

    Returns:
        bool: True for the empty string or a single digit from 1 to 5.
    """
    # The empty string must be accepted: otherwise deleting the only digit
    # is vetoed and a typed value can never be corrected. Blank fields are
    # still rejected later, when the values are converted to numbers.
    # An explicit whitelist also avoids str.isdigit() quirks: characters such
    # as "²" pass isdigit() but make int() raise.
    return value == "" or value in ("1", "2", "3", "4", "5")

def invert_features(user_vals):
    """Return a copy of *user_vals* with load/frequency answers flipped.

    Values are matched to FEATURE_NAMES by position. Every value whose
    feature is listed in LOAD_OR_FREQUENCY_FEATURES is replaced by
    ``6 - value`` (1 <-> 5, 2 <-> 4, 3 stays 3); the rest are left untouched.
    The input sequence itself is not modified.

    Args:
        user_vals: Values in FEATURE_NAMES order.

    Returns:
        A copy of ``user_vals`` with the flipped entries.
    """
    flipped = user_vals.copy()
    reversed_positions = (
        position
        for position, name in enumerate(FEATURE_NAMES)
        if name in LOAD_OR_FREQUENCY_FEATURES
    )
    for position in reversed_positions:
        flipped[position] = 6 - flipped[position]
    return flipped

def predict_stress():
    """Read the entry boxes, predict the stress class and chart the inputs.

    Button callback. Shows an input-error dialog when any field is blank or
    not numeric; otherwise shows the predicted class in a dialog and draws
    the comparison chart. Any other failure is reported in an
    "Unexpected Error" dialog instead of propagating into the Tk event loop.
    """
    # Keep the parsing in its own try block so that a ValueError raised later
    # by the model is not misreported as a problem with what the user typed.
    try:
        user_vals = [float(e.get()) for e in entries]
    except ValueError:
        messagebox.showerror("Input Error", "Please enter numbers 1-5 for all fields.")
        return

    try:
        # Flip the load/frequency answers before handing them to the model.
        model_vals = invert_features(user_vals)
        # Pass a DataFrame with the training column names; a bare list makes
        # an estimator fitted on named columns warn about missing names.
        features = pd.DataFrame([model_vals], columns=FEATURE_NAMES)
        prediction = int(model.predict(features)[0])
        result_text = ["Low Stress (No Depression)", "Medium Stress (At Risk)", "High Stress (Depression)"][prediction]
        messagebox.showinfo("Prediction", f"Your predicted stress level:\n\n{result_text}")
        # show_comparison() flips the dataset averages (6 - mean) for the
        # load/frequency features, so it must receive the values as entered,
        # not the flipped model inputs, for both bar series to share a scale.
        show_comparison(user_vals)
    except Exception as e:
        messagebox.showerror("Unexpected Error", str(e))

def show_comparison(user_vals):
    """Draw a grouped bar chart of *user_vals* against the dataset averages.

    For features in LOAD_OR_FREQUENCY_FEATURES the dataset mean is flipped
    (``6 - mean``); all other means are used as they are. ``user_vals`` is
    plotted unchanged, so callers need to supply values in the same
    orientation as those averages for the two series to be comparable.

    The chart is embedded in the main window below the input rows and
    replaces the chart of any earlier call.

    Args:
        user_vals: One value per entry of FEATURE_NAMES, in the same order.
    """
    # Local import keeps this function self-contained. A plain Figure is not
    # registered with pyplot, so repeated predictions do not pile up open
    # figures the way plt.subplots() does.
    from matplotlib.figure import Figure

    avg_vals = []
    for f in FEATURE_NAMES:
        mean = data[f].mean()
        avg_vals.append(6 - mean if f in LOAD_OR_FREQUENCY_FEATURES else mean)

    # Destroy the widget from the previous call; otherwise every prediction
    # stacks another canvas on top of the old one in the same grid cell.
    previous = getattr(show_comparison, "_canvas", None)
    if previous is not None:
        previous.get_tk_widget().destroy()

    fig = Figure(figsize=(8, 4))
    # Attach the Tk canvas before drawing so layout uses the real renderer.
    canvas = FigureCanvasTkAgg(fig, master=root)
    ax = fig.add_subplot(111)

    x = np.arange(len(FEATURE_NAMES))
    width = 0.35

    ax.bar(x, avg_vals, width, label='Average Student', color='#2196F3', alpha=0.7)
    ax.bar(x + width, user_vals, width, label='You', color='#4CAF50', alpha=0.7)

    ax.set_ylabel('Score (1-5)')
    ax.set_title('Your Inputs vs Average Student')
    # Centre each tick between the two bars of its group.
    ax.set_xticks(x + width / 2)
    # Long column names are cut to 20 characters to keep the axis readable.
    ax.set_xticklabels([f[:20] + "..." if len(f) > 20 else f for f in FEATURE_NAMES], rotation=45, ha='right')
    ax.set_ylim(0, 5)
    ax.legend()
    fig.tight_layout()

    canvas.get_tk_widget().grid(row=len(FEATURE_NAMES)+2, column=0, columnspan=2, pady=10)
    canvas.draw()
    # Remember the canvas so the next call can remove it.
    show_comparison._canvas = canvas

# ---------------------
# GUI setup
# ---------------------
# Fixed-size main window.
root = tk.Tk()
root.title("Student Stress Predictor")
root.geometry("800x600")
root.resizable(False, False)

# "%P" makes Tk pass the validator the text the entry would hold if the
# pending edit were accepted.
vcmd = (root.register(validate_input), "%P")

# Entry widgets in FEATURE_NAMES order; predict_stress() reads them back.
entries = []

# One row per feature: descriptive label on the left, entry box on the right.
for row, feature in enumerate(FEATURE_NAMES):
    prompt = tk.Label(root, text=format_label(feature), anchor='w', font=('Arial', 10))
    prompt.grid(row=row, column=0, sticky='w', padx=10, pady=5)

    field = tk.Entry(root, width=10, validate="key", validatecommand=vcmd, font=('Arial', 10))
    field.grid(row=row, column=1, padx=10, pady=5)
    entries.append(field)

# The predict button spans both columns directly below the last feature row.
predict_button = tk.Button(
    root,
    text="Predict Stress Level",
    command=predict_stress,
    bg='#4CAF50',
    fg='white',
    font=('Arial', 12, 'bold'),
)
predict_button.grid(row=len(FEATURE_NAMES), column=0, columnspan=2, pady=15, sticky='ew', padx=10)

# Hand control to Tk; returns only after the window is closed.
root.mainloop()

