In [4]:
import os
import tkinter as tk
from tkinter import ttk
from tkinter import Tk, Label

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from pymongo import MongoClient

# Database settings.
# Each value may be overridden by an environment variable of the same name;
# the literals below are only the fallbacks used when the variable is unset.
# NOTE(review): the fallback credentials are committed in plain text --
# supply real credentials through the environment rather than editing them here.
MONGO_HOST = os.environ.get("MONGO_HOST", "localhost")
MONGO_PORT = int(os.environ.get("MONGO_PORT", "27017"))  # the port must be an int
MONGO_DB = os.environ.get("MONGO_DB", "admin")
MONGO_USERNAME = os.environ.get("MONGO_USERNAME", "root")
MONGO_PASSWORD = os.environ.get("MONGO_PASSWORD", "mongo_password")

def get_database():
    """Open a MongoDB connection and return the configured database handle.

    Connects to ``MONGO_HOST``:``MONGO_PORT`` as ``MONGO_USERNAME`` and
    returns the ``MONGO_DB`` database of the new client.

    Returns:
        The database object obtained by indexing the client with ``MONGO_DB``.
    """
    # Credentials are passed as keyword arguments instead of being formatted
    # into a "mongodb://user:pass@host" URI: inside a URI they would have to
    # be percent-escaped, and a password containing '@', ':', '/' or '%'
    # would otherwise produce a malformed connection string.
    client = MongoClient(
        host=MONGO_HOST,
        port=MONGO_PORT,
        username=MONGO_USERNAME,
        password=MONGO_PASSWORD,
    )
    return client[MONGO_DB]

def load_pivot_data_from_mongo():
    """Read the ``count_data`` collection into a DataFrame.

    All documents are fetched with the ``_id`` field excluded. When both the
    ``User_ID`` and ``Month`` columns are present they become the frame's
    (User_ID, Month) index; otherwise the frame is returned with its default
    index.

    Returns:
        pandas.DataFrame built from the fetched documents.
    """
    collection = get_database()["count_data"]
    records = list(collection.find({}, {"_id": 0}))
    frame = pd.DataFrame(records)

    index_keys = ["User_ID", "Month"]
    if all(key in frame.columns for key in index_keys):
        frame = frame.set_index(index_keys)
    return frame


# Outlier removal for correlation
def remove_outliers(df, components, lower=0.15, upper=0.85):
    """Keep only rows whose component values lie between two quantiles.

    NOTE(review): this function is defined a second time later in this file;
    Python keeps the later definition.

    Args:
        df: DataFrame to filter. It is not modified; a filtered frame is
            returned.
        components: Column names to filter on. Names that are not columns of
            ``df`` are ignored.
        lower: Lower quantile bound (inclusive), between 0 and 1.
        upper: Upper quantile bound (inclusive), between 0 and 1.

    Returns:
        The filtered DataFrame.

    Raises:
        ValueError: If ``lower`` is greater than ``upper``.
    """
    if lower > upper:
        raise ValueError("lower quantile must not exceed upper quantile")

    for comp in components:
        if comp not in df.columns:
            continue
        values = df[comp]
        q_low = values.quantile(lower)
        q_high = values.quantile(upper)
        # The filters are applied one after another, so the quantiles of a
        # later component are computed on the rows that survived the earlier
        # ones. Rows holding NaN in this column fail the comparison and are
        # dropped as well.
        df = df[values.between(q_low, q_high)]
    return df

# Trend line
def linear_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    NOTE(review): this function is defined a second time later in this file;
    Python keeps the later definition.

    Args:
        x: Sized iterable of numeric x values.
        y: Sized iterable of numeric y values, the same length as ``x``.

    Returns:
        A ``(slope, intercept)`` tuple. When every x value is identical the
        slope is undefined, so a horizontal line through the mean of ``y``
        is returned as ``(0.0, mean_y)``.

    Raises:
        ValueError: If the input is empty or ``x`` and ``y`` differ in length.
    """
    n = len(x)
    if n == 0:
        raise ValueError("linear_regression requires at least one data point")
    if len(y) != n:
        # zip() would silently truncate while the means still used len(x).
        raise ValueError("x and y must have the same length")

    x_mean = sum(x) / n
    y_mean = sum(y) / n
    denominator = sum((x_i - x_mean) ** 2 for x_i in x)
    if denominator == 0:
        # Zero variance in x: avoid dividing by zero.
        return 0.0, y_mean

    numerator = sum((x_i - x_mean) * (y_i - y_mean) for x_i, y_i in zip(x, y))
    slope = numerator / denominator
    intercept = y_mean - slope * x_mean
    return slope, intercept

# Function to calculate statistics
def _component_stats(frame, target_components):
    """Return ``{component: {"mean", "median", "mode"}}`` computed on *frame*."""
    summary = {}
    for comp in target_components:
        if comp not in frame.columns:
            summary[comp] = {"mean": None, "median": None, "mode": None}
            continue
        values = frame[comp]
        modes = values.mode()  # computed once; empty when there are no values
        summary[comp] = {
            "mean": round(values.mean(), 1),
            "median": round(values.median(), 1),
            # With several modes, the first one returned by pandas is reported.
            "mode": None if modes.empty else round(modes.iloc[0], 1),
        }
    return summary


def calculate_statistics(pivot_data, target_components):
    """Compute mean, median and mode per component, per month and overall.

    Args:
        pivot_data: DataFrame with one column per component. Months are taken
            from the ``Month`` index level, or from a ``Month`` column if no
            such level exists. When neither is present (for example an empty
            frame) only the overall entry is produced.
        target_components: Component (column) names to summarise. A component
            that is not a column yields ``None`` for every statistic.

    Returns:
        A dict keyed by each month value plus ``"Entire Semester"``; each
        value maps a component name to a dict with the keys ``"mean"``,
        ``"median"`` and ``"mode"``, rounded to one decimal place.
    """
    stats_by_month = {}

    # a. For each month
    if "Month" in pivot_data.index.names:
        monthly_groups = pivot_data.groupby(level="Month")
    elif "Month" in pivot_data.columns:
        monthly_groups = pivot_data.groupby("Month")
    else:
        # No month information to group by; skip the per-month section
        # instead of letting groupby raise.
        monthly_groups = ()
    for month, group in monthly_groups:
        stats_by_month[month] = _component_stats(group, target_components)

    # b. For the entire semester (all rows together)
    all_data = pivot_data.reset_index(drop=False)
    stats_by_month["Entire Semester"] = _component_stats(all_data, target_components)
    return stats_by_month


# Components summarised on the statistics page.
target_components = [
    "Quiz",
    "Lecture",
    "Assignment",
    "Attendance",
    "Survey",
]
# Components plotted (after outlier removal) on the correlation page.
correlation_components = [
    "Assignment",
    "Quiz",
    "Lecture",
    "Book",
    "Project",
    "Course",
]

# Loaded once when the module runs; both pages read these module-level values.
pivot_data = load_pivot_data_from_mongo()
stats_by_month = calculate_statistics(pivot_data, target_components)


# Main Menu
def main_menu():
    """Show the main menu window and run its event loop.

    The window offers two buttons; each one destroys the menu window and then
    opens the corresponding page (statistics or correlation).
    """
    root = Tk()
    root.title("Main Menu")
    root.geometry("400x200")

    def switch_to(open_page):
        # Close the menu first, then hand control to the chosen page.
        root.destroy()
        open_page()

    prompt = Label(root, text="Select an Option:", font=("Arial", 14))
    prompt.pack(pady=10)

    # The lambdas look the page functions up only when a button is clicked.
    buttons = (
        ("Statistics", lambda: switch_to(statistics_page)),
        ("Correlation", lambda: switch_to(correlation_page)),
    )
    for caption, handler in buttons:
        ttk.Button(root, text=caption, command=handler).pack(pady=5)

    root.mainloop()

# Statistics Page
def statistics_page():
    """Show the statistics window and run its event loop.

    The window holds a read-only month selector (every key of
    ``stats_by_month``, with "Entire Semester" listed last), a button that
    fills a table with the mean, median and mode of each component for the
    selected entry, and a "Back" button that returns to the main menu.
    """
    def go_back():
        stats_window.destroy()
        main_menu()

    def show_statistics():
        # Clear the rows left over from the previous selection.
        for i in stat_tree.get_children():
            stat_tree.delete(i)
        selected = month_var.get()
        # Translate the displayed label back to the real dictionary key
        # instead of calling int() on it, which raised ValueError for any
        # month key that is not an integer literal (e.g. "9.0" or "Sep").
        data_source = stats_by_month.get(label_to_key.get(selected, selected), {})
        for comp, stats in data_source.items():
            stat_tree.insert("", "end", values=(comp, stats["mean"], stats["median"], stats["mode"]))

    stats_window = Tk()
    stats_window.title("Statistics")
    Label(stats_window, text="Select Month:").pack(pady=5)
    # Combobox entries are strings, so remember which key each label stands for.
    label_to_key = {str(key): key for key in stats_by_month}
    month_options_str = [str(m) for m in stats_by_month if m != "Entire Semester"] + ["Entire Semester"]
    month_var = tk.StringVar(value="Entire Semester")
    ttk.Combobox(stats_window, textvariable=month_var, values=month_options_str, state="readonly").pack(pady=5)
    ttk.Button(stats_window, text="Show Statistics", command=show_statistics).pack(pady=5)
    stat_tree = ttk.Treeview(stats_window, columns=("Component", "Mean", "Median", "Mode"), show="headings")
    for col in ["Component", "Mean", "Median", "Mode"]:
        stat_tree.heading(col, text=col)
    stat_tree.pack(pady=10)
    ttk.Button(stats_window, text="Back", command=go_back).pack(pady=5)
    stats_window.mainloop()


# Outlier removal for correlation
def remove_outliers(df, components, lower=0.15, upper=0.85):
    """Keep only rows whose component values lie between two quantiles.

    NOTE(review): this redefines the function of the same name declared
    earlier in this file; being the later definition, it is the one in
    effect when the correlation page runs.

    Args:
        df: DataFrame to filter. It is not modified; a filtered frame is
            returned.
        components: Column names to filter on. Names that are not columns of
            ``df`` are ignored.
        lower: Lower quantile bound (inclusive), between 0 and 1.
        upper: Upper quantile bound (inclusive), between 0 and 1.

    Returns:
        The filtered DataFrame.

    Raises:
        ValueError: If ``lower`` is greater than ``upper``.
    """
    if lower > upper:
        raise ValueError("lower quantile must not exceed upper quantile")

    for comp in components:
        if comp not in df.columns:
            continue
        values = df[comp]
        q_low = values.quantile(lower)
        q_high = values.quantile(upper)
        # The filters are applied one after another, so the quantiles of a
        # later component are computed on the rows that survived the earlier
        # ones. Rows holding NaN in this column fail the comparison and are
        # dropped as well.
        df = df[values.between(q_low, q_high)]
    return df

# Trend line
def linear_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    NOTE(review): this redefines the function of the same name declared
    earlier in this file; being the later definition, it is the one in
    effect when the correlation page runs.

    Args:
        x: Sized iterable of numeric x values.
        y: Sized iterable of numeric y values, the same length as ``x``.

    Returns:
        A ``(slope, intercept)`` tuple. When every x value is identical the
        slope is undefined, so a horizontal line through the mean of ``y``
        is returned as ``(0.0, mean_y)``.

    Raises:
        ValueError: If the input is empty or ``x`` and ``y`` differ in length.
    """
    n = len(x)
    if n == 0:
        raise ValueError("linear_regression requires at least one data point")
    if len(y) != n:
        # zip() would silently truncate while the means still used len(x).
        raise ValueError("x and y must have the same length")

    x_mean = sum(x) / n
    y_mean = sum(y) / n
    denominator = sum((x_i - x_mean) ** 2 for x_i in x)
    if denominator == 0:
        # Zero variance in x: avoid dividing by zero.
        return 0.0, y_mean

    numerator = sum((x_i - x_mean) * (y_i - y_mean) for x_i, y_i in zip(x, y))
    slope = numerator / denominator
    intercept = y_mean - slope * x_mean
    return slope, intercept

# Correlation Page
def correlation_page():
    """Show the correlation window and run its event loop.

    For every entry of ``correlation_components`` that is a column of the
    outlier-filtered data, the component's values are scattered against the
    ``User_ID`` index level and a dashed least-squares trend line is drawn.
    A "Back" button returns to the main menu.
    """
    def go_back():
        corr_window.destroy()
        main_menu()

    corr_window = Tk()
    corr_window.title("Correlation Analysis with Trend Lines")
    fig = Figure(figsize=(8, 6))
    ax = fig.add_subplot(111)

    # Work on a copy so the module-level frame is never touched.
    clean_data = remove_outliers(pivot_data.copy(), correlation_components)
    plotted_any = False
    for comp in correlation_components:
        if comp not in clean_data.columns:
            continue
        x = clean_data.index.get_level_values("User_ID")
        if len(x) == 0:
            # Outlier removal can discard every row; fitting a line to no
            # points would fail, so there is nothing to draw.
            continue
        y = clean_data[comp].fillna(0)
        ax.scatter(x, y, label=comp, alpha=0.5)
        slope, intercept = linear_regression(x, y)
        ax.plot(x, [slope * xi + intercept for xi in x], linestyle='dashed', label=f"{comp} Trend")
        plotted_any = True

    ax.set_xlabel("User_ID")
    ax.set_ylabel("Interactions")
    if plotted_any:
        # Only build a legend when at least one labelled series exists.
        ax.legend()
    ax.set_title("Correlation Analysis with Trend Lines")
    canvas = FigureCanvasTkAgg(fig, master=corr_window)
    canvas.draw()
    canvas.get_tk_widget().pack()
    ttk.Button(corr_window, text="Back", command=go_back).pack(pady=5)
    corr_window.mainloop()

# Launch the GUI only when this code is run directly (a script or a notebook
# cell, where __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main_menu()

Exception in Tkinter callback
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/lib/python3.12/tkinter/__init__.py", line 1968, in __call__
    return self.func(*args)
           ^^^^^^^^^^^^^^^^
  File "/var/folders/dx/92lq65kj1pv01fns32c076kh0000gn/T/ipykernel_14389/2342067688.py", line 83, in open_statistics
    statistics_page()
  File "/var/folders/dx/92lq65kj1pv01fns32c076kh0000gn/T/ipykernel_14389/2342067688.py", line 116, in statistics_page
    month_var = tk.StringVar(value="Entire Semester")
                ^^
NameError: name 'tk' is not defined. Did you mean: 'Tk'?
