In [15]:
from tkinter import Tk, Label
import pandas as pd
import matplotlib.pyplot as plt
from pymongo import MongoClient
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure

# MongoDB connection settings; get_database() assembles its URI from these.
# NOTE(review): the credentials are hard-coded in source - consider loading
# them from the environment or a secrets store before sharing this notebook.
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MONGO_DB = 'admin'  # name of the database handle returned by get_database()
MONGO_USERNAME = 'root'
MONGO_PASSWORD = 'mongo_password'

def get_database():
    """Open a MongoDB connection and return the configured database handle.

    The connection URI is assembled from the module-level ``MONGO_*``
    settings. The username and password are percent-encoded first, because
    characters such as ``@``, ``:``, ``/`` or ``%`` in raw credentials would
    otherwise corrupt the URI.

    Returns:
        The ``MONGO_DB`` database of a newly created ``MongoClient``.
    """
    # Imported locally so this function stays self-contained.
    from urllib.parse import quote_plus

    username = quote_plus(MONGO_USERNAME)
    password = quote_plus(MONGO_PASSWORD)
    uri = f"mongodb://{username}:{password}@{MONGO_HOST}:{MONGO_PORT}/"
    client = MongoClient(uri)
    return client[MONGO_DB]

def load_pivot_data_from_mongo():
    """Fetch every document of the ``count_data`` collection as a DataFrame.

    The ``_id`` field is excluded by the query projection. When both the
    ``User_ID`` and ``Month`` columns are present they become a two-level
    index; otherwise the frame is returned with its default index.
    """
    collection = get_database()["count_data"]
    records = list(collection.find({}, {"_id": 0}))
    frame = pd.DataFrame(records)

    index_columns = ["User_ID", "Month"]
    if not all(column in frame.columns for column in index_columns):
        return frame
    return frame.set_index(index_columns)

def remove_outliers(df, components, *, lower=0.15, upper=0.85):
    """Drop rows whose component values fall outside a quantile band.

    Columns are processed in the order given by ``components``. Each
    column's quantiles are computed on the rows that survived the previous
    columns, so the filters compound.

    Args:
        df: DataFrame to filter. The input frame is never mutated.
        components: Column names to filter on. Names that are not columns
            of ``df`` are skipped.
        lower: Lower quantile of the kept band, in ``[0, 1]``.
        upper: Upper quantile of the kept band, in ``[0, 1]``.

    Returns:
        A DataFrame holding only the rows that lie inside the inclusive
        ``[lower, upper]`` quantile band of every processed column.

    Raises:
        ValueError: If ``0 <= lower <= upper <= 1`` does not hold.
    """
    if not 0 <= lower <= upper <= 1:
        raise ValueError(
            f"quantile bounds must satisfy 0 <= lower <= upper <= 1, "
            f"got lower={lower!r}, upper={upper!r}"
        )

    for comp in components:
        if comp not in df.columns:
            continue
        q_low = df[comp].quantile(lower)
        q_high = df[comp].quantile(upper)
        # between() is inclusive on both ends; rows with NaN in this column
        # compare as False and are therefore dropped as well.
        df = df[df[comp].between(q_low, q_high)]
    return df


def linear_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: Sized iterable of numeric predictor values.
        y: Sized iterable of numeric responses, the same length as ``x``.

    Returns:
        A ``(slope, intercept)`` tuple. When every ``x`` value is identical
        the slope is undefined; a horizontal line through the mean of ``y``
        (slope ``0.0``) is returned instead of dividing by zero.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    n = len(x)
    if n == 0:
        raise ValueError("linear_regression requires at least one data point")
    if len(y) != n:
        # zip() would silently truncate while the means still use len(x).
        raise ValueError(
            f"x and y must have the same length, got {n} and {len(y)}"
        )

    x_mean = sum(x) / n
    y_mean = sum(y) / n
    denominator = sum((x_i - x_mean) ** 2 for x_i in x)
    if denominator == 0:
        # Zero variance in x: fall back to the flat line through mean(y).
        return 0.0, y_mean

    numerator = sum((x_i - x_mean) * (y_i - y_mean) for x_i, y_i in zip(x, y))
    slope = numerator / denominator
    intercept = y_mean - slope * x_mean
    return slope, intercept

# Load pivot data
# Runs when the cell executes: queries MongoDB once and keeps the result in a
# module-level frame that correlation_page() reads.
pivot_data = load_pivot_data_from_mongo()

# Target components
# Column names that correlation_page() cleans and plots; names that are not
# columns of the data are skipped there.
correlation_components = ["Assignment", "Quiz", "Lecture", "Book", "Project", "Course"]

# Correlation page
def correlation_page():
    """Open a Tk window plotting each component against ``User_ID``.

    Reads the module-level ``pivot_data`` and ``correlation_components``.
    When ``pivot_data`` is empty a red notice is shown above an empty
    figure. Otherwise the data is passed through ``remove_outliers`` and,
    for every component column present, a scatter of the component (missing
    values replaced by 0) against the ``User_ID`` index level is drawn,
    together with a dashed least-squares trend line.

    The call blocks in the Tk main loop until the window is closed.
    """
    corr_window = Tk()
    corr_window.title("Correlation")

    fig = Figure(figsize=(8, 6))
    ax = fig.add_subplot(111)

    if pivot_data.empty:
        Label(corr_window, text="No data available for correlation analysis.", fg="red").pack(pady=10)
    else:
        clean_data = remove_outliers(pivot_data.copy(), correlation_components)

        # The x-axis is identical for every component, so fetch it once.
        x = clean_data.index.get_level_values("User_ID")
        # A slope needs at least two distinct x values; with a single
        # distinct User_ID the regression would divide by zero.
        can_fit_trend = x.nunique() > 1

        # Graph display
        for comp in correlation_components:
            if comp not in clean_data.columns:
                continue
            y = clean_data[comp].fillna(0)
            ax.scatter(x, y, label=comp, alpha=0.5)

            if can_fit_trend:
                slope, intercept = linear_regression(x, y)
                trend_line = [slope * xi + intercept for xi in x]
                ax.plot(x, trend_line, linestyle='dashed', label=f"{comp} Trend")

        ax.set_xlabel("User_ID")
        ax.set_ylabel("Components")
        ax.legend()
        ax.set_title("Correlation Analysis with Trend Lines")

    canvas = FigureCanvasTkAgg(fig, master=corr_window)
    canvas.draw()
    canvas.get_tk_widget().pack()

    corr_window.mainloop()

# MAIN
# Entry point of the cell: opens the correlation window and blocks until it
# is closed.
correlation_page()


In [19]:
# Variant without outlier removal: the pivot data is plotted unchanged.

from tkinter import Tk, Label
import pandas as pd
import matplotlib.pyplot as plt
from pymongo import MongoClient
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure

# MongoDB connection settings; get_database() assembles its URI from these.
# NOTE(review): the credentials are hard-coded in source - consider loading
# them from the environment or a secrets store before sharing this notebook.
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MONGO_DB = 'admin'  # name of the database handle returned by get_database()
MONGO_USERNAME = 'root'
MONGO_PASSWORD = 'mongo_password'

def get_database():
    """Open a MongoDB connection and return the configured database handle.

    The connection URI is assembled from the module-level ``MONGO_*``
    settings. The username and password are percent-encoded first, because
    characters such as ``@``, ``:``, ``/`` or ``%`` in raw credentials would
    otherwise corrupt the URI.

    Returns:
        The ``MONGO_DB`` database of a newly created ``MongoClient``.
    """
    # Imported locally so this function stays self-contained.
    from urllib.parse import quote_plus

    username = quote_plus(MONGO_USERNAME)
    password = quote_plus(MONGO_PASSWORD)
    uri = f"mongodb://{username}:{password}@{MONGO_HOST}:{MONGO_PORT}/"
    client = MongoClient(uri)
    return client[MONGO_DB]

def load_pivot_data_from_mongo():
    """Fetch every document of the ``count_data`` collection as a DataFrame.

    The ``_id`` field is excluded by the query projection. When both the
    ``User_ID`` and ``Month`` columns are present they become a two-level
    index; otherwise the frame is returned with its default index.
    """
    collection = get_database()["count_data"]
    records = list(collection.find({}, {"_id": 0}))
    frame = pd.DataFrame(records)

    index_columns = ["User_ID", "Month"]
    if not all(column in frame.columns for column in index_columns):
        return frame
    return frame.set_index(index_columns)

def linear_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: Sized iterable of numeric predictor values.
        y: Sized iterable of numeric responses, the same length as ``x``.

    Returns:
        A ``(slope, intercept)`` tuple. When every ``x`` value is identical
        the slope is undefined; a horizontal line through the mean of ``y``
        (slope ``0.0``) is returned instead of dividing by zero.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    n = len(x)
    if n == 0:
        raise ValueError("linear_regression requires at least one data point")
    if len(y) != n:
        # zip() would silently truncate while the means still use len(x).
        raise ValueError(
            f"x and y must have the same length, got {n} and {len(y)}"
        )

    x_mean = sum(x) / n
    y_mean = sum(y) / n
    denominator = sum((x_i - x_mean) ** 2 for x_i in x)
    if denominator == 0:
        # Zero variance in x: fall back to the flat line through mean(y).
        return 0.0, y_mean

    numerator = sum((x_i - x_mean) * (y_i - y_mean) for x_i, y_i in zip(x, y))
    slope = numerator / denominator
    intercept = y_mean - slope * x_mean
    return slope, intercept

# Load pivot data
# Runs when the cell executes: queries MongoDB once and keeps the result in a
# module-level frame that correlation_page() reads.
pivot_data = load_pivot_data_from_mongo()

# Target components
# Column names that correlation_page() plots; names that are not columns of
# the data are skipped there.
correlation_components = ["Assignment", "Quiz", "Lecture", "Book", "Project", "Course"]

# Correlation page (without cleaning)
def correlation_page():
    """Open a Tk window plotting each raw component against ``User_ID``.

    Reads the module-level ``pivot_data`` and ``correlation_components``.
    When ``pivot_data`` is empty a red notice is shown above an empty
    figure. Otherwise, for every component column present, a scatter of the
    component (missing values replaced by 0) against the ``User_ID`` index
    level is drawn, together with a dashed least-squares trend line. No
    outlier removal is applied in this variant.

    The call blocks in the Tk main loop until the window is closed.
    """
    corr_window = Tk()
    corr_window.title("Correlation (No Cleaning)")

    fig = Figure(figsize=(8, 6))
    ax = fig.add_subplot(111)

    if pivot_data.empty:
        Label(corr_window, text="No data available for correlation analysis.", fg="red").pack(pady=10)
    else:
        # The x-axis is identical for every component, so fetch it once.
        x = pivot_data.index.get_level_values("User_ID")
        # A slope needs at least two distinct x values; with a single
        # distinct User_ID the regression would divide by zero.
        can_fit_trend = x.nunique() > 1

        for comp in correlation_components:
            if comp not in pivot_data.columns:
                continue
            y = pivot_data[comp].fillna(0)
            ax.scatter(x, y, label=comp, alpha=0.5)

            if can_fit_trend:
                slope, intercept = linear_regression(x, y)
                trend_line = [slope * xi + intercept for xi in x]
                ax.plot(x, trend_line, linestyle='dashed', label=f"{comp} Trend")

        ax.set_xlabel("User_ID")
        ax.set_ylabel("Components")
        ax.legend()
        ax.set_title("Correlation Analysis without Additional Cleaning")

    canvas = FigureCanvasTkAgg(fig, master=corr_window)
    canvas.draw()
    canvas.get_tk_widget().pack()

    corr_window.mainloop()

# MAIN
# Entry point of the cell: opens the correlation window and blocks until it
# is closed.
correlation_page()
