### Product, Order Date, Order Hour, Purchase Address
### Quantity Ordered, Price Each, Sales

In [10]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import numpy as np 
import os
from IPython.display import HTML
import tempfile
import webbrowser

csv_path = "../csv/"

pd.set_option('display.max_rows', None)     # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns

# for csv_filename in os.listdir("../csv/"):
#     df = pd.read_csv(f"../csv/{csv_filename}")
#     print(csv_filename, df.head())

# os.listdir() returns entries in arbitrary order and may include non-CSV
# files, so keep only the CSVs and sort them to make the choice of input
# file reproducible between runs and machines.
csv_files = sorted(
    name for name in os.listdir(csv_path) if name.lower().endswith(".csv")
)
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {csv_path!r}")

# Load the first CSV (alphabetically) as the working DataFrame.
df = pd.read_csv(os.path.join(csv_path, csv_files[0]))


def display_plotly_html(html_string):
    """Wrap an HTML string in an IPython ``HTML`` display object and return it."""
    rendered = HTML(html_string)
    return rendered

def overviwe_plotly_html(html_string):
    """Write an HTML string to a temporary ``.html`` file and open it in a browser.

    Parameters:
        html_string (str): HTML markup to write to the file.

    Returns:
        None

    The temporary file is created with ``delete=False`` so it survives this
    call and is still on disk when the browser loads it; it is not removed
    afterwards.
    """
    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".html", encoding="utf-8") as f:
        f.write(html_string)

    # Open the file only after the ``with`` block has flushed and closed it;
    # otherwise the browser can read an empty or partially written file
    # (and on Windows the file may still be locked by this process).
    webbrowser.open(f.name)


def preprocess_data(df):
    """Add derived sales, date/time and city columns to an orders DataFrame.

    The frame is modified in place and the same object is returned.

    Parameters:
        df (pandas.DataFrame): Must contain the columns "Quantity Ordered",
            "Price Each", "Order Date" (formatted as ``%m/%d/%y %H:%M``) and
            "Purchase Address".

    Returns:
        pandas.DataFrame: ``df`` itself, with "Quantity Ordered" and
        "Price Each" converted to numbers, "Order Date" converted to
        datetimes, and the new columns "Sales", "Order Value", "Date",
        "Time", "Month", "Week", "Day", "Hour" and "City".

    Values that cannot be parsed as numbers or dates become NaN / NaT
    instead of raising.
    """
    # Unparseable values are coerced to NaN rather than raising.
    df["Quantity Ordered"] = pd.to_numeric(df["Quantity Ordered"], errors="coerce")
    df["Price Each"] = pd.to_numeric(df["Price Each"], errors="coerce")

    df["Sales"] = df["Price Each"] * df["Quantity Ordered"]
    # NOTE(review): Sales / Quantity Ordered reduces to the unit price
    # (and is NaN when the quantity is 0) -- confirm this is the intended
    # definition of "Order Value".
    df["Order Value"] = df["Sales"] / df["Quantity Ordered"]

    df["Order Date"] = pd.to_datetime(df["Order Date"], format="%m/%d/%y %H:%M", errors="coerce")
    order_ts = df["Order Date"].dt
    df["Date"] = order_ts.date
    df["Time"] = order_ts.time
    df["Month"] = order_ts.month
    df["Week"] = order_ts.isocalendar().week
    df["Day"] = order_ts.day
    df["Hour"] = order_ts.hour

    # City is the second comma-separated field of the address. extract()
    # trims the surrounding whitespace (so "St, Dallas, TX" gives "Dallas",
    # not " Dallas") and yields NaN for addresses without a comma instead
    # of failing when that field is missing.
    df["City"] = df["Purchase Address"].str.extract(
        r"^[^,]*,\s*([^,]*?)\s*(?:,|$)", expand=False
    )
    return df

def generate_charts(df, group_by, label, plot_config=None):
    """Build Plotly charts of aggregated order metrics as HTML fragments.

    Parameters:
        df (pandas.DataFrame): Data containing ``group_by`` plus
            "Quantity Ordered" and "Sales", and either "Price Each" (when
            ``group_by == "Product"``) or "Order Value" (otherwise).
        group_by (str): Column to group by; also used as the x axis.
        label (str): Human-readable label used in chart titles and, lower-cased
            with spaces replaced by underscores, in the chart keys.
        plot_config (dict | None): Optional mapping of metric name to plot
            type ("bar", "line" or "scatter"). Entries override the built-in
            defaults per metric; metrics not listed keep their default.
            An unrecognised plot type falls back to a bar chart.

    Returns:
        dict: ``"<metric>_grouped_by_<group_by>_for_<label>"`` mapped to the
        chart's HTML (``pio.to_html(..., full_html=False)``). Metrics are
        "quantity_ordered" and "total_sales", plus "average_price" when
        grouping by "Product", or "average_order_value" and
        "total_order_value" for any other grouping.
    """
    group_key = f"grouped_by_{group_by.lower()}"
    label_key = f"for_{label.lower().replace(' ', '_')}"

    default_plot_types = {
        "quantity_ordered": "bar",           # Best for comparing discrete volumes across time or categories
        "total_sales": "bar",                # Revenue comparisons pop better in grouped bars
        "average_price": "line",             # Ideal for showing price trends over time
        "average_order_value": "scatter",    # Great for spotting variability and outliers
        "total_order_value": "line"          # Smooth progression of cumulative value over time
    }
    # Merge caller overrides over the defaults so that a partial plot_config
    # only changes the metrics it names instead of discarding every default.
    plot_types = {**default_plot_types, **(plot_config or {})}

    plot_funcs = {
        "bar": px.bar,
        "line": px.line,
        "scatter": px.scatter,
    }

    # Each entry: (metric key, source column, aggregation, title prefix).
    metrics = [
        ("quantity_ordered", "Quantity Ordered", "sum", "Quantity Ordered"),
        ("total_sales", "Sales", "sum", "Total Sales"),
    ]
    if group_by == "Product":
        metrics.append(("average_price", "Price Each", "mean", "Average Price"))
    else:
        metrics.append(("average_order_value", "Order Value", "mean", "Average Order Value"))
        metrics.append(("total_order_value", "Order Value", "sum", "Total Order Value"))

    charts = {}
    for metric, column, aggregation, title in metrics:
        summary = df.groupby(group_by, as_index=False)[column].agg(aggregation)
        # Unknown plot types fall back to a bar chart.
        plot_func = plot_funcs.get(plot_types.get(metric, "bar"), px.bar)
        fig = plot_func(summary, x=group_by, y=column, title=f"{title} {group_key} {label}")
        charts[f"{metric}_{group_key}_{label_key}"] = pio.to_html(fig, full_html=False)

    return charts


def run_analysis(df, group_index=0, specific_products=None, plot_config=None):
    """Generate charts for the whole dataset and for individual products.

    Parameters:
        df (pandas.DataFrame): Data with a "Product" column plus the columns
            that ``generate_charts`` reads.
        group_index (int): Position of the grouping column in
            ["Product", "Date", "Time", "Month", "Week", "Day", "Hour", "City"].
        specific_products (list | None): Products to chart individually;
            ``None`` means every non-null product found in ``df``.
        plot_config (dict | None): Passed through to ``generate_charts``.

    Returns:
        dict: Chart key -> chart HTML, first for "All Products" and then for
        each requested product that has at least one row.
    """
    dimensions = ["Product", "Date", "Time", "Month", "Week", "Day", "Hour", "City"]
    group_by = dimensions[group_index]

    # Start with the charts covering every product together.
    collected = {}
    collected.update(generate_charts(df, group_by, "All Products", plot_config))

    products = specific_products
    if products is None:
        products = df["Product"].dropna().unique().tolist()

    for name in products:
        subset = df[df["Product"] == name]
        # Products without any rows produce no charts.
        if not subset.empty:
            collected.update(generate_charts(subset, group_by, name, plot_config))

    return collected


def filter_charts_by_product_label(html_charts: dict, keyword: str) -> dict:
    """
    Filters chart dictionary by keyword match in the product label only.

    Keys are expected to look like
    "<metric_name>_grouped_by_<grouping_dimension>_for_<product_label>".
    The product label is everything after the FIRST "_for_", so labels that
    themselves contain "_for_" (e.g. "charger_for_phone") are matched in full.
    Keys without "_for_" are never returned.

    Parameters:
        html_charts (dict): Dictionary of chart HTMLs.
        keyword (str): Keyword to match in product label (case-insensitive).
            Surrounding whitespace is ignored. A keyword containing spaces
            is also tried with the spaces replaced by underscores.

    Returns:
        dict: Filtered chart dictionary.
    """
    keyword = keyword.strip().lower()
    # Also accept the underscore form of a spaced keyword ("all products"
    # matching a label "all_products"). The keyword as typed is still tried,
    # so this only adds matches.
    candidates = {keyword, keyword.replace(" ", "_")}

    filtered = {}
    for key, chart in html_charts.items():
        _, separator, product_label = key.partition("_for_")
        if not separator:
            continue  # key carries no product label
        product_label = product_label.lower()
        if any(candidate in product_label for candidate in candidates):
            filtered[key] = chart

    return filtered



# Plot type to use for each metric.
plot_config = dict(
    quantity_ordered="bar",           # Best for comparing discrete volumes across time or categories
    total_sales="bar",                # Revenue comparisons pop better in grouped bars
    average_price="line",             # Ideal for showing price trends over time
    average_order_value="scatter",    # Great for spotting variability and outliers
    total_order_value="line",         # Smooth progression of cumulative value over time
)

# Derive the extra columns, then chart everything grouped by "Hour"
# (index 6 in run_analysis' grouping list).
cooked_df = preprocess_data(df)
html_charts = run_analysis(cooked_df, group_index=6, plot_config=plot_config)

# Keep only the charts whose product label contains "all_product".
filter_charts = filter_charts_by_product_label(html_charts, "all_product")

print("done")
# Open each remaining chart in the browser, one temp file per chart.
for i in filter_charts:
    print("show pls")
    overviwe_plotly_html(filter_charts[i])

done
show pls
show pls
show pls
show pls


In [None]:
# Product, Order Date, Order Hour, Purchase Address
# Quantity Ordered, Price Each, Sales
# Chart key format: "<metric_name>_grouped_by_<grouping_dimension>_for_<product_label>"


# plot_config = {
#         "quantity_ordered": "bar",
#         "total_sales": "bar",
#         "average_price": "line",
#         "average_order_value": "scatter",
#         "total_order_value": "bar"
# }

# html_charts = run_analysis(df, group_index=1, plot_config=plot_config)

