In [None]:
import os
import json
import plotly.graph_objs as go
import json 
import os
from datetime import datetime

# Functions

In [None]:
def parse_date(filename):
    """Turn a ``YYYY-MM-DD[.ext]`` filename into a ``datetime``.

    Only the text before the first ``.`` is parsed. When that text is not a
    valid ``%Y-%m-%d`` date, a message is printed and ``None`` is returned
    instead of raising.
    """
    stem = filename.split('.')[0]
    try:
        parsed = datetime.strptime(stem, '%Y-%m-%d')
    except ValueError as e:
        print(f"Error processing {filename}: {e}")
        return None
    return parsed

In [None]:

def plot_hist_feature(feature, basepath):
    """Plot one feature's distribution over time with a date slider.

    Every entry of ``basepath`` whose name parses as ``YYYY-MM-DD[.ext]`` and
    falls on the 1st or 15th of a month is loaded as JSON. For each such file
    ``data["drilldowns"][feature]`` is read and drawn as either:

    * a histogram bar trace plus a KDE line trace, when it contains
      ``"kdehistplot"`` (keys ``hist``, ``plot_centers``, ``kde_x``, ``kde``);
    * a single bar trace of category probabilities otherwise, read from
      ``"histogram"`` (keys ``x`` and ``probability``). Categories absent on a
      given date are drawn with probability 0 so the x axis is stable.

    The figure is written to ``{feature}_histogram_interactive.html`` in the
    current working directory and then shown.

    Args:
        feature: Key to look up under ``"drilldowns"`` in each JSON file.
        basepath: Directory containing the dated JSON files.

    Raises:
        ValueError: If no file in ``basepath`` is dated the 1st or 15th.
        KeyError: If a selected file lacks ``"drilldowns"``/``feature``, or the
            feature entry has neither ``"kdehistplot"`` nor ``"histogram"``.
    """
    # Parse each filename exactly once: parse_date prints a message for every
    # name it cannot parse, so calling it twice would duplicate that output.
    dated_files = []
    for filename in os.listdir(basepath):
        parsed = parse_date(filename)
        if parsed is not None and parsed.day in {1, 15}:
            dated_files.append((parsed, filename))
    if not dated_files:
        raise ValueError(
            f"No files dated the 1st or 15th of a month found in {basepath!r}"
        )
    dated_files.sort(key=lambda item: item[0])
    filenames = [filename for _, filename in dated_files]
    # Slider labels and titles use the filename without its extension.
    labels = [filename.split('.')[0] for filename in filenames]

    # Load every snapshot once, closing each file, and keep only the part of
    # the document this plot needs.
    snapshots = []
    for filename in filenames:
        with open(os.path.join(basepath, filename), 'r') as f:
            snapshots.append(json.load(f)["drilldowns"][feature])

    # Union of categories over all dates, sorted so the x axis order is stable.
    all_categories = set()
    for snapshot in snapshots:
        if "histogram" in snapshot:
            all_categories.update(snapshot["histogram"]["x"])
    all_categories = sorted(all_categories)

    # Build the traces; only the first date starts visible.
    fig = go.Figure()
    max_y_value = 0
    trace_counter = 0
    trace_groups = []  # (index of first trace, number of traces) per date
    for i, (filename, snapshot) in enumerate(zip(filenames, snapshots)):
        starts_visible = (i == 0)
        if "kdehistplot" in snapshot:
            kdehist = snapshot["kdehistplot"]
            hist = kdehist["hist"]
            centers = kdehist["plot_centers"]
            kde_x = kdehist["kde_x"]
            kde = kdehist["kde"]

            fig.add_trace(
                go.Bar(x=centers, y=hist, marker=dict(color='blue'), name='Histogram', opacity=0.75, visible=starts_visible)
            )
            fig.add_trace(
                go.Scatter(x=kde_x, y=kde, mode='lines', line=dict(color='red'), name='KDE', visible=starts_visible)
            )
            trace_groups.append((trace_counter, 2))
            trace_counter += 2
            max_y_value = max(max_y_value, max(hist, default=0), max(kde, default=0))
        else:
            # Categorical data: start every known category at 0, then fill in
            # the probabilities reported for this date.
            histogram = snapshot["histogram"]
            probability = histogram["probability"]
            category_data = {cat: 0 for cat in all_categories}
            for cat, prob in zip(histogram["x"], probability):
                category_data[cat] = prob

            fig.add_trace(
                go.Bar(x=list(category_data.keys()), y=list(category_data.values()), marker=dict(color='blue'), name=f'Category Probability {filename}', visible=starts_visible)
            )
            trace_groups.append((trace_counter, 1))
            trace_counter += 1
            max_y_value = max(max_y_value, max(probability, default=0))

    # One slider step per date: show only that date's traces.
    steps = []
    for label, (first_trace, n_traces) in zip(labels, trace_groups):
        visible = [False] * trace_counter
        for offset in range(n_traces):
            visible[first_trace + offset] = True
        steps.append({
            'method': 'update',
            'args': [{'visible': visible}, {'title': f"Histogram for {feature} on {label}"}],
            'label': label
        })

    sliders = [dict(
        active=0,
        currentvalue={"prefix": "Date: "},
        pad={"t": 50},
        steps=steps
    )]

    fig.update_layout(
        sliders=sliders,
        # Same format as the per-step titles, so the title does not change
        # shape the first time the slider is moved.
        title_text=f"Histogram for {feature} on {labels[0]}",
        height=600,
        width=1000,
        title_x=0.5,
        title_y=0.9,
    )
    # Shared y range across dates so bars are comparable while sliding.
    fig.update_yaxes(range=[0, max_y_value])

    fig.write_html(f'{feature}_histogram_interactive.html')
    fig.show()

# Analysis

In [None]:
# Directory of dated JSON history files; it is joined with a filename below
# and passed to plot_hist_feature, which lists and reads its contents.
basepath = '/autofs/cluster/qtim/projects/xray_drift/drift_analyses/classification_final_allpoc_standarddrift/history/'


In [None]:
# Load a single snapshot and display the keys under 'drilldowns'. These are
# the names plot_hist_feature looks up as its `feature` argument.
# Note: this file is dated the 3rd, so plot_hist_feature itself (which keeps
# only the 1st and 15th of each month) would not include it.
date_json = os.path.join(basepath, '2020-07-03.json')
with open(date_json, 'r') as f:
    data = json.load(f)

data['drilldowns'].keys()


In [None]:
# Render the slider figure for the 'Point of Care' feature; this also writes
# 'Point of Care_histogram_interactive.html' to the working directory.
plot_hist_feature('Point of Care', basepath)