In [2]:
import os
import json
import plotly.graph_objs as go
import json 
import os
from datetime import datetime
from pathlib import Path


# Functions

In [51]:
def parse_date(filename):
    """Parse the date encoded in a filename such as ``2020-07-01.json``.

    Only the text before the first ``.`` is considered, and it must match
    ``%Y-%m-%d``.

    Returns:
        The corresponding ``datetime``, or ``None`` when that text is not a
        valid date in this format.
    """
    stem = filename.split('.')[0]
    try:
        parsed = datetime.strptime(stem, '%Y-%m-%d')
    except ValueError:
        # Not a date-named file: report that with None instead of raising.
        return None
    return parsed

In [48]:

def plot_hist_feature(feature, basepath, output_dir):
    """Write an interactive HTML histogram of ``feature`` with a date slider.

    Files in ``basepath`` whose name parses as a date (see ``parse_date``) and
    falls on the 1st or 15th of a month are loaded in chronological order.
    For each of them, ``data["drilldowns"][feature]`` is plotted as:

    * a histogram bar trace plus a KDE line when ``"kdehistplot"`` contains
      ``"kde_x"``,
    * a histogram bar trace only when ``"kdehistplot"`` has no ``"kde_x"``,
    * otherwise a bar trace of ``"histogram"`` probabilities over the union of
      categories seen across all selected dates.

    A slider switches between dates by toggling trace visibility. The figure
    is saved as ``<feature>_histogram_interactive.html`` in ``output_dir``.

    Args:
        feature: Key to look up under ``"drilldowns"`` in every JSON file.
        basepath: Directory containing the date-named JSON files.
        output_dir: Existing directory that receives the HTML file.

    Raises:
        ValueError: If no file in ``basepath`` is dated on the 1st or 15th.
        KeyError: If a selected file lacks ``feature`` or the expected keys.
    """
    # Select only the 1st and 15th of each month, oldest first.
    dated_files = []
    for filename in os.listdir(basepath):
        parsed = parse_date(filename)  # None for names that are not dates
        if parsed is not None and parsed.day in {1, 15}:
            dated_files.append((filename, parsed))
    dated_files.sort(key=lambda item: item[1])
    if not dated_files:
        raise ValueError(
            f"No files dated on the 1st or 15th of a month found in {basepath!r}"
        )
    filenames = [filename for filename, _ in dated_files]
    # Slider labels and titles show the date without the file extension.
    labels = [filename.split('.')[0] for filename in filenames]

    # Read every file exactly once and keep only this feature's drilldown;
    # all later passes work from memory.
    drilldowns = []
    for filename in filenames:
        with open(os.path.join(basepath, filename), 'r') as f:
            drilldowns.append(json.load(f)["drilldowns"][feature])

    # Union of categories over *all* dates so every categorical trace shares
    # the same, consistently ordered x-axis.
    all_categories = set()
    for drilldown in drilldowns:
        if "histogram" in drilldown:
            all_categories.update(drilldown["histogram"]["x"])
    all_categories = sorted(all_categories)

    # Make a figure and add the traces for each date; only the first date is
    # visible initially.
    fig = go.Figure()
    max_y_value = 0
    traces_per_date = []  # number of traces each date contributed, in order
    for i, (filename, drilldown) in enumerate(zip(filenames, drilldowns)):
        show = (i == 0)
        if "kdehistplot" in drilldown:
            kdehist = drilldown["kdehistplot"]
            hist = kdehist["hist"]
            fig.add_trace(
                go.Bar(x=kdehist["plot_centers"], y=hist, marker=dict(color='blue'),
                       name='Histogram', opacity=0.75, visible=show)
            )
            n_traces = 1
            max_y_value = max(max_y_value, max(hist, default=0))

            # The KDE curve is optional; only dates that have it add a second trace.
            if "kde_x" in kdehist:
                kde = kdehist["kde"]
                fig.add_trace(
                    go.Scatter(x=kdehist["kde_x"], y=kde, mode='lines',
                               line=dict(color='red'), name='KDE', visible=show)
                )
                n_traces = 2
                max_y_value = max(max_y_value, max(kde, default=0))
        else:
            # Categorical data: categories missing on this date get probability 0.
            histogram = drilldown["histogram"]
            probability = histogram["probability"]
            category_data = dict.fromkeys(all_categories, 0)
            category_data.update(zip(histogram["x"], probability))

            fig.add_trace(
                go.Bar(x=list(category_data.keys()), y=list(category_data.values()),
                       marker=dict(color='blue'), name=f'Category Probability {filename}',
                       visible=show)
            )
            n_traces = 1
            max_y_value = max(max_y_value, max(probability, default=0))
        traces_per_date.append(n_traces)

    # Create one slider step per date, showing exactly that date's traces.
    total_traces = sum(traces_per_date)
    steps = []
    first_trace = 0
    for label, n_traces in zip(labels, traces_per_date):
        visible = [False] * total_traces
        visible[first_trace:first_trace + n_traces] = [True] * n_traces
        first_trace += n_traces
        steps.append({
            'method': 'update',
            'args': [{'visible': visible}, {'title': f"Histogram for {feature} on {label}"}],
            'label': label
        })

    # Create and add slider
    sliders = [dict(
        active=0,
        currentvalue={"prefix": "Date: "},
        pad={"t": 80},
        steps=steps
    )]

    fig.update_layout(
        sliders=sliders,
        # Same wording as the slider steps so the title does not change format
        # the first time the slider is moved.
        title_text=f"Histogram for {feature} on {labels[0]}",
        height=600,
        width=1000,
        title_x=0.5,
        title_y=0.9,
    )
    # Fixed y-range across dates so bars are comparable while sliding.
    fig.update_yaxes(range=[0, max_y_value])

    fig.write_html(os.path.join(output_dir, f'{feature}_histogram_interactive.html'))

# Analysis

In [61]:
# Directory holding the date-named JSON files (e.g. '2020-07-01.json') that
# the cells below read and that is passed to plot_hist_feature.
basepath = '/autofs/cluster/qtim/projects/xray_drift/drift_analyses/classification_final_allpoc_standarddrift/history/'

In [62]:
# Load a single file to inspect which entries exist under 'drilldowns'.
date_json = os.path.join(basepath, '2020-07-01.json')
with open(date_json, 'r') as f:
    data = json.load(f)

# These keys are the feature names plot_hist_feature looks up under 'drilldowns'.
keys = data['drilldowns'].keys()
keys

dict_keys(['activation.Atelectasis', 'activation.Cardiomegaly', 'activation.Consolidation', 'activation.Edema', 'activation.Lung Opacity', 'activation.Pleural Other', 'activation.Pleural Effusion', 'activation.Pneumonia', 'activation.Pneumothorax', 'activation.Support Devices', 'mu.000', 'mu.001', 'mu.002', 'mu.003', 'mu.004', 'mu.005', 'mu.006', 'mu.007', 'mu.008', 'mu.009', 'mu.010', 'mu.011', 'mu.012', 'mu.013', 'mu.014', 'mu.015', 'mu.016', 'mu.017', 'mu.018', 'mu.019', 'mu.020', 'mu.021', 'mu.022', 'mu.023', 'mu.024', 'mu.025', 'mu.026', 'mu.027', 'mu.028', 'mu.029', 'mu.030', 'mu.031', 'mu.032', 'mu.033', 'mu.034', 'mu.035', 'mu.036', 'mu.037', 'mu.038', 'mu.039', 'mu.040', 'mu.041', 'mu.042', 'mu.043', 'mu.044', 'mu.045', 'mu.046', 'mu.047', 'mu.048', 'mu.049', 'mu.050', 'mu.051', 'mu.052', 'mu.053', 'mu.054', 'mu.055', 'mu.056', 'mu.057', 'mu.058', 'mu.059', 'mu.060', 'mu.061', 'mu.062', 'mu.063', 'mu.064', 'mu.065', 'mu.066', 'mu.067', 'mu.068', 'mu.069', 'mu.070', 'mu.071', '

In [63]:
# Quick sanity check that at least one feature key is available.
print("present" if keys else "empty")

present


In [52]:
# Replace `keys` with an explicit list of the features to plot.
keys = ['Is Stat', 'Exam Code']

In [41]:
# Destination folder for the generated HTML files; created (including any
# missing parents) when it does not exist yet.
output_dir = Path('/autofs/cluster/qtim/projects/xray_drift/drift_analyses/classification_final_allpoc_standarddrift_PLOTS/histograms')
output_dir.mkdir(parents=True, exist_ok=True)
output_dir

PosixPath('/autofs/cluster/qtim/projects/xray_drift/drift_analyses/classification_final_allpoc_standarddrift_PLOTS/histograms')

In [55]:
from tqdm import tqdm

In [56]:
# Build and save one interactive histogram per selected feature.
for k in tqdm(keys, desc="Creating Histograms"):
    plot_hist_feature(k, basepath=basepath, output_dir=output_dir)


  0%|          | 0/2 [00:08<?, ?it/s]


KeyboardInterrupt: 