<a href="https://colab.research.google.com/github/JJChrzanowski/GlyCulator3_hotfix/blob/main/Completeness_GlyCulator_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Completeness fix for GlyCulator analysis using GlyCulator API

Please follow these steps:

*   Enter your original session ID and your API key in the `SESSION_ID` and `API_KEY` variables below.
*   Run all code cells in order.

The code will:

*   Connect to the GlyCulator API
*   Retrieve the original analysis details
*   Find the appropriate data to compute completeness for your predefined files and periods
*   Show you the final results as a table
*   Provide a link to download the results as a CSV file



In [None]:
# @title
import requests
import pandas as pd
from datetime import datetime

# Root URL of the GlyCulator REST API; the request helpers prefix their endpoint paths with it.
base_url = "https://glyculator.btm.umed.pl/api"

def get_analysis(session_id, api_key, timeout=60):
    """Fetch the stored analysis for a session from the GlyCulator API.

    Args:
        session_id: Identifier of the analysis session; placed in the URL path.
        api_key: API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server to respond before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = f"{base_url}/analyses/{session_id}"
    # Passing the key through ``params`` lets requests URL-encode it, and the
    # timeout keeps the notebook cell from hanging forever on a stalled server.
    r = requests.get(url, params={"api_key": api_key}, timeout=timeout)
    r.raise_for_status()
    return r.json()

def get_file_info(file_hash, api_key, timeout=60):
    """Fetch the description of an uploaded file from the GlyCulator API.

    Args:
        file_hash: Hash identifying the file; placed in the URL path.
        api_key: API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server to respond before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = f"{base_url}/files/{file_hash}"
    # Passing the key through ``params`` lets requests URL-encode it, and the
    # timeout keeps the notebook cell from hanging forever on a stalled server.
    r = requests.get(url, params={"api_key": api_key}, timeout=timeout)
    r.raise_for_status()
    return r.json()

def get_analysis_raw_data_csv(session_id, api_key, timeout=300):
    """Download the raw data of an analysis session as a DataFrame.

    Args:
        session_id: Identifier of the analysis session, sent as the
            ``session_id`` query parameter.
        api_key: API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for the server to respond before giving up.
            The default is generous because this export can be slow.

    Returns:
        A ``pandas.DataFrame`` parsed from the CSV text of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    from io import StringIO

    url = f"{base_url}/analysis_raw_data_csv"
    # Passing the values through ``params`` lets requests URL-encode them.
    query = {"session_id": session_id, "api_key": api_key}
    r = requests.get(url, params=query, timeout=timeout)
    r.raise_for_status()
    # The body is CSV text; parse it straight into a DataFrame.
    return pd.read_csv(StringIO(r.text))

def compute_completeness_no_imputation(file_info, date_from, date_to):
    """Compute data completeness for a file from its per-day reading counts.

    The requested period is clipped to the span covered by the file
    (``mindate`` to ``maxdate``). Completeness is the number of readings on
    the days touched by the clipped span, divided by the number of readings
    expected for its duration at ``max_count`` readings per 24 hours.

    Args:
        file_info: File description dict. Must contain ``basic_stats`` with
            ``mindate``, ``maxdate``, ``max_count`` and ``data`` (records
            carrying ``date`` and ``count``).
        date_from: Start of the period (anything ``pd.to_datetime`` parses).
        date_to: Last day of the period; extended to 23:59:59 of that day.

    Returns:
        The completeness ratio, or ``None`` when it cannot be computed:
        ``file_info`` is empty or lacks ``basic_stats``, the period does not
        overlap the file's data span, ``max_count`` is missing or not
        positive, or there are no per-day counts.
    """
    if not file_info or 'basic_stats' not in file_info:
        return None
    stats = file_info['basic_stats']

    start_period = pd.to_datetime(date_from)
    # date_to is inclusive, so push the bound to the last second of that day.
    # pd.Timedelta is used because the file only imports `datetime`, not
    # `timedelta`, from the datetime module.
    end_period = (
        pd.to_datetime(date_to) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
    )

    # Clip the requested period to the span the file actually covers.
    adjusted_start = max(pd.to_datetime(stats['mindate']), start_period)
    adjusted_end = min(pd.to_datetime(stats['maxdate']), end_period)
    if adjusted_start >= adjusted_end:
        return None

    max_count = stats['max_count']
    if not max_count or max_count <= 0:
        return None

    day_data = pd.DataFrame(stats['data'])
    if day_data.empty:
        # No per-day counts: there is no 'date' column to filter on.
        return None

    days = pd.to_datetime(day_data['date']).dt.date
    in_period = (days >= adjusted_start.date()) & (days <= adjusted_end.date())
    total_count = day_data.loc[in_period, 'count'].sum()

    total_hours = (adjusted_end - adjusted_start).total_seconds() / 3600.0
    expected_count = (total_hours / 24.0) * max_count
    return total_count / expected_count


def compute_completeness_with_imputation(raw_data_df, file_hash, date_from, date_to, max_count):
    """Compute data completeness for one file from the session's raw data.

    Counts the rows of ``raw_data_df`` that belong to ``file_hash`` and fall
    inside the period, then divides by ``max_count`` readings per day times
    the number of calendar days in the period (both end days included).

    Args:
        raw_data_df: Raw-data table; the ``Filename`` and ``Date`` columns
            are read. It is not modified.
        file_hash: Value matched against the ``Filename`` column.
        date_from: Start of the period (anything ``pd.to_datetime`` parses).
        date_to: Last day of the period, inclusive through the end of the day.
        max_count: Expected number of readings per full day.

    Returns:
        The completeness ratio, or ``None`` when the expected count is not
        positive.
    """
    start = pd.to_datetime(date_from)
    end = pd.to_datetime(date_to)
    # num_days below counts date_to as a whole day, so rows from any time on
    # that day must be included: use midnight of the following day as an
    # exclusive upper bound.
    end_exclusive = end.normalize() + pd.Timedelta(days=1)

    # Parse into a local Series so the caller's DataFrame gains no extra column.
    timestamps = pd.to_datetime(raw_data_df['Date'])
    in_scope = (
        raw_data_df['Filename'].eq(file_hash)
        & (timestamps >= start)
        & (timestamps < end_exclusive)
    )
    total_count = int(in_scope.sum())

    num_days = (end.date() - start.date()).days + 1
    expected_count = max_count * num_days
    return total_count / expected_count if expected_count > 0 else None

In [None]:
# NEVER SHARE YOUR API KEY!!!
# Replace both placeholder strings with your own values before running the next cell.
API_KEY = "API_KEY"  # GlyCulator API key, passed to every API request helper
SESSION_ID = "SESSION_ID"  # ID of the original analysis session to re-check

In [None]:
# Main logic: recompute completeness for every file of the analysis and
# collect the original and corrected values side by side.
analysis = get_analysis(SESSION_ID, API_KEY)
imputation_method = analysis.get('imputation_method')

# With imputation the counts come from the raw-data CSV. That export is
# requested per session, not per file, so download it once up front instead of
# once per loop iteration (this might be slow!).
raw_data = None
if imputation_method is not None:
    raw_data = get_analysis_raw_data_csv(SESSION_ID, API_KEY)

fixed_results = []
for af in analysis['analysis_files']:
    date_from = af['date_from']
    date_to = af['date_to']
    fhash = af['file']['file_hash']
    file_info = get_file_info(fhash, API_KEY)

    if imputation_method is None:
        # No imputation: the per-day counts in the file info are sufficient.
        completeness = compute_completeness_no_imputation(file_info, date_from, date_to)
    else:
        # raw_data has no max_count, so it is still taken from the file info.
        max_count = file_info['basic_stats']['max_count']
        completeness = compute_completeness_with_imputation(raw_data, fhash, date_from, date_to, max_count)

    # Missing or empty 'indices' / 'whole' entries yield None instead of raising.
    whole_indices = (af.get('indices') or {}).get('whole') or {}

    fixed_results.append({
        'filename': af['file']['filename'],
        'file_hash': fhash,
        'date_from': date_from,
        'date_to': date_to,
        'original_completeness': whole_indices.get('completeness'),
        'fixed_completeness': completeness
    })

# fixed_results now contains corrected completeness values for each file.
# Next step could be to PATCH them back if there's an endpoint or store them locally.
df_results = pd.DataFrame(fixed_results)
df_results
