In [None]:
# NOTE(review): names such as pd, widgets, logging, datetime, Optional and
# threading are used below without an explicit import in this file, so they
# presumably come from this star import -- confirm against common_imports.
from common_imports import *
show_home_button()
from db_connection import get_engine
# Single engine object shared by every query helper in this file.
engine = get_engine()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

from frequency_utils import (
    get_freq_minutes,
    get_freq_seconds,
    get_pandas_freq,
    check_max_rows,
    round_datetime_to_freq,
    detect_auto_frequency,
    resample_dataframe,
)

from progress_bar_widget import ProgressBarWidget
# Instance whose .widget() is embedded in the filter panel further down.
progress_widget = ProgressBarWidget()

from caching import TTLCache  # <-- CACHE IMPORT

# Separate caches for the min/max-period lookup and the full data fetch.
# NOTE(review): ttl=300 is presumably in seconds -- confirm in caching.TTLCache.
min_max_cache = TTLCache(ttl=300)
full_data_cache = TTLCache(ttl=300)

from mappings import get_typeids, validate_unique_ids, group_typeid_mapping
# Called once at import time, before any mapping is used.
validate_unique_ids()

# Upper bound passed to check_max_rows() and used by the "limit date range"
# quick fix when computing the latest allowed end date.
MAX_ROWS = 8000

def parse_user_datetime(dt_str: str) -> Optional[datetime]:
    """Parse a ``dd/mm/yyyy HH:MM`` string as typed into the date inputs.

    Surrounding whitespace is ignored, so a value pasted with a trailing
    space still parses.

    Args:
        dt_str: Text to parse.

    Returns:
        The parsed ``datetime``, or ``None`` when ``dt_str`` is not a string
        or does not match the expected format.
    """
    # A non-string (e.g. None) would make strptime raise TypeError, which the
    # ValueError handler below does not cover; treat it as "not parseable".
    if not isinstance(dt_str, str):
        return None
    try:
        return datetime.strptime(dt_str.strip(), '%d/%m/%Y %H:%M')
    except ValueError:
        return None

def generate_insights(df: pd.DataFrame) -> str:
    """Render the status insights for ``df`` as an HTML table.

    Returns the fixed Dutch "no insights available" text when
    ``get_insights_df`` yields an empty frame.
    """
    summary = get_insights_df(df)
    if not summary.empty:
        return summary.to_html(classes="dataframe", border=0)
    return "Geen inzichten beschikbaar."

def get_insights_df(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise how often the status codes "P" and "T" occur per status column.

    A status column is any column whose name contains "status"
    (case-insensitive). For each such column and each code, the result holds
    the number of matching rows plus the earliest and latest timestamp of
    those rows, taken from "UTC Period" or, failing that, "utcperiod".

    Returns:
        A frame indexed by ("Kanaal", "Status") with the columns "Count",
        "Van datum" and "Tot datum"; an empty frame when ``df`` is None or
        empty or has no status columns.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    # "UTC Period" wins over "utcperiod" when both are present.
    time_col = next(
        (name for name in ("UTC Period", "utcperiod") if name in df.columns),
        None,
    )

    status_cols = [name for name in df.columns if 'status' in name.lower()]
    if not status_cols:
        return pd.DataFrame()

    records = []
    for channel in status_cols:
        for code in ("P", "T"):
            matches = df[channel] == code
            hits = matches.sum()
            first_seen = last_seen = None
            if hits > 0 and time_col is not None:
                stamps = pd.to_datetime(df.loc[matches, time_col], errors='coerce')
                first_seen, last_seen = stamps.min(), stamps.max()
            records.append({
                "Kanaal": channel,
                "Status": code,
                "Count": hits,
                "Van datum": first_seen,
                "Tot datum": last_seen
            })

    return pd.DataFrame(records).set_index(["Kanaal", "Status"])

def group_columns_by_typeid(df: pd.DataFrame, engine, group_mapping: dict) -> pd.DataFrame:
    """Collapse register columns into one "<group> Total" column per group.

    The register ID is read from the first "(<digits>)" in each column name,
    its TypeId is looked up in dbo.TBL_Register, and every column whose
    TypeId belongs to a group is summed row-wise into that group's total.
    Columns containing "(status)" are left out of the sums. A group without
    any summable column gets a constant 0.

    Args:
        df: Frame whose column names embed register IDs.
        engine: Object whose ``connect()`` yields a connection usable by
            ``pd.read_sql_query``.
        group_mapping: Group name -> collection of TypeIds.

    Returns:
        A copy of the 'utcperiod' column (or of the whole frame when that
        column is absent) extended with the total columns.

    Raises:
        ValueError: When no column name contains a register ID.
    """
    import re
    id_in_parens = re.compile(r'\((\d+)\)')
    time_names = ('utcperiod', 'utc period')

    # Column name -> register ID, in column order, skipping the time column.
    searched = (
        (name, id_in_parens.search(name))
        for name in df.columns
        if name.lower() not in time_names
    )
    register_of = {name: int(found.group(1)) for name, found in searched if found}
    if not register_of:
        raise ValueError("Geen register-ID's gevonden in de DataFrame kolomnamen.")

    # The IN list only ever contains integers parsed by the regex above.
    id_list = ",".join(str(reg_id) for reg_id in register_of.values())
    query = "SELECT ID, TypeId FROM dbo.TBL_Register WHERE ID IN ({})".format(id_list)
    with engine.connect() as conn:
        lookup = pd.read_sql_query(query, conn)
    type_of = dict(zip(lookup['ID'], lookup['TypeId']))

    if 'utcperiod' in df.columns:
        grouped_df = df[['utcperiod']].copy()
    else:
        grouped_df = df.copy()

    for group_name, typeid_list in group_mapping.items():
        members = [
            name for name, reg_id in register_of.items()
            if type_of.get(reg_id) in typeid_list
        ]
        summable = [name for name in members if '(status)' not in name.lower()]
        total_label = group_name + " Total"
        if summable:
            grouped_df[total_label] = df[summable].sum(axis=1, numeric_only=True)
        else:
            grouped_df[total_label] = 0
    return grouped_df

def filter_columns_by_selected_groups(df: pd.DataFrame, engine, group_mapping: dict) -> pd.DataFrame:
    """Keep only the register columns whose TypeId belongs to a selected group.

    The register ID is read from the first "(<digits>)" in each column name
    and resolved to a TypeId through dbo.TBL_Register. A column is kept when
    its TypeId appears in any of the TypeId collections in ``group_mapping``.
    When 'utcperiod' is a column it is always kept, as the first column.

    Args:
        df: Frame whose column names embed register IDs.
        engine: Object whose ``connect()`` yields a connection usable by
            ``pd.read_sql_query``.
        group_mapping: Group name -> collection of TypeIds.

    Returns:
        The column subset of ``df``. When no column carries a register ID,
        only 'utcperiod' is returned, or an empty frame if that is absent.
    """
    import re
    id_in_parens = re.compile(r'\((\d+)\)')
    time_names = ('utcperiod', 'utc period')
    has_time_column = 'utcperiod' in df.columns

    # Column name -> register ID, in column order, skipping the time column.
    searched = (
        (name, id_in_parens.search(name))
        for name in df.columns
        if name.lower() not in time_names
    )
    register_of = {name: int(found.group(1)) for name, found in searched if found}
    if not register_of:
        if has_time_column:
            return df[['utcperiod']]
        return pd.DataFrame()

    # The IN list only ever contains integers parsed by the regex above.
    id_list = ",".join(str(reg_id) for reg_id in register_of.values())
    query = "SELECT ID, TypeId FROM dbo.TBL_Register WHERE ID IN ({})".format(id_list)
    with engine.connect() as conn:
        lookup = pd.read_sql_query(query, conn)
    type_of = dict(zip(lookup['ID'], lookup['TypeId']))

    wanted = [
        name for name, reg_id in register_of.items()
        if any(type_of.get(reg_id) in ids for ids in group_mapping.values())
    ]
    leading = ['utcperiod'] if has_time_column else []
    return df[leading + wanted]

# Selects how the text in the EAN box is interpreted. Options are
# (label, value) pairs; the value is what fetch_typeids_for_ean branches on
# and what the fetch helpers pass to the stored procedures as @SearchMethod.
search_method_dropdown = widgets.Dropdown(
    options=[
        ("TransferpointID", "transferpoint"),
        ("ObjectID", "objectid"),
        ("RegisterID", "registerid"),
        ("RegistratorID", "registratorid")
    ],
    value="transferpoint",
    description="Filter:"
)
# Same fixed size as the other filter controls.
search_method_dropdown.layout = widgets.Layout(width='240px', height='35px')

def fetch_typeids_for_ean(ean_value: str) -> Set[int]:
    """Return the distinct register TypeIds that match the user's input.

    How ``ean_value`` is interpreted depends on the current value of
    ``search_method_dropdown``: as an EAN for "transferpoint" and "objectid",
    or as an integer ID for "registerid" and "registratorid".

    Returns:
        The set of TypeIds found. An empty set when nothing matches, the
        search method is unknown, the ID is not an integer, or the query
        fails (the failure is logged, not raised).
    """
    method = search_method_dropdown.value
    try:
        # Each branch only picks the statement and its bind values; the query
        # itself is executed once, after the branch.
        if method == "transferpoint":
            sql = """
                SELECT DISTINCT r.TypeId
                FROM TBL_Register r
                JOIN TBL_ConnectionPoint cp ON cp.ID = r.ConnectionPointId
                WHERE cp.EAN_ConnectionPoint = ?
                      OR cp.TransferPointID IN (
                          SELECT ID FROM TBL_ConnectionPoint WHERE EAN_ConnectionPoint = ?
                      )
            """
            bind_values = (ean_value, ean_value)
        elif method == "objectid":
            sql = """
                SELECT DISTINCT r.TypeId
                FROM TBL_Register r
                JOIN TBL_ConnectionPoint cp ON cp.ID = r.ConnectionPointId
                WHERE cp.ObjectId = (
                    SELECT TOP 1 cp2.ObjectId
                    FROM TBL_ConnectionPoint cp2
                    WHERE cp2.EAN_ConnectionPoint = ?
                )
            """
            bind_values = (ean_value,)
        elif method == "registerid":
            sql = """
                SELECT DISTINCT TypeId
                FROM TBL_Register
                WHERE ID = ?
            """
            try:
                bind_values = (int(ean_value),)
            except ValueError:
                # Non-numeric input cannot be a register ID.
                return set()
        elif method == "registratorid":
            sql = """
                SELECT DISTINCT r.TypeId
                FROM TBL_Register r
                WHERE r.RegistratorID = ?
            """
            try:
                bind_values = (int(ean_value),)
            except ValueError:
                # Non-numeric input cannot be a registrator ID.
                return set()
        else:
            return set()

        with engine.connect() as conn:
            found = pd.read_sql_query(sql, conn, params=bind_values)
        return set() if found.empty else set(found['TypeId'].unique())
    except Exception as e:
        logger.error(f"Error fetching TypeIDs: {e}")
        return set()

def fetch_min_max_period(
    ean_value: str,
    allowed_typeids_str: str,
    start_date: datetime,
    end_date: datetime
) -> Tuple[Optional[datetime], Optional[datetime]]:
    """Look up the first and last UTC period with data for the request.

    Calls the stored procedure dbo.usp_GetMinMaxPeriodForEAN with the current
    search method and the dates formatted as ``dd/mm/yyyy HH:MM``. Successful
    lookups, including "no data", are stored in ``min_max_cache``.

    Returns:
        ``(min_period, max_period)``, or ``(None, None)`` when the procedure
        returns no rows, a null minimum, or the call fails (logged, not
        raised and not cached).
    """
    method = search_method_dropdown.value
    key = (ean_value, allowed_typeids_str, start_date, end_date, 'minmax', method)
    hit = min_max_cache.get(key)
    if hit is not None:
        logger.info("Min/Max periode uit cache gehaald.")
        return hit

    stamp_format = '%d/%m/%Y %H:%M'
    start_text = start_date.strftime(stamp_format)
    end_text = end_date.strftime(stamp_format)
    statement = """
        EXEC [dbo].[usp_GetMinMaxPeriodForEAN]
             @EAN_ConnectionPoint = ?,
             @AllowedTypeIDs = ?,
             @StartDateStr = ?,
             @EndDateStr = ?,
             @SearchMethod = ?
    """
    try:
        with engine.connect() as conn:
            frame = pd.read_sql_query(
                statement, conn,
                params=(ean_value, allowed_typeids_str, start_text, end_text, method)
            )
        has_range = not frame.empty and not pd.isnull(frame['MinUTCPeriod'].iloc[0])
        if has_range:
            bounds = (frame['MinUTCPeriod'].iloc[0], frame['MaxUTCPeriod'].iloc[0])
        else:
            bounds = (None, None)
        min_max_cache.set(key, bounds)
        logger.info("Min/Max periode in cache gezet.")
        return bounds
    except Exception as e:
        logger.error(f"Error fetching min/max period: {e}")
        return (None, None)

def fetch_full_data(
    ean_value: str,
    allowed_typeids_str: str,
    start_date: datetime,
    end_date: datetime,
    interval_minutes: int = 5,
    include_status: bool = False
) -> Optional[pd.DataFrame]:
    """Fetch the data set for the request from dbo.usp_GetConnectionDataFull.

    The dates are passed as ``dd/mm/yyyy HH:MM`` strings, the status flag as
    0/1, and the 'utcperiod' column is parsed as dates. The outcome of a
    successful call is stored in ``full_data_cache``.

    Returns:
        The resulting frame, or ``None`` when the procedure returns no rows
        or the call fails (logged, not raised).
    """
    method = search_method_dropdown.value
    key = (
        ean_value, allowed_typeids_str, start_date, end_date,
        'pivot', method, interval_minutes, include_status,
    )
    hit = full_data_cache.get(key)
    if hit is not None:
        logger.info("Volledige data uit cache gehaald.")
        return hit

    stamp_format = '%d/%m/%Y %H:%M'
    start_text = start_date.strftime(stamp_format)
    end_text = end_date.strftime(stamp_format)
    statement = """
        EXEC [dbo].[usp_GetConnectionDataFull]
             @EAN_ConnectionPoint = ?,
             @AllowedTypeIDs      = ?,
             @StartDateStr        = ?,
             @EndDateStr          = ?,
             @SearchMethod        = ?,
             @IntervalMinutes     = ?,
             @IncludeStatus       = ?
    """
    try:
        with engine.connect() as conn:
            bind_values = (
                ean_value,
                allowed_typeids_str,
                start_text,
                end_text,
                method,
                interval_minutes,
                int(include_status),
            )
            frame = pd.read_sql_query(
                statement, conn,
                params=bind_values,
                parse_dates=['utcperiod']
            )
        outcome = None if frame.empty else frame
        full_data_cache.set(key, outcome)
        logger.info("Volledige data in cache gezet.")
        return outcome
    except Exception as e:
        logger.error(f"Error fetching full data: {e}")
        return None

def build_dataset(ean_val: str,
    chosen_groups: List[str],
    start_date: datetime,
    end_date: datetime,
    freq_val: str,
    aggregate: bool,
    include_status_flag: bool = False
) -> Optional[pd.DataFrame]:
    """Build the display/export frame for one request.

    Steps: collect the TypeIds of the chosen groups, check that data exists
    (fetch_min_max_period), fetch it (fetch_full_data), restrict it to the
    requested period, either total it per group or filter it to the chosen
    groups, resample it, and finally put the time column first with each
    consumption column followed by its status column.

    Args:
        ean_val: EAN or ID as entered by the user.
        chosen_groups: Group names; looked up in ``group_typeid_mapping``.
        start_date: Inclusive start of the period.
        end_date: Inclusive end of the period.
        freq_val: A frequency key known to FREQS, or 'auto' to detect it
            from the fetched timestamps.
        aggregate: True to return one "<group> Total" column per group,
            False to return the individual register columns.
        include_status_flag: Request status columns; only honoured when
            ``aggregate`` is False.

    Returns:
        The resampled frame with a leading 'UTC Period' column, or ``None``
        whenever a step yields no data or the frequency is invalid.
    """
    # Flatten the TypeIds of all chosen groups; unknown groups contribute nothing.
    typeids_final = []
    for grp in chosen_groups:
        typeids_final.extend(group_typeid_mapping.get(grp, []))
    if not typeids_final:
        logger.warning("Geen TypeIDs in chosen_groups.")
        return None
    # De-duplicated, comma-separated list as expected by the fetch helpers.
    allowed_typeids_str = ",".join(str(tid) for tid in set(typeids_final))
    # Cheap existence check before fetching the full data set.
    minp, maxp = fetch_min_max_period(ean_val, allowed_typeids_str, start_date, end_date)
    if not minp or not maxp:
        logger.info("Geen data (minp, maxp is None).")
        return None
    # FREQS is read from the globals of the module that defines get_freq_minutes.
    # NOTE(review): if 'auto' is not an entry of FREQS, the default 'auto'
    # selection logs this warning on every call and fetches at 5 minutes --
    # confirm whether the warning is intended for that case.
    if freq_val in get_freq_minutes.__globals__['FREQS']:
        interval_minutes = get_freq_minutes(freq_val)
    else:
        logger.warning(f"Frequency value '{freq_val}' not found in FREQS. Defaulting to 5 minutes for interval_minutes.")
        interval_minutes = 5
    # Status columns are only requested for the non-aggregated view.
    sp_include_status = (not aggregate) and include_status_flag
    df_full = fetch_full_data(
        ean_val,
        allowed_typeids_str,
        start_date,
        end_date,
        interval_minutes=interval_minutes,
        include_status=sp_include_status
    )
    if df_full is None or df_full.empty:
        logger.info("Lege dataset (df_full).")
        return None
    # Keep only rows inside the requested period (both bounds inclusive).
    df_f = df_full[
        (df_full['utcperiod'] >= start_date) &
        (df_full['utcperiod'] <= end_date)
    ].copy()
    if df_f.empty:
        logger.info("Geen rijen binnen periode.")
        return None
    df_f.set_index('utcperiod', inplace=True)
    # Resolve 'auto' to a concrete frequency based on the fetched timestamps.
    if freq_val.lower() == 'auto':
        periods = df_f.index.sort_values()
        freq_val = detect_auto_frequency(periods)
        logger.info(f"Automatisch gedetecteerde frequentie: {freq_val}")
    if freq_val not in get_freq_minutes.__globals__['FREQS']:
        logger.error(f"Ongeldige frequentie '{freq_val}' voor resampling.")
        return None
    if aggregate:
        # group_columns_by_typeid looks for 'utcperiod' among the columns,
        # so the index is reset before the call and restored afterwards.
        df_reset = df_f.reset_index()
        selected_mapping = {grp: group_typeid_mapping[grp] for grp in chosen_groups if grp in group_typeid_mapping}
        df_grouped = group_columns_by_typeid(df_reset, engine, selected_mapping)
        df_grouped.set_index('utcperiod', inplace=True)
        df_interest = df_grouped
    else:
        # Here 'utcperiod' stays the index; only the register columns are filtered.
        selected_mapping = {grp: group_typeid_mapping[grp] for grp in chosen_groups if grp in group_typeid_mapping}
        df_interest = filter_columns_by_selected_groups(df_f, engine, selected_mapping)
    if df_interest.empty:
        logger.info("DataFrame is leeg na filteren/groeperen (df_interest).")
        return None
    # The index must be a DatetimeIndex before resampling; convert if needed.
    if not isinstance(df_interest.index, pd.DatetimeIndex):
        logger.warning("df_interest index is not a DatetimeIndex. Attempting conversion.")
        try:
            df_interest.index = pd.to_datetime(df_interest.index)
        except Exception as e:
            logger.error(f"Kon df_interest.index niet converteren naar DatetimeIndex: {e}")
            return None
    if df_interest.index.min() is pd.NaT or df_interest.index.max() is pd.NaT:
        logger.warning("df_interest index contains NaT values or is empty after conversion. Cannot proceed with resampling.")
        return None
    # NOTE(review): method='sum' is also applied when status columns are
    # present; how resample_dataframe treats them is not visible here.
    distributed_df = resample_dataframe(df_interest, freq_val, method='sum')
    distributed_df = distributed_df.reset_index()
    # After reset_index the time column is called 'index' or 'utcperiod',
    # depending on whether the index carried a name.
    if 'index' in distributed_df.columns:
        distributed_df.rename(columns={'index': 'UTC Period'}, inplace=True)
    elif 'utcperiod' in distributed_df.columns:
        distributed_df.rename(columns={'utcperiod': 'UTC Period'}, inplace=True)
    # Move the time column to the front.
    cols = distributed_df.columns.tolist()
    if 'UTC Period' in cols:
        cols.insert(0, cols.pop(cols.index('UTC Period')))
    distributed_df = distributed_df[cols]
    # Final order: time column, each consumption column directly followed by
    # its status column (when present), then everything else.
    final_cols = []
    col_list = distributed_df.columns.tolist()
    if 'UTC Period' in col_list:
        final_cols.append('UTC Period')
    consumption_cols = [c for c in col_list if c not in final_cols and '(consumption)' in c.lower()]
    for ccol in consumption_cols:
        final_cols.append(ccol)
        # The replace is case-sensitive, unlike the lower-cased match above.
        status_candidate = ccol.replace('(consumption)', '(status)')
        if status_candidate in col_list:
            final_cols.append(status_candidate)
    leftover_cols = [c for c in col_list if c not in final_cols]
    final_cols.extend(leftover_cols)
    distributed_df = distributed_df[final_cols]
    return distributed_df

def export_dataset_to_csv(df: pd.DataFrame, filename: str) -> bool:
    """Write ``df`` to ``filename`` as CSV, without the index.

    The export works on a copy. When a "UTC Period" column is present it is
    written as ``YYYY-MM-DD HH:MM:SS`` text; values that cannot be parsed as
    dates are coerced before formatting.

    Returns:
        True after writing the file, False (with a logged warning) when
        ``df`` is None or empty.
    """
    nothing_to_export = df is None or df.empty
    if nothing_to_export:
        logger.warning("Kan niet exporteren: lege DataFrame.")
        return False

    export_frame = df.copy()
    if "UTC Period" in export_frame.columns:
        as_datetime = pd.to_datetime(export_frame["UTC Period"], errors='coerce')
        export_frame["UTC Period"] = as_datetime.dt.strftime('%Y-%m-%d %H:%M:%S')
    export_frame.to_csv(filename, index=False)
    logger.info(f"CSV geëxporteerd: {filename}")
    return True

def export_dataset_to_excel(df: pd.DataFrame, filename: str, excel_format: bool, include_status: bool) -> bool:
    """Write ``df`` to an .xlsx file (sheet 'Dataset') using xlsxwriter.

    Headers are styled, column widths are set from the header text, and the
    first row and column are frozen. With ``excel_format`` enabled,
    conditional formatting is added:

    * consumption columns are coloured by the value in their matching status
      column ("P", "T" or empty) when ``include_status`` is True, or by a
      3-colour scale otherwise;
    * status columns are coloured by their own value.

    Returns:
        True after the file is written. False when ``df`` is None or empty,
        or when any error occurs during the export (logged, not raised).
    """
    if df is None or df.empty:
        logger.warning("Kan niet exporteren: lege DataFrame.")
        return False
    try:
        from xlsxwriter.utility import xl_col_to_name
        with pd.ExcelWriter(filename, engine='xlsxwriter', datetime_format='yyyy-mm-dd hh:mm:ss') as writer:
            df.to_excel(writer, index=False, sheet_name='Dataset')
            book = writer.book
            sheet = writer.sheets['Dataset']

            def status_fill(colour):
                # The three status formats differ only in background colour.
                return book.add_format({
                    'bg_color': colour,
                    'align': 'center',
                    'valign': 'middle',
                    'border': 1,
                    'border_color': '#808080',
                    'font_name': 'Arial',
                    'font_size': 10
                })

            def width_for(header):
                # Fixed widths for known column kinds, otherwise sized to the header.
                lowered = str(header).lower()
                if lowered == 'utc period':
                    return 25
                if 'consumption' in lowered:
                    return 15
                if 'status' in lowered:
                    return 10
                return max(15, len(str(header)) + 2)

            header_fmt = book.add_format({
                'bold': True,
                'text_wrap': True,
                'align': 'center',
                'valign': 'middle',
                'fg_color': '#F2F2F2',
                'border': 1,
                'border_color': '#808080',
                'font_name': 'Arial',
                'font_size': 10
            })
            cell_fmt = book.add_format({
                'border': 1,
                'border_color': '#808080',
                'align': 'center',
                'valign': 'middle',
                'font_name': 'Arial',
                'font_size': 10
            })

            headers = df.columns.tolist()
            last_row = len(df)
            for position, header in enumerate(headers):
                sheet.write(0, position, header, header_fmt)
                sheet.set_column(position, position, width_for(header), cell_fmt)
            sheet.set_row(0, 42)
            sheet.freeze_panes(1, 1)

            # The time column gets its own format carrying the date pattern.
            if "UTC Period" in headers:
                time_position = headers.index("UTC Period")
                time_fmt = book.add_format({
                    'num_format': 'yyyy-mm-dd hh:mm:ss',
                    'align': 'center',
                    'valign': 'middle',
                    'font_name': 'Arial',
                    'font_size': 10
                })
                sheet.set_column(time_position, time_position, 25, time_fmt)

            fmt_p = status_fill('#FFFFAF')
            fmt_t = status_fill('#FFDB69')
            fmt_empty = status_fill('#CCFFCC')

            if excel_format:
                # (Excel literal, format) pairs, applied in this order.
                status_rules = (('"P"', fmt_p), ('"T"', fmt_t), ('""', fmt_empty))

                for position, header in enumerate(headers):
                    if 'consumption' not in str(header).lower():
                        continue
                    if not include_status:
                        sheet.conditional_format(1, position, last_row, position, {
                            'type': '3_color_scale',
                            'min_color': "#FFFFFF",
                            'mid_color': "#FFFFCC",
                            'max_color': "#FFCCCC"
                        })
                        continue
                    partner = header.replace('(consumption)', '(status)')
                    if partner not in headers:
                        continue
                    # Colour the consumption cell by the status cell in the same row.
                    letter = xl_col_to_name(headers.index(partner))
                    for literal, fmt in status_rules:
                        sheet.conditional_format(1, position, last_row, position, {
                            'type': 'formula',
                            'criteria': f'=${letter}2={literal}',
                            'format': fmt,
                            'stop_if_true': True
                        })

                for position, header in enumerate(headers):
                    if 'status' not in str(header).lower():
                        continue
                    for literal, fmt in status_rules:
                        sheet.conditional_format(1, position, last_row, position, {
                            'type': 'cell',
                            'criteria': '==',
                            'value': literal,
                            'format': fmt
                        })
        logger.info(f"Excel bestand opgeslagen: {filename}")
        return True
    except Exception as e:
        logger.error(f"Error exporting to Excel: {e}")
        return False

# === UI CONTROLS, LAYOUT AND INTERACTIONS ===
# Shared fixed size for the text inputs, dropdowns and buttons below.
common_layout = widgets.Layout(width='240px', height='35px')
# Free-text period inputs; parse_user_datetime expects dd/mm/yyyy HH:MM.
start_datetime_input = widgets.Text(
    value='01/01/2024 00:00',
    placeholder='dd/mm/yyyy HH:MM',
    description='StartDatum:',
    layout=common_layout
)
end_datetime_input = widgets.Text(
    value='31/12/2024 00:00',
    placeholder='dd/mm/yyyy HH:MM',
    description='EindDatum:',
    layout=common_layout
)
# (label, value) pairs; 'auto' is special-cased by the handlers below.
# NOTE(review): the other values look like pandas offset aliases and must
# match what frequency_utils accepts -- confirm.
freq_selector = widgets.Dropdown(
    options=[('Automatisch (standaard)', 'auto'),
             ('Elke 5 minuten', '5T'), ('Elke 15 minuten', '15T'),
             ('Per uur', 'H'), ('Dagelijks', 'D'),
             ('Wekelijks', 'W'),
             ('Maandelijks', 'ME'), 
             ('Jaarlijks', 'Y')],
    value='auto',
    description='Frequentie:',
    layout=common_layout
)
aggregate_checkbox = widgets.Checkbox(
    value=True,
    description='Geaggregeerd?',
    layout=widgets.Layout(margin='2px 0 2px 0')
)
# Starts disabled, matching the aggregate checkbox being ticked by default.
status_checkbox = widgets.Checkbox(
    value=False,
    description='Include Status? (en pas kleur op consumption aan)',
    disabled=True,
    layout=widgets.Layout(margin='2px 0 2px 0')
)
excel_format_checkbox = widgets.Checkbox(
    value=False,
    description='Voorwaardelijke opmaak Excel?',
    layout=widgets.Layout(margin='2px 0 2px 0')
)
# Validation feedback: the HTML text plus a container whose children are
# swapped in and out by validate_data_request.
warning_message = widgets.HTML("")
warning_container = widgets.VBox([])
# Quick-fix buttons offered when a request would return too many rows.
quick_fix_freq_button = widgets.Button(
    description="Wijzig freq naar 1 uur",
    button_style="warning",
    icon="clock-o",
    layout=common_layout
)
quick_fix_date_button = widgets.Button(
    description="Beperk datumbereik",
    button_style="warning",
    icon="calendar",
    layout=common_layout
)
# Output areas: status messages, and two scrollable panes shown as tabs.
output_area = widgets.Output(layout={'border': '1px solid black'})
data_table_output = widgets.Output(layout={
    'border': '1px solid #ccc', 'overflow_x': 'auto',
    'overflow_y': 'auto', 'max_height': '400px', 'width': '100%'
})
insights_output = widgets.Output(layout={
    'border': '1px solid #ccc', 'overflow_x': 'auto',
    'overflow_y': 'auto', 'max_height': '400px', 'width': '100%'
})
view_tab = widgets.Tab(children=[data_table_output, insights_output])
view_tab.set_title(0, "Dataset")
view_tab.set_title(1, "Inzichten")
# Action buttons; everything except search and reset starts disabled.
btn_load_filters = widgets.Button(description='Zoeken', button_style='info', icon='filter', layout=common_layout)
btn_build_dataset = widgets.Button(description='Laad Dataset', button_style='success', icon='database', disabled=True, layout=common_layout)
btn_view_dataset = widgets.Button(description="Bekijk Dataset", button_style='primary', icon='eye', disabled=True, layout=common_layout)
btn_view_insights = widgets.Button(description="Bekijk Inzichten", button_style='primary', icon='info', disabled=True, layout=common_layout)
btn_download_csv = widgets.Button(description="Download CSV", button_style='primary', icon='download', disabled=True, layout=common_layout)
btn_download_excel = widgets.Button(description="Download XLS", button_style='primary', icon='file-excel-o', disabled=True, layout=common_layout)
btn_reset_filters = widgets.Button(description='Reset Filters', button_style='warning', icon='refresh', layout=common_layout)
# Holds the EAN or ID; its meaning depends on search_method_dropdown.
ean_input = widgets.Text(
    description='EAN:',
    placeholder='Vul ID/EAN in',
    value='',
    layout=common_layout
)
# Collapsible sections; selected_index = None leaves them collapsed.
options_container = widgets.VBox([aggregate_checkbox, status_checkbox, excel_format_checkbox])
options_accordion = widgets.Accordion(children=[options_container])
options_accordion.set_title(0, "Opties")
options_accordion.selected_index = None
# Filled with one checkbox per relevant group by load_filters_thread.
group_checkbox_container = widgets.VBox([])
group_accordion = widgets.Accordion(children=[group_checkbox_container])
group_accordion.set_title(0, "Beschikbare groepen")
group_accordion.selected_index = None
# Rows of the filter panel.
row_top = widgets.HBox(
    [ean_input, search_method_dropdown, btn_load_filters, btn_reset_filters],
    layout=widgets.Layout(gap="5px", flex_flow='row wrap')
)
row_dates = widgets.HBox([start_datetime_input, end_datetime_input, freq_selector],
                         layout=widgets.Layout(gap="10px", flex_flow='row wrap'))
action_buttons_row = widgets.HBox(
    [btn_build_dataset, btn_view_dataset, btn_view_insights, btn_download_csv, btn_download_excel],
    layout=widgets.Layout(justify_content='flex-start', flex_flow='row wrap')
)
# Shows/hides filters_container; see toggle_filters_display.
toggle_filters_button = widgets.Button(
    description="Verberg filters",
    icon='chevron-up',
    button_style='info',
    layout=widgets.Layout(width='150px', height='35px')
)
filters_container = widgets.VBox([
    widgets.HTML("<h3>Filters</h3>"),
    row_top,
    row_dates,
    warning_container,
    options_accordion,
    group_accordion,
    action_buttons_row,
    progress_widget.widget(),  # Progress bar widget
    output_area,
    view_tab
], layout=widgets.Layout(display='block', border='1px solid #ccc', padding='5px'))
final_ui = widgets.VBox([
    toggle_filters_button,
    filters_container
])
display(final_ui)

# Module-level slot, initially empty.
# NOTE(review): presumably holds the most recently built dataset for the
# view/download handlers -- those handlers are not visible here.
current_df = None

def toggle_filters_display(b):
    """Show or hide the filter panel and update the toggle button to match.

    ``b`` is the clicked button passed by ``on_click``; it is not used.
    """
    currently_hidden = filters_container.layout.display == 'none'
    if currently_hidden:
        next_display, label, icon = 'block', "Verberg filters", "chevron-up"
    else:
        next_display, label, icon = 'none', "Toon filters", "chevron-down"
    filters_container.layout.display = next_display
    toggle_filters_button.description = label
    toggle_filters_button.icon = icon
toggle_filters_button.on_click(toggle_filters_display)

def adjust_dates_on_freq_change(change):
    """Round the start and end inputs to the newly selected frequency.

    ``change`` is either a change dict (its 'new' entry is used) or the
    frequency value itself. Nothing happens for 'auto' or when either date
    input cannot be parsed; the latter is logged as a warning.
    """
    new_freq = change['new'] if isinstance(change, dict) else change
    if new_freq == 'auto':
        return

    raw_start, raw_end = start_datetime_input.value, end_datetime_input.value
    parsed_start = parse_user_datetime(raw_start)
    parsed_end = parse_user_datetime(raw_end)
    if parsed_start is None:
        logger.warning(f"Ongeldige startdatum '{raw_start}' bij aanpassen frequentie.")
        return
    if parsed_end is None:
        logger.warning(f"Ongeldige einddatum '{raw_end}' bij aanpassen frequentie.")
        return

    # Both roundings are computed before either input is written back.
    snapped_start = round_datetime_to_freq(parsed_start, new_freq, is_start=True)
    snapped_end = round_datetime_to_freq(parsed_end, new_freq, is_start=False)
    display_format = '%d/%m/%Y %H:%M'
    if snapped_start:
        start_datetime_input.value = snapped_start.strftime(display_format)
    if snapped_end:
        end_datetime_input.value = snapped_end.strftime(display_format)

def validate_data_request(change=None):
    """Validate the date inputs and frequency and show the result.

    Checks that both dates parse, that the end is not before the start and,
    for an explicit frequency, that the expected row count stays within
    ``MAX_ROWS``. All messages are shown through ``warning_message`` inside
    ``warning_container``; the quick-fix buttons are added only when the row
    limit is exceeded. With no problems the container is emptied.

    Args:
        change: Change notification from ``observe``; not used. Defaults to
            None so the function can also be called directly.
    """
    start_dt = parse_user_datetime(start_datetime_input.value)
    end_dt = parse_user_datetime(end_datetime_input.value)

    problems = []
    if not start_dt:
        problems.append("Ongeldige startdatum/tijd (formaat dd/mm/yyyy HH:MM)!")
    if not end_dt:
        problems.append("Ongeldige einddatum/tijd (formaat dd/mm/yyyy HH:MM)!")
    if start_dt and end_dt and end_dt < start_dt:
        problems.append("Einddatum mag niet vóór de startdatum liggen.")

    # Tracked as a flag instead of searching the message text afterwards.
    too_many_rows = False
    freq_val = freq_selector.value
    if freq_val.lower() != 'auto' and start_dt and end_dt and end_dt >= start_dt:
        is_ok, expected_rows = check_max_rows(start_dt, end_dt, freq_val, max_rows=MAX_ROWS)
        if not is_ok:
            too_many_rows = True
            problems.append(
                f"U probeert te veel data op te vragen ({int(expected_rows)} rijen bij freq '{freq_val}'). "
                "Verklein het datumbereik of verhoog de resolutie (bijv. naar 'Per uur')."
            )

    if not problems:
        warning_message.value = ""
        warning_container.children = []
        return

    # Single rendering path: always reuse warning_message so its value never
    # goes stale, and no throw-away HTML widget is created per validation.
    warning_message.value = f"<span style='color:red; font-weight:bold;'>{'; '.join(problems)}</span>"
    shown = [warning_message]
    if too_many_rows:
        shown.append(
            widgets.HBox([quick_fix_freq_button, quick_fix_date_button],
                         layout=widgets.Layout(justify_content='center'))
        )
    warning_container.children = shown

def on_aggregate_change(change):
    """Disable the status checkbox while aggregation is switched on."""
    aggregated = change['new']
    status_checkbox.disabled = aggregated
# Bring the status checkbox in line with the current aggregate setting, then
# keep it in sync on every change.
status_checkbox.disabled = aggregate_checkbox.value
aggregate_checkbox.observe(on_aggregate_change, names='value')

def on_freq_change_and_validate(change):
    """Handle a frequency change: round the dates first, then re-validate."""
    adjust_dates_on_freq_change(change)
    validate_data_request(change)

# Re-validate whenever either date text or the frequency changes.
start_datetime_input.observe(validate_data_request, names="value")
end_datetime_input.observe(validate_data_request, names="value")
freq_selector.observe(on_freq_change_and_validate, names='value')

def quick_fix_freq_action(b):
    """Quick fix: switch the frequency selector to hourly ('H').

    ``b`` is the clicked button passed by ``on_click``; it is not used.
    Setting the value triggers the frequency observer registered on
    ``freq_selector``.
    """
    freq_selector.value = 'H'
quick_fix_freq_button.on_click(quick_fix_freq_action)

def quick_fix_date_action(b):
    """Quick fix: pull the end date in so the request fits within MAX_ROWS.

    The new end date is the start date plus ``MAX_ROWS - 1`` intervals of the
    selected frequency ('H' is used when the selector is on 'auto'). Nothing
    happens when the start date cannot be parsed. ``b`` is the clicked
    button passed by ``on_click``; it is not used.
    """
    begin = parse_user_datetime(start_datetime_input.value)
    if not begin:
        return

    freq = freq_selector.value
    if freq == 'auto':
        logger.info("Quick fix date: Freq is 'auto', using 'H' for calculation.")
        freq = 'H'

    step_seconds = get_freq_seconds(freq)
    if step_seconds <= 0:
        # Fall back to one hour when no usable interval length is returned.
        step_seconds = 3600

    capped_end = begin + timedelta(seconds=(MAX_ROWS - 1) * step_seconds)
    end_datetime_input.value = capped_end.strftime('%d/%m/%Y %H:%M')
    validate_data_request()
quick_fix_date_button.on_click(quick_fix_date_action)

def load_filters_thread(ean_val: str):
    """Look up the TypeIDs for ``ean_val`` and rebuild the group checkboxes.

    Started on a worker thread by ``on_load_filters_clicked``. All dataset
    buttons are disabled up front; only the build button is re-enabled, and
    only when at least one group from ``group_typeid_mapping`` shares a TypeID
    with the lookup result.

    Args:
        ean_val: The EAN/ID entered by the user, already stripped.
    """
    def announce(message):
        # Replace whatever the output area currently shows with one message.
        with output_area:
            clear_output()
            print(message)

    def clear_group_filters():
        # Remove all group checkboxes and collapse the accordion.
        group_checkbox_container.children = []
        group_accordion.selected_index = None

    dataset_buttons = (
        btn_build_dataset,
        btn_view_dataset,
        btn_view_insights,
        btn_download_csv,
        btn_download_excel,
    )
    for dataset_button in dataset_buttons:
        dataset_button.disabled = True
    announce("Filters worden geladen...")

    typeids = fetch_typeids_for_ean(ean_val)
    if not typeids:
        announce("Geen TypeIDs gevonden voor deze invoer.")
        clear_group_filters()
        return

    relevant_groups = [
        grp
        for grp, tlist in group_typeid_mapping.items()
        if set(tlist).intersection(typeids)
    ]
    if not relevant_groups:
        announce("Geen relevante groepen gevonden voor de TypeIDs.")
        clear_group_filters()
        return

    group_names = sorted(relevant_groups)
    group_checkbox_container.children = [
        widgets.Checkbox(value=True, description=group_name, indent=False)
        for group_name in group_names
    ]
    group_accordion.selected_index = 0
    btn_build_dataset.disabled = False
    announce(f"Filters geladen. Beschikbare groepen: {', '.join(group_names)}")

def on_load_filters_clicked(b):
    """Start loading the filters for the entered EAN/ID on a worker thread.

    An empty input only produces a prompt in the output area.

    Args:
        b: The clicked button (unused).
    """
    ean_val = ean_input.value.strip()
    if ean_val:
        loader = threading.Thread(target=load_filters_thread, args=(ean_val,))
        loader.start()
        return
    with output_area:
        clear_output()
        print("Vul een EAN of ID in.")

def on_reset_filters_clicked(b):
    """Put every filter widget back to its default and re-validate the request.

    Args:
        b: The clicked button (unused).
    """
    for group_checkbox in group_checkbox_container.children:
        group_checkbox.value = True

    # Assigned strictly in this order: each assignment can fire the observers
    # registered on that widget.
    widget_defaults = (
        (ean_input, ''),
        (start_datetime_input, '01/01/2024 00:00'),
        (end_datetime_input, '31/12/2024 00:00'),
        (freq_selector, 'auto'),
        (aggregate_checkbox, True),
        (status_checkbox, False),
        (excel_format_checkbox, False),
    )
    for widget, default_value in widget_defaults:
        widget.value = default_value
    status_checkbox.disabled = aggregate_checkbox.value

    with output_area:
        clear_output()
        print("Filters zijn gereset.")
    validate_data_request()

def get_selected_groups() -> List[str]:
    """Return the descriptions of all group checkboxes that are ticked."""
    ticked_boxes = (box for box in group_checkbox_container.children if box.value)
    return [box.description for box in ticked_boxes]

def build_dataset_thread():
    """Validate the form, build the dataset and publish it as ``current_df``.

    Started on a worker thread by ``on_build_dataset_clicked``. All dataset
    buttons are disabled while the build runs. The view/download buttons are
    re-enabled only when a non-empty dataset was produced. The progress widget
    is always finished and the build button always re-enabled, including when
    an unexpected exception is raised, so the UI cannot get stuck in the
    "building" state.

    Side effects:
        Rebinds the module-level ``current_df`` on success and reports status
        through ``output_area`` and ``progress_widget``.
    """
    global current_df
    result_buttons = (
        btn_view_dataset,
        btn_view_insights,
        btn_download_csv,
        btn_download_excel,
    )
    btn_build_dataset.disabled = True
    for result_button in result_buttons:
        result_button.disabled = True
    try:
        progress_widget.show(status="Dataset wordt opgebouwd...")  # show and reset the progress bar
        with output_area:
            clear_output(wait=True)

        ean_val = ean_input.value.strip()
        if not ean_val:
            with output_area:
                print("Vul een EAN/ID in.")
            progress_widget.update(100, "Fout: EAN/ID ontbreekt", error=True)
            return

        # Re-run validation; any child left in the warning container means
        # the request is still invalid.
        validate_data_request()
        if warning_container.children:
            with output_area:
                display(HTML("<span style='color:red; font-weight:bold;'>Los de validatiefouten op voordat u de dataset bouwt.</span>"))
            progress_widget.update(100, "Validatiefout", error=True)
            return

        start_dt = parse_user_datetime(start_datetime_input.value)
        end_dt = parse_user_datetime(end_datetime_input.value)
        if not start_dt or not end_dt:
            with output_area:
                print("Interne fout: Ongeldige datums ondanks validatie.")
            progress_widget.update(100, "Fout: Interne datumfout", error=True)
            return

        freq_val = freq_selector.value
        agg_val = aggregate_checkbox.value
        status_val = status_checkbox.value
        chosen = get_selected_groups()
        if not chosen:
            with output_area:
                print("Geen groepen geselecteerd.")
            progress_widget.update(100, "Fout: Geen groepen", error=True)
            return

        progress_widget.update(30, "TypeIDs en min/max periode ophalen...")
        df_resampled = build_dataset(
            ean_val,
            chosen,
            start_dt,
            end_dt,
            freq_val,
            agg_val,
            include_status_flag=status_val
        )
        progress_widget.update(80, "Dataset verwerken...")
        if df_resampled is None or df_resampled.empty:
            progress_widget.update(100, "Geen dataset opgehaald.", error=True)
            with output_area:
                print("Geen dataset opgehaald. Controleer filters of logs voor details.")
            return

        current_df = df_resampled.copy()
        progress_widget.update(100, f"Dataset geladen met {len(current_df)} rijen.")
        with output_area:
            clear_output(wait=True)
            print(f"Dataset succesvol geladen met {len(current_df)} rijen.\nU kunt nu de dataset bekijken, inzichten genereren of downloaden.")
        for result_button in result_buttons:
            result_button.disabled = False
    except Exception:
        # Top-level boundary of the worker thread: without this handler the
        # thread would die and the user would get no feedback in the UI.
        logger.exception("Onverwachte fout bij het opbouwen van de dataset.")
        progress_widget.update(100, "Fout bij opbouwen dataset", error=True)
        with output_area:
            print("Onverwachte fout bij het opbouwen van de dataset. Controleer de logs voor details.")
    finally:
        # Always release the UI, whichever path was taken above.
        progress_widget.finish()
        btn_build_dataset.disabled = False

def on_build_dataset_clicked(b):
    """Clear the table and insights outputs and start the build on a thread.

    Args:
        b: The clicked button (unused).
    """
    for stale_output in (data_table_output, insights_output):
        with stale_output:
            clear_output()
    builder = threading.Thread(target=build_dataset_thread)
    builder.start()

def show_dataset_table():
    """Render the first rows of ``current_df`` as a scrollable HTML table.

    At most 50 rows are shown; when the dataset is longer, a note with the
    total row count is added below the table. Afterwards the first tab of
    ``view_tab`` is selected.
    """
    preview_rows = 50
    with data_table_output:
        clear_output(wait=True)
        if current_df is not None and not current_df.empty:
            table_html = current_df.head(preview_rows).to_html(classes="dataframe", index=False, escape=True)
            if len(current_df) > preview_rows:
                truncation_note = f"<p><i>Toont de eerste {preview_rows} rijen van {len(current_df)} totale rijen. Download voor de volledige dataset.</i></p>"
            else:
                truncation_note = ""
            preview_markup = f"""
            <div style="overflow-x: auto; overflow-y: auto; max-height: 380px; width: 100%;">
                {table_html}
            </div>
            {truncation_note}
            """
            display(HTML(preview_markup))
        else:
            print("Nog geen dataset geladen of de dataset is leeg.")
    view_tab.selected_index = 0

def show_insights():
    """Render the insights for ``current_df`` and select the second tab.

    When no dataset is loaded, or it is empty, a plain message is printed
    instead of the insights HTML.
    """
    with insights_output:
        clear_output(wait=True)
        dataset_available = current_df is not None and not current_df.empty
        if dataset_available:
            report_html = generate_insights(current_df)
            wrapped_report = f"<div style='font-family: Roboto, sans-serif; font-size:14px;'>{report_html}</div>"
            display(HTML(wrapped_report))
        else:
            print("Nog geen dataset geladen of de dataset is leeg om inzichten te genereren.")
    view_tab.selected_index = 1

def on_view_dataset_clicked(b):
    """Click handler that delegates to ``show_dataset_table``.

    Args:
        b: The clicked button (unused).
    """
    show_dataset_table()

def on_view_insights_clicked(b):
    """Click handler that delegates to ``show_insights``.

    Args:
        b: The clicked button (unused).
    """
    show_insights()

def _dataset_ready_for_export() -> bool:
    """Return True when ``current_df`` holds rows; otherwise tell the user."""
    if current_df is not None and not current_df.empty:
        return True
    with output_area:
        clear_output(wait=True)
        print("Geen dataset om te downloaden.")
    return False


def _export_target_path(extension: str, label: str) -> str:
    """Build a timestamped export path, preferring the user's Downloads folder.

    Args:
        extension: File extension without the dot, e.g. ``"csv"``.
        label: Human-readable format name used in the fallback log message.

    Returns:
        Path of the file to write. Falls back to the current working
        directory when the Downloads folder cannot be created.
    """
    import re  # local import: the file's top-level imports are not visible here

    # The EAN/ID is free-text user input. Keep only word characters, dots and
    # dashes so it cannot add path separators (including the backslash on
    # Windows) or characters that are invalid in file names.
    safe_ean = re.sub(r"[^\w.-]", "_", ean_input.value.strip())
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads")
    try:
        # exist_ok avoids the check-then-create race of a separate isdir() test.
        os.makedirs(downloads_folder, exist_ok=True)
    except OSError:
        logger.warning(f"Kon map {downloads_folder} niet aanmaken. {label} wordt opgeslagen in huidige werkmap.")
        downloads_folder = os.getcwd()
    return os.path.join(downloads_folder, f"dataset_{safe_ean}_{ts}.{extension}")


def _show_export_result(succeeded, label: str, filename: str) -> None:
    """Replace the output area content with the outcome of an export.

    Args:
        succeeded: Truthy when the export function reported success.
        label: Human-readable format name, e.g. ``"CSV"``.
        filename: Path the export was written to.
    """
    from html import escape  # local import, see _export_target_path

    with output_area:
        clear_output(wait=True)
        if succeeded:
            # The path is escaped because it is interpolated into raw HTML.
            display(HTML(f"<p>{label} bestand opgeslagen in uw Downloads map: <code>{escape(filename)}</code></p>"
                         f"<p>Als de download niet automatisch start, kunt u het bestand hier vinden.</p>"))
        else:
            print(f"Fout bij exporteren naar {label}.")


def on_download_csv_clicked(b):
    """Export ``current_df`` to a timestamped CSV file and report the result.

    Args:
        b: The clicked button (unused).
    """
    if not _dataset_ready_for_export():
        return
    filename = _export_target_path("csv", "CSV")
    _show_export_result(export_dataset_to_csv(current_df, filename), "CSV", filename)


def on_download_excel_clicked(b):
    """Export ``current_df`` to a timestamped Excel file and report the result.

    Status-based formatting is requested only when the status checkbox is
    ticked and aggregation is switched off.

    Args:
        b: The clicked button (unused).
    """
    if not _dataset_ready_for_export():
        return
    filename = _export_target_path("xlsx", "Excel")
    apply_excel_format = excel_format_checkbox.value
    include_status_for_formatting = status_checkbox.value and not aggregate_checkbox.value
    succeeded = export_dataset_to_excel(current_df, filename, apply_excel_format, include_status_for_formatting)
    _show_export_result(succeeded, "Excel", filename)

# Wire every button to its click handler.
btn_load_filters.on_click(on_load_filters_clicked)
btn_reset_filters.on_click(on_reset_filters_clicked)
btn_build_dataset.on_click(on_build_dataset_clicked)
btn_view_dataset.on_click(on_view_dataset_clicked)
btn_view_insights.on_click(on_view_insights_clicked)
btn_download_csv.on_click(on_download_csv_clicked)
btn_download_excel.on_click(on_download_excel_clicked)

# Initial validation call to check default dates, then run the
# frequency-change handler once for the initially selected frequency and
# validate again afterwards.
validate_data_request()
adjust_dates_on_freq_change({'new': freq_selector.value})
validate_data_request()

ModuleNotFoundError: No module named 'common_imports'