In [5]:
import pandas as pd
from datetime import datetime

# Location of the semicolon-delimited offline results export
csv_file = "C:/Users/peter/Desktop/Speciale/DS_thesis/P10. 2024. Pilot dashboard (1)/2024-10-04_Results_Cell-Content_Medium_Tech_RS-FV-New.csv"

# Load the file with no header row, then flip it so the file's rows become columns
df_transposed = pd.read_csv(csv_file, sep=';', header=None).transpose()

# Promote the first transposed row to column labels and drop it from the data
df_transposed.columns = df_transposed.iloc[0]
df_transposed = df_transposed.iloc[1:].reset_index(drop=True)

# Keep every cell as text so the later comma -> dot replacement works on all columns
df_transposed = df_transposed.astype(str)

# Carve out the two sub-tables by column position:
# table1 <- positions 1..19, table2 <- positions 21..34 (positions 0 and 20 are left out)
table1, table2 = (
    df_transposed.iloc[:, column_span].copy().reset_index(drop=True)
    for column_span in (slice(1, 20), slice(21, 35))
)

# Day-zero reference for the 'Sample Day' offsets
# (the actual date for this analytics is 2024-10-04)
start_date_str = "2023-12-11"
start_date = datetime.strptime(start_date_str, "%Y-%m-%d")

# Function to add DateTime column
def add_datetime_column(table, start_date):
    """Add a 'DateTime' column equal to ``start_date`` plus the 'Sample Day' offset.

    'Sample Day' is interpreted as a (possibly fractional) number of days.
    Text values may use a decimal comma ("1,5"); columns that are already
    numeric are accepted as-is.

    Args:
        table: DataFrame to extend. It is modified in place.
        start_date: Day-zero reference; anything ``pd.to_datetime`` accepts.

    Returns:
        The same ``table`` object. When 'Sample Day' is present it gains a
        'DateTime' column of strings formatted as ``'%d-%m-%Y %H:%M:%S'``.
        When the column is missing, an error is printed and the table is
        returned unchanged.

    Raises:
        ValueError: If a text 'Sample Day' value cannot be parsed as a number.
    """
    if 'Sample Day' not in table.columns:
        print("Error: 'Sample Day' column not found in the table.")
        return table

    sample_day = table['Sample Day']
    # The .str accessor only exists on text columns; numeric input needs no
    # decimal-comma normalisation and would raise AttributeError here.
    if not pd.api.types.is_numeric_dtype(sample_day):
        sample_day = sample_day.str.replace(',', '.', regex=False)

    day_offsets = pd.to_timedelta(sample_day.astype(float), unit='D')
    timestamps = pd.to_datetime(start_date) + day_offsets
    table['DateTime'] = timestamps.dt.strftime('%d-%m-%Y %H:%M:%S')
    return table

# Apply the function to both tables
table1 = add_datetime_column(table1, start_date)
table2 = add_datetime_column(table2, start_date)

# add_datetime_column stores formatted text; parse it back into datetime objects
table1['DateTime'] = pd.to_datetime(table1['DateTime'], format='%d-%m-%Y %H:%M:%S')
table2['DateTime'] = pd.to_datetime(table2['DateTime'], format='%d-%m-%Y %H:%M:%S')

# Exclude columns we don't want to process
exclude_columns = ['Sample Day', 'SAMPLE I.D', 'DateTime']


def _coerce_decimal_comma_columns(table, columns):
    """Convert text columns with decimal commas to numbers, in place.

    Values that still cannot be parsed after the comma -> dot replacement
    become NaN (``errors='coerce'``).
    """
    for col in columns:
        normalised = table[col].str.replace(',', '.', regex=False)
        table[col] = pd.to_numeric(normalised, errors='coerce')


def _selectable_columns(numeric_frame):
    """Return the numeric column labels that may be offered in the dropdown."""
    return [col for col in numeric_frame.columns if col not in exclude_columns]


# Process numeric columns in both tables (null column labels are skipped)
data_columns_table1 = [col for col in table1.columns if col not in exclude_columns and pd.notnull(col)]
data_columns_table2 = [col for col in table2.columns if col not in exclude_columns and pd.notnull(col)]
_coerce_decimal_comma_columns(table1, data_columns_table1)
_coerce_decimal_comma_columns(table2, data_columns_table2)

# Now select numeric columns
numeric_columns_table1 = table1.select_dtypes(include=['float64', 'int64'])
numeric_columns_table2 = table2.select_dtypes(include=['float64', 'int64'])

# Get the list of columns for dropdown options.
# Display names and the mapping below are derived from the SAME filtered list,
# so a display name can never be paired with the wrong source column (zipping
# a filtered list against the unfiltered column index would shift the pairs
# whenever an excluded column happened to be numeric).
selectable_table1 = _selectable_columns(numeric_columns_table1)
selectable_table2 = _selectable_columns(numeric_columns_table2)

# We prefix the column names with the table name to avoid duplicates
columns_table1_prefixed = [f"Table1: {col}" for col in selectable_table1]
columns_table2_prefixed = [f"Table2: {col}" for col in selectable_table2]

# Combine all columns
all_columns = columns_table1_prefixed + columns_table2_prefixed

# Create a mapping from display names to (table key, actual column name)
column_mapping = {}
for table_key, display_prefix, column_names in (
    ('table1', 'Table1', selectable_table1),
    ('table2', 'Table2', selectable_table2),
):
    for col_name in column_names:
        column_mapping[f"{display_prefix}: {col_name}"] = (table_key, col_name)

In [None]:
import os
from datetime import datetime
import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output, State
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc

# Dash application instance, styled with the Bootstrap theme shipped by
# dash-bootstrap-components.
# Unstyled alternative, kept for reference:
#app = dash.Dash(__name__)
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# For potential deployment the underlying server could be exposed:
#server = app.server  for potential deployment

# Directory whose CSV files are listed and filtered by date in update_file_list
directory = "C:/Users/peter/Desktop/Speciale/DS_thesis/P10. 2024. Pilot dashboard (1)"
# Directory that receives one "<batch>_<variable>.csv" file per extracted variable
output_directory = "C:/Users/peter/Desktop/Speciale/DS_thesis/P10, 2024, saved files"

# List of variable names to filter.
# process_csv_file compares each entry against the 'VarName' column with an
# exact, case-sensitive match, so the spelling here must equal the spelling in
# the raw files (note the lower-case "analog input" on the 20FTC003 entry).
variable_names = [
    "AI Values_78TT001 - Analog input",
    "AI Values_78TT002 - Analog input",
    "AI Values_10TT002 - Analog input",
    "AI Values_20TTC001 - Analog input",
    "AI Values_20FTC003 - analog input",
    "AI Values_78FT001 - Analog input",
    "AI Values_20FTC002 - Analog input",
    "AI Values_20XTC001 - Analog input",
    "AI Values_20XTC002 - Analog input",
    "AI Values_20XT004 - Analog input",
    "AI Values_20XTC003 - Analog input",
    "AI Values_10PT001 - Analog input",
    "30P001.HMI.DATA_2",
    "30P002.HMI.DATA_2",
    "30P001.HMI.STATUS",
    "AO Values_10R001",
    "AO Values_10R002",
    "AO Values_10R003",
    "AI Values_20PT004 - Analog input",
    "AI Values_78PT002 - Analog input",
    "AI Values_78PT001 - Analog input",
]

# Quantity/unit text for each process variable, keyed by the same names as
# variable_names. Entries containing "?" are units the author had not confirmed.
# NOTE(review): the consumer of this mapping is not visible in this section —
# presumably axis labels for the variable graph; confirm.
variable_units = {
    "AI Values_78TT001 - Analog input": "Temperature (°C)",
    "AI Values_78TT002 - Analog input": "Temperature (°C)",
    "AI Values_10TT002 - Analog input": "Temperature (°C)",
    "AI Values_20TTC001 - Analog input": "Temperature (°C)",
    "AI Values_20FTC003 - analog input": "Flowrate normal L/min",
    "AI Values_78FT001 - Analog input": "Flowrate m3/h",
    "AI Values_20FTC002 - Analog input": "Flowrate m3/h",
    "AI Values_20XTC001 - Analog input": "pH",
    "AI Values_20XTC002 - Analog input": "pH",
    "AI Values_20XT004 - Analog input": "Dissolved oxygen (%)",
    "AI Values_20XTC003 - Analog input": "Dissolved oxygen (%)",
    "AI Values_10PT001 - Analog input": "Pressure (bar?)",
    "30P001.HMI.DATA_2": "Pump state (ON/OFF)",
    "30P002.HMI.DATA_2": "Pump state (ON/OFF)",
    "30P001.HMI.STATUS": "Pump state (?)",
    "AO Values_10R001": "Light intensity (%)",
    "AO Values_10R002": "Light intensity (%)",
    "AO Values_10R003": "Light intensity (%)",
    "AI Values_20PT004 - Analog input": "Pressure (bar)",
    "AI Values_78PT002 - Analog input": "Pressure (bar)",
    "AI Values_78PT001 - Analog input": "Pressure (bar)",
}

# Human-readable label for each process variable, keyed by the same names as
# variable_names. Entries containing "?" are descriptions the author had not
# confirmed.
# NOTE(review): the consumer of this mapping is not visible in this section —
# presumably dropdown/legend labels; confirm.
variable_display_names = {
    "AI Values_78TT001 - Analog input": "Cooling circuit, before PBR (°C)",
    "AI Values_78TT002 - Analog input": "Before the PBR (°C)",
    "AI Values_10TT002 - Analog input": "Cooling circuit, after PBR (°C)",
    "AI Values_20TTC001 - Analog input": "After the PBR (°C)",
    "AI Values_20FTC003 - analog input": "CO2 inlet (normal L/min)",
    "AI Values_78FT001 - Analog input": "Cooling fluid  (Flowrate m3/h)",
    "AI Values_20FTC002 - Analog input": "From degasser to PBR (Flowrate m3/h)",
    "AI Values_20XTC001 - Analog input": "After the PBR1 (pH)",
    "AI Values_20XTC002 - Analog input": "After the PBR2 (pH)",
    "AI Values_20XT004 - Analog input": "Before the PBR (Dissolved oxygen %)",
    "AI Values_20XTC003 - Analog input": "After the PBR (Dissolved oxygen %)",
    "AI Values_10PT001 - Analog input": "Inside the PBR? Pressure (bar?)",
    "30P001.HMI.DATA_2": "Nutrient drum (ON/OFF)",
    "30P002.HMI.DATA_2": "IBC (ON/OFF)",
    "30P001.HMI.STATUS": "Nutrient drum (Pump state ?)",
    "AO Values_10R001": "Top layer (Light intensity %)",
    "AO Values_10R002": "Middle layer (Light intensity %)",
    "AO Values_10R003": "Bottom layer (Light intensity %)",
    "AI Values_20PT004 - Analog input": "After the PBR Pressure (bar)",
    "AI Values_78PT002 - Analog input": "Cooling circuit, after PBR Pressure (bar)",
    "AI Values_78PT001 - Analog input": "Cooling circuit, before PBR Pressure (bar)",
}

# Define units for new variables.
# Keys follow the "Table1: <column>" / "Table2: <column>" display-name pattern
# used for the offline-data dropdown options.
# NOTE(review): the "Total saturated mg. 100g-1 " key ends with a space —
# presumably mirroring the CSV header; confirm before normalising it.
new_variable_units = {
    "Table1: % CARBOHYDRATE": "%",
    "Table1: % PROTEIN": "%",
    "Table1: % OIL": "%",
    "Table1: EPA % DM": "%",
    "Table1: EPA % FA": "%",
    "Table1: Total saturated %": "%",
    "Table1: Total saturated mg. 100g-1 ": "mg. 100g-1",
    "Table1: Total monounsaturated % total fatty acids": "%",
    "Table1: Total monounsaturated mg. 100g-1": "mg. 100g-1",
    "Table1: Total n-6 PUFA % total fatty acids": "%",
    "Table1: Total n-6 PUFA mg. 100g-1": "mg. 100g-1",
    "Table1: Total n-3 PUFA % total fatty acids": "%",
    "Table1: Total n-3 PUFA mg. 100g-1": "mg. 100g-1",
    "Table1: Total PUFA % total fatty acids": "%",
    "Table1: Total PUFA mg. 100g-1": "mg. 100g-1",
    "Table1: Total FA % DM": "%",
    "Table2: Sodium": "mg/kg",
    "Table2: Magnesium": "mg/kg",
    "Table2: Phosphorus": "mg/kg",
    "Table2: Potassium": "mg/kg",
    "Table2: Calcium": "mg/kg",
    "Table2: Mangan": "mg/kg",
    "Table2: Iron": "mg/kg",
    "Table2: Copper": "mg/kg",
    "Table2: Zinc": "mg/kg",
    "Table2: Arsenic": "mg/kg",
    "Table2: Lead": "mg/kg",
}

# List of variables to skip for outlier removal and downsampling.
# remove_outliers returns its input untouched for any column named here.
# NOTE(review): the downsampling code is not visible in this section.
skip_variables = [
    "30P001.HMI.DATA_2",  # Binary variable
    "30P002.HMI.DATA_2",  # Binary variable
    "30P001.HMI.STATUS",  # Binary variable
    "AO Values_10R001",   # Percentage variable
    "AO Values_10R002",   # Percentage variable 
    "AO Values_10R003",   # Percentage variable 
]

# Dictionary to store merged DataFrames for all variables.
# Starts with one empty frame per variable; update_file_list rebinds this
# global and concatenates the rows extracted from each selected file.
merged_dataframes = {var: pd.DataFrame() for var in variable_names}

# Store filename prefixes (batch names) entered by the user during this session
filename_prefixes = []

# List CSV files in the directory
def list_csv_files(directory):
    """Return the names of the ``.csv`` files directly inside ``directory``.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        Sorted list of bare file names (no directory part). Sorting makes the
        processing order deterministic, since ``os.listdir`` returns entries
        in arbitrary order. Sub-directories whose name happens to end in
        ``.csv`` are left out because they cannot be read as CSV files.

    Raises:
        OSError: If ``directory`` does not exist or cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if entry.endswith('.csv') and os.path.isfile(os.path.join(directory, entry))
    )

# Filter files by date
def filter_files_by_date(files, start_date, end_date):
    """Keep the file names whose embedded date lies in [start_date, end_date].

    The date is read from the third underscore-separated field of each name
    and must be formatted as YYYYMMDD. Names that do not follow this pattern
    (or cannot be compared) are reported on stdout and skipped.

    Args:
        files: Iterable of file names.
        start_date: Inclusive lower bound (datetime).
        end_date: Inclusive upper bound (datetime).

    Returns:
        List of matching names, in input order.
    """
    in_range = []
    for file in files:
        try:
            stamp = datetime.strptime(file.split('_')[2], "%Y%m%d")
            keep = start_date <= stamp <= end_date
        except Exception as e:
            print(f"Error processing file {file}: {e}")
            continue
        if keep:
            in_range.append(file)
    return in_range

# Function to process a single CSV file and extract variables into DataFrames
def process_csv_file(directory, csv_file, variable_names):
    """Read one semicolon-delimited CSV file and split it by variable name.

    Args:
        directory: Folder that relative ``csv_file`` paths are resolved against.
        csv_file: File name relative to ``directory``, or an absolute path.
        variable_names: Values of the 'VarName' column to extract.

    Returns:
        Dict mapping each variable name that occurs in the file to a DataFrame
        holding its rows. Variables with no rows are omitted. Each frame is an
        independent copy, so callers can modify it without pandas
        chained-assignment warnings.

    Raises:
        KeyError: If the file has no 'VarName' column.
        OSError: If the file cannot be opened.
    """
    # Resolve the path instead of calling os.chdir(): changing the working
    # directory affects the whole process, which is unsafe inside a server
    # callback. os.path.join leaves an absolute csv_file untouched.
    csv_path = os.path.join(directory, csv_file)
    df = pd.read_csv(csv_path, delimiter=';', on_bad_lines='skip')

    dataframes = {}
    for var_name in variable_names:
        filtered_df = df[df['VarName'] == var_name]
        if not filtered_df.empty:
            dataframes[var_name] = filtered_df.copy()
            print(f"Extracted {var_name} with {len(filtered_df)} rows.")
    return dataframes

# Function to extract filename prefixes from the saved files in the output directory
def extract_prefixes_from_saved_files(output_directory):
    """Collect the distinct batch prefixes of the CSV files in ``output_directory``.

    The prefix of a file is everything before its first underscore.

    Args:
        output_directory: Folder to scan for ``.csv`` files.

    Returns:
        Alphabetically sorted list of unique prefixes.
    """
    saved_csvs = (name for name in os.listdir(output_directory) if name.endswith('.csv'))
    return sorted({name.split('_')[0] for name in saved_csvs})

# Function to remove outliers using the Interquartile Range (IQR) method
def remove_outliers(df, column):
    """Drop the rows whose ``column`` value falls outside the 1.5 * IQR fences.

    Columns listed in ``skip_variables`` are exempt: for those the input
    frame is handed back unchanged.

    Args:
        df: Input DataFrame.
        column: Name of the column to screen.

    Returns:
        ``df`` itself for skipped columns; otherwise a filtered frame holding
        the rows with Q1 - 1.5*IQR <= value <= Q3 + 1.5*IQR (rows with a
        missing value fail the comparison and are dropped as well).
    """
    if column not in skip_variables:
        first_quartile = df[column].quantile(0.25)
        third_quartile = df[column].quantile(0.75)
        spread = third_quartile - first_quartile
        # Series.between is inclusive on both ends, like the >= / <= pair
        within_fences = df[column].between(
            first_quartile - 1.5 * spread,
            third_quartile + 1.5 * spread,
        )
        return df[within_fences]

    # Skip outlier removal for variables in the skip list
    return df

# Predefined unit lists for different types.
# Keys match the 'variable-type' dropdown values; update_unit_options turns the
# list for the chosen type into the options of the unit dropdown.
unit_options = {
    'Numerical': ['°C', '°F', 'K', 'L/min', 'm³/h', 'bar', 'Pa', 'atm'],
    'Binary': ['ON/OFF'],
    'Restricted Strings': []  # Binary has fixed labels; Restricted Strings will have custom labels
}

# Predefined labels for restricted string variables, used whenever the user
# does not supply a custom comma-separated list
default_restricted_labels = ['Low', 'Medium', 'High', 'Critical']


# Page layout: two dcc.Store components plus a single row with two columns.
#   * Left column (width 8): batch definition, batch/variable selection, graph.
#   * Right column (width 4): user-defined variable creation and manual data entry.
# Component ids are referenced by the callbacks defined after the layout, so
# they must not be renamed here without updating those callbacks.
# NOTE(review): dbc.FormGroup and the Button `block=` argument belong to
# dash-bootstrap-components < 1.0 — confirm the installed version before upgrading.
app.layout = dbc.Container([
    # Hidden stores to keep track of variables and data entries
    dcc.Store(id='variables-store', data={} ),  # Stores variables with their attributes
    dcc.Store(id='data-entries-store', data=[] ),  # Stores data entries
    
    dbc.Row([
        # Left Column
        dbc.Col([
            # Define Batch Section
            dbc.Card([
                dbc.CardHeader(html.H3('Define Batch')),
                dbc.CardBody([
                    dbc.FormGroup([
                        dbc.Label('Select Date Range'),
                        dcc.DatePickerRange(
                            id='date-picker-range',
                            start_date=datetime(2024, 1, 1),
                            end_date=datetime(2024, 12, 31),
                            display_format='YYYY-MM-DD',
                            style={'width': '100%'}
                        ),
                    ]),
                    dbc.FormGroup([
                        dbc.Label('Enter Batch Name'),
                        dbc.Input(
                            id='filename-input',
                            type='text',
                            placeholder='Enter batch name',
                        ),
                    ]),
                    dbc.Button('Confirm', id='confirm-button', color='primary', block=True, className='mt-2'),
                    # Filled by update_file_list: the selected files and the save status
                    html.Div(id='file-list', style={'whiteSpace': 'pre-line', 'marginTop': '10px'}),
                    html.Div(id='file-save-status', style={'marginTop': '10px', 'color': 'green'}),
                ])
            ], className='mb-4'),
            # Batch Processing Section
            dbc.Card([
                dbc.CardHeader(html.H2('Batch & Variable selection')),
                dbc.CardBody([
                    # Prefix Dropdown (options are supplied by update_file_list)
                    dbc.FormGroup([
                        dbc.Label('Select Batch'),
                        dcc.Dropdown(
                            id='prefix-dropdown',
                            placeholder='Select batch',
                            multi=True
                        ),
                    ]),
                    # Existing Variable Dropdown
                    dbc.FormGroup([
                        dbc.Label('Select process data variable(s)'),
                        dcc.Dropdown(
                            id='variable-dropdown',
                            placeholder='Select variable',
                            multi=True
                        ),
                    ]),
                    # New Variables Dropdown (options come from all_columns, built in the previous cell)
                    dbc.FormGroup([
                        dbc.Label('Select offline data variable(s)'),
                        dcc.Dropdown(
                            id='new-variable-dropdown',
                            options=[{'label': col, 'value': col} for col in all_columns],
                            placeholder='Select new variable(s)',
                            multi=True
                        ),
                    ]),
                    # Time Mode Switch
                    dbc.FormGroup([
                        dbc.Label('Time Mode'),
                        dbc.RadioItems(
                            id='time-mode-switch',
                            options=[
                                {'label': 'Absolute Time', 'value': 'absolute'},
                                {'label': 'Elapsed Time', 'value': 'elapsed'},
                            ],
                            value='absolute',
                            inline=True
                        ),
                    ]),
                ])
            ], className='mb-4'),
            # Variable Graph
            dbc.Card([
                dbc.CardHeader(html.H3('Variable Graph')),
                dbc.CardBody([
                    dcc.Graph(id='variable-graph')
                ])
            ]),
        ], width=8),
        # Right Column
        dbc.Col([
            # Create New Variable Section
            dbc.Card([
                dbc.CardHeader(html.H2('Create New Variable')),
                dbc.CardBody([
                    # Variable Name
                    dbc.FormGroup([
                        dbc.Label('Variable Name'),
                        dbc.Input(
                            id='new-variable-name',
                            type='text',
                            placeholder='Variable Name'
                        ),
                    ]),
                    # Variable Type (values match the keys of unit_options)
                    dbc.FormGroup([
                        dbc.Label('Data Type'),
                        dcc.Dropdown(
                            id='variable-type',
                            options=[
                                {'label': 'Numerical', 'value': 'Numerical'},
                                {'label': 'Binary', 'value': 'Binary'},
                                {'label': 'Restricted Strings', 'value': 'Restricted Strings'}
                            ],
                            placeholder='Select Data Type'
                        ),
                    ]),
                    # Restricted String Labels (hidden until toggle_restricted_labels shows it)
                    dbc.FormGroup([
                        dbc.Label('Restricted Labels (comma-separated)'),
                        dbc.Input(
                            id='restricted-labels-input',
                            type='text',
                            placeholder='e.g. Low, Medium, High, Critical'
                        ),
                    ], id='restricted-labels-container', style={'display': 'none'}),
                    # Units Option
                    dbc.FormGroup([
                        dbc.Checklist(
                            options=[{'label': 'Add Unit', 'value': 'add_unit'}],
                            value=[],
                            id='add-unit-checkbox',
                            inline=True
                        ),
                    ]),
                    # Unit Dropdown (hidden until toggle_unit_dropdown shows it)
                    dbc.FormGroup([
                        dbc.Label('Select Unit'),
                        dcc.Dropdown(
                            id='unit-selection-dropdown',
                            placeholder='Select Unit'
                        ),
                    ], id='unit-dropdown-container', style={'display': 'none'}),
                    # Add Variable Button
                    dbc.Button('Add Variable', id='add-variable-button', color='success', block=True),
                    # Variable List
                    html.Div(id='variable-list', className='mt-3'),
                    # Status Message
                    html.Div(id='variable-save-status', className='mt-2', style={'color': 'green'}),
                ])
            ], className='mb-4'),
            # Input Data for Variables
            dbc.Card([
                dbc.CardHeader(html.H2('Input Data for Variables')),
                dbc.CardBody([
                    # Variable Selection
                    dbc.FormGroup([
                        dbc.Label('Select Variable'),
                        dcc.Dropdown(
                            id='variable-select-dropdown',
                            placeholder='Select Variable'
                        ),
                    ]),
                    # Value Input (update_data_input replaces the children of this
                    # container with a control that suits the selected variable)
                    dbc.FormGroup([
                        dbc.Label('Value'),
                        dbc.Input(
                            id='variable-value-input',
                            type='number',
                            placeholder='Enter Value',
                            disabled=True
                        ),
                    ], id='value-input-container'),
                    # Unit Selection
                    dbc.FormGroup([
                        dbc.Label('Unit'),
                        dcc.Dropdown(
                            id='data-unit-dropdown',
                            placeholder='Select Unit'
                        ),
                    ], id='data-unit-container', style={'display': 'none'}),
                    # Extra Notes
                    dbc.FormGroup([
                        dbc.Label('Extra Notes'),
                        dbc.Input(
                            id='extra-notes',
                            type='text',
                            placeholder='Extra Notes'
                        ),
                    ]),
                    # Submit Data Button
                    dbc.Button('Submit Data', id='submit-data-button', color='primary', block=True),
                    # Data Entries Table (column ids match the keys of the entries built in submit_data)
                    html.Div([
                        html.H3('Submitted Data Entries', className='mt-4'),
                        dash_table.DataTable(
                            id='data-entries-table',
                            columns=[
                                {'name': 'Variable', 'id': 'variable'},
                                {'name': 'Value', 'id': 'value'},
                                {'name': 'Unit', 'id': 'unit'},
                                {'name': 'Notes', 'id': 'notes'},
                                {'name': 'Timestamp', 'id': 'timestamp'}
                            ],
                            data=[],
                            style_table={'overflowX': 'auto'},
                            style_cell={'padding': '5px', 'textAlign': 'left'},
                            style_header={'backgroundColor': 'lightgrey', 'fontWeight': 'bold'},
                        )
                    ]),
                    # Status Message for Data Submission
                    html.Div(id='data-save-status', className='mt-2', style={'color': 'green'}),
                ])
            ]),
        ], width=4),
    ])
], fluid=True, style={'backgroundColor': '#000000'})

# Callback to show/hide restricted labels input based on variable type
@app.callback(
    Output('restricted-labels-container', 'style'),
    [Input('variable-type', 'value')]
)
def toggle_restricted_labels(variable_type):
    """Reveal the custom-labels input only for the 'Restricted Strings' type.

    Args:
        variable_type: Current value of the 'variable-type' dropdown.

    Returns:
        CSS style dict for the 'restricted-labels-container' element.
    """
    visible_style = {'display': 'block', 'marginBottom': '10px'}
    hidden_style = {'display': 'none'}
    return visible_style if variable_type == 'Restricted Strings' else hidden_style

# Callback to show/hide unit selection based on checkbox
@app.callback(
    Output('unit-dropdown-container', 'style'),
    [Input('add-unit-checkbox', 'value')],
    [State('variable-type', 'value')]
)
def toggle_unit_dropdown(checkbox_values, variable_type):
    """Show the unit dropdown while the 'Add Unit' checkbox is ticked.

    Args:
        checkbox_values: Selected values of the 'add-unit-checkbox' checklist.
            ``None`` is treated as "nothing ticked" instead of raising a
            TypeError on the membership test.
        variable_type: Current variable type; received as State but not used.

    Returns:
        CSS style dict for the 'unit-dropdown-container' element.
    """
    if 'add_unit' in (checkbox_values or []):
        return {'display': 'block', 'marginBottom': '10px'}
    return {'display': 'none'}

# Callback to update unit options based on variable type
@app.callback(
    Output('unit-selection-dropdown', 'options'),
    [Input('variable-type', 'value')]
)
def update_unit_options(variable_type):
    """List the predefined units that belong to the chosen variable type.

    Args:
        variable_type: Current value of the 'variable-type' dropdown.

    Returns:
        Dropdown option dicts built from ``unit_options``; an empty list when
        the type is unknown or has no predefined units.
    """
    units_for_type = unit_options.get(variable_type, [])
    return [{'label': unit, 'value': unit} for unit in units_for_type]

# Callback to add new variable
def _render_variable_list(variables):
    """Build one html.Div summary line per stored user-defined variable."""
    rows = []
    for name, attrs in variables.items():
        summary = f"{name} ({attrs['type']})"
        if attrs.get('unit'):
            summary += f" - Unit: {attrs['unit']}"
        if attrs['type'] == 'Restricted Strings':
            summary += f" - Labels: {', '.join(attrs.get('labels', []))}"
        rows.append(html.Div(summary))
    return rows


@app.callback(
    Output('variables-store', 'data'),
    Output('variable-list', 'children'),
    Output('new-variable-name', 'value'),
    Output('variable-type', 'value'),
    Output('restricted-labels-input', 'value'),
    Output('add-unit-checkbox', 'value'),
    Output('unit-selection-dropdown', 'value'),
    Output('variable-save-status', 'children'),
    [Input('add-variable-button', 'n_clicks')],
    [
        State('new-variable-name', 'value'),
        State('variable-type', 'value'),
        State('restricted-labels-input', 'value'),
        State('add-unit-checkbox', 'value'),
        State('unit-selection-dropdown', 'value'),
        State('variables-store', 'data')
    ],
    prevent_initial_call=True
)
def add_variable(n_clicks, var_name, var_type, restricted_labels, add_unit, selected_unit, variables_data):
    """Register a new user-defined variable and refresh the variable list.

    Args:
        n_clicks: Click count of the 'Add Variable' button.
        var_name: Name typed by the user; surrounding whitespace is ignored.
        var_type: 'Numerical', 'Binary' or 'Restricted Strings'.
        restricted_labels: Comma-separated labels (Restricted Strings only).
        add_unit: Selected values of the 'Add Unit' checklist.
        selected_unit: Unit chosen in the unit dropdown, if any.
        variables_data: Current contents of 'variables-store'.

    Returns:
        Tuple of eight outputs: updated store data, variable-list children,
        reset values for the five form inputs, and a status message. All
        eight are ``dash.no_update`` when the name or type is missing. When
        the name is already taken, only the list (existing variables plus a
        red error line) and the status message are updated.
    """
    # A whitespace-only name would otherwise be stored as a real variable
    var_name = (var_name or '').strip()
    if not (n_clicks and var_name and var_type):
        return (dash.no_update,) * 8

    # Work on a copy; tolerate an empty/None store
    variables = dict(variables_data or {})

    if var_name in variables:
        # Keep the existing variables visible and append the error, instead of
        # replacing the whole list with the error message.
        variable_list = _render_variable_list(variables)
        variable_list.append(
            html.Div(f"Variable '{var_name}' already exists.", style={'color': 'red'})
        )
        return (
            dash.no_update,
            variable_list,
            dash.no_update,
            dash.no_update,
            dash.no_update,
            dash.no_update,
            dash.no_update,
            "Variable already exists."
        )

    # Default labels are stored for every type; custom labels replace them
    # only when at least one non-blank label was entered (input such as " , "
    # would otherwise produce a variable with no selectable value).
    labels = default_restricted_labels.copy()
    if var_type == 'Restricted Strings' and restricted_labels:
        custom_labels = [label.strip() for label in restricted_labels.split(',') if label.strip()]
        if custom_labels:
            labels = custom_labels

    variables[var_name] = {
        'type': var_type,
        'labels': labels,
        'unit': selected_unit if add_unit and selected_unit else None
    }

    return variables, _render_variable_list(variables), '', '', '', [], '', "Variable added successfully."

# Callback to update the variable select dropdown based on created variables
@app.callback(
    Output('variable-select-dropdown', 'options'),
    [Input('variables-store', 'data')]
)
def update_variable_select_options(variables_data):
    """Offer every stored user-defined variable in the data-entry dropdown.

    Args:
        variables_data: Contents of 'variables-store' (name -> attributes).

    Returns:
        One ``{'label': name, 'value': name}`` dict per stored variable.
    """
    dropdown_entries = []
    for name in variables_data.keys():
        dropdown_entries.append({'label': name, 'value': name})
    return dropdown_entries

# Callback to update data input fields based on selected variable
@app.callback(
    Output('value-input-container', 'children'),
    Output('data-unit-container', 'style'),
    Output('data-unit-dropdown', 'options'),
    [Input('variable-select-dropdown', 'value')],
    [State('variables-store', 'data')]
)
def update_data_input(selected_variable, variables_data):
    """Swap in a value control that suits the selected variable's type.

    Args:
        selected_variable: Name chosen in 'variable-select-dropdown'.
        variables_data: Contents of 'variables-store' (name -> attributes).

    Returns:
        Tuple of (value control, unit-container style, unit dropdown options).
        With no selection, a disabled number input is returned and the unit
        dropdown is hidden. 'Restricted Strings' variables get a dropdown of
        their labels; every other type gets a dcc.Input.
    """
    # NOTE(review): the returned control replaces ALL children of
    # 'value-input-container', including the 'Value' label declared in the
    # layout — confirm that dropping the label is intended.
    input_style = {'width': '100%', 'marginBottom': '10px'}

    if not selected_variable:
        # If no variable is selected, provide a disabled input
        disabled_input = dcc.Input(
            id='variable-value-input',
            type='number',
            placeholder='Enter Value',
            style=input_style,
            disabled=True
        )
        return disabled_input, {'display': 'none'}, []

    var_info = (variables_data or {}).get(selected_variable, {})
    var_type = var_info.get('type')

    # Handle unit display: the unit fixed at creation time is the only option
    unit = var_info.get('unit')
    if unit:
        unit_style = {'display': 'block', 'marginBottom': '10px'}
        unit_options_list = [{'label': unit, 'value': unit}]
    else:
        unit_style = {'display': 'none'}
        unit_options_list = []

    if var_type == 'Restricted Strings':
        # For Restricted Strings, change input to dropdown
        restricted_labels = var_info.get('labels', default_restricted_labels.copy())
        value_input = dcc.Dropdown(
            id='variable-value-input',
            options=[{'label': label, 'value': label} for label in restricted_labels],
            placeholder='Select a value',
            style=input_style
        )
    else:
        # Input type and placeholder per variable type. A variable that is
        # missing from the store (or has an unknown type) falls back to a
        # plain text input instead of raising UnboundLocalError.
        input_specs = {
            'Numerical': ('number', 'Enter Numerical Value'),
            'Binary': ('text', 'Enter Binary Value (e.g., ON/OFF)'),
        }
        input_type, placeholder = input_specs.get(var_type, ('text', 'Enter Value'))
        value_input = dcc.Input(
            id='variable-value-input',
            type=input_type,
            placeholder=placeholder,
            style=input_style
        )

    return value_input, unit_style, unit_options_list

# Callback to handle unit selection for data input
@app.callback(
    Output('data-unit-dropdown', 'value'),
    [Input('variable-select-dropdown', 'value')],
    [State('variables-store', 'data')]
)
def reset_data_unit(selected_variable, variables_data):
    """Preselect the stored unit of the chosen variable in the unit dropdown.

    Args:
        selected_variable: Name chosen in 'variable-select-dropdown'.
        variables_data: Contents of 'variables-store' (name -> attributes).

    Returns:
        The variable's unit, or an empty string when nothing is selected or
        the variable has no unit.
    """
    if not selected_variable:
        return ''
    stored_unit = variables_data.get(selected_variable, {}).get('unit')
    return stored_unit or ''

# Callback to submit data
@app.callback(
    Output('data-entries-store', 'data'),
    Output('data-entries-table', 'data'),
    Output('data-save-status', 'children'),
    [Input('submit-data-button', 'n_clicks')],
    [
        State('variable-select-dropdown', 'value'),
        State('variable-value-input', 'value'),
        State('data-unit-dropdown', 'value'),
        State('extra-notes', 'value'),
        State('variables-store', 'data'),
        State('data-entries-store', 'data')
    ],
    prevent_initial_call=True
)
def submit_data(n_clicks, selected_variable, variable_value, selected_unit, extra_notes, variables_data, data_entries):
    """Validate a manual data entry and append it to the stored entries.

    Args:
        n_clicks: Click count of the 'Submit Data' button.
        selected_variable: Variable the entry belongs to.
        variable_value: Value entered or selected by the user.
        selected_unit: Unit chosen for the entry, if any.
        extra_notes: Free-text notes, if any.
        variables_data: Contents of 'variables-store' (name -> attributes).
        data_entries: Entries submitted so far.

    Returns:
        Tuple of (store data, table data, status message). Nothing is updated
        when no variable or value is given. On a validation failure the store
        is left alone and the status carries the error text.
    """
    ready = n_clicks and selected_variable and variable_value is not None
    if not ready:
        return dash.no_update, dash.no_update, dash.no_update

    var_info = variables_data.get(selected_variable, {})
    var_type = var_info.get('type')

    # Validate binary inputs: text values must read ON or OFF in any letter case
    if var_type == 'Binary':
        valid_binary = ['ON', 'OFF']
        is_text = isinstance(variable_value, str)
        if is_text and variable_value.upper() not in valid_binary:
            return dash.no_update, data_entries, "Invalid input for Binary type. Please enter 'ON' or 'OFF'."

    # Validate restricted string inputs against the variable's own labels
    if var_type == 'Restricted Strings':
        valid_labels = var_info.get('labels', default_restricted_labels.copy())
        if variable_value not in valid_labels:
            return dash.no_update, data_entries, f"Invalid input for Restricted Strings. Valid options: {', '.join(valid_labels)}."

    # Record the entry together with its submission time
    data_entries.append({
        'variable': selected_variable,
        'value': variable_value,
        'unit': selected_unit or 'N/A',
        'notes': extra_notes or '',
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    })

    return data_entries, data_entries, "Data submitted successfully."

# Callback to handle file selection, processing, and saving 
@app.callback(
    Output('file-list', 'children'),
    Output('file-save-status', 'children'),
    Output('prefix-dropdown', 'options'),
    [Input('confirm-button', 'n_clicks')],
    [State('date-picker-range', 'start_date'),
     State('date-picker-range', 'end_date'),
     State('filename-input', 'value')]
)
def update_file_list(n_clicks, start_date, end_date, filename_prefix):
    """Merge the CSV files inside a date range and save one CSV per variable.

    Files from ``directory`` that ``filter_files_by_date`` selects are run
    through ``process_csv_file``. The per-variable frames are concatenated and
    written to ``output_directory`` as ``<prefix>_<variable>.csv``. The merged
    frames are also published in the module-level ``merged_dataframes``.

    Args:
        n_clicks: Click count of the confirm button.
        start_date: ISO-formatted start of the range.
        end_date: ISO-formatted end of the range.
        filename_prefix: Prefix used to name the saved files.

    Returns:
        ``(file list text, save status text, prefix dropdown options)``.
        When the form is incomplete, the options are rebuilt from the files
        already saved. On an invalid date or an empty selection the options
        are left untouched (``dash.no_update``) so a user error does not
        empty the prefix dropdown.
    """
    global merged_dataframes

    if not (n_clicks and start_date and end_date and filename_prefix):
        saved_prefixes = extract_prefixes_from_saved_files(output_directory)
        return "No files selected.", "", [{'label': prefix, 'value': prefix} for prefix in saved_prefixes]

    try:
        start_date_dt = datetime.fromisoformat(start_date)
        end_date_dt = datetime.fromisoformat(end_date)
    except (TypeError, ValueError) as e:
        return f"Invalid date format: {e}", "", dash.no_update

    csv_files = list_csv_files(directory)
    selected_files = filter_files_by_date(csv_files, start_date_dt, end_date_dt)

    if not selected_files:
        return "No files selected.", "", dash.no_update

    os.makedirs(output_directory, exist_ok=True)

    # Collect the pieces first and concatenate once per variable; concatenating
    # inside the loop would re-copy the accumulated data for every file.
    frames_by_variable = {var: [] for var in variable_names}
    for file in selected_files:
        csv_file_path = os.path.join(directory, file)
        dataframes = process_csv_file(directory, csv_file_path, variable_names)

        for var_name, df in dataframes.items():
            if not df.empty:
                frames_by_variable[var_name].append(df)

    merged_dataframes = {
        var: pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        for var, frames in frames_by_variable.items()
    }

    saved_files = []
    for var_name, df in merged_dataframes.items():
        if df.empty:
            continue
        # Path separators in a variable name would otherwise create sub-paths.
        sanitized_var_name = var_name.replace('/', '_').replace('\\', '_')
        output_file = os.path.join(output_directory, f"{filename_prefix}_{sanitized_var_name}.csv")
        df.to_csv(output_file, index=False)
        saved_files.append(output_file)
        print(f"Saved {output_file}")

    # Remember the prefix for this session and offer it together with every
    # prefix found among the saved files.
    if filename_prefix not in filename_prefixes:
        filename_prefixes.append(filename_prefix)

    all_prefixes = sorted(set(filename_prefixes + extract_prefixes_from_saved_files(output_directory)))
    prefix_options = [{'label': prefix, 'value': prefix} for prefix in all_prefixes]

    file_display = "Selected Files:\n" + "\n".join(selected_files)
    save_status = f"Files processed and saved with prefix: {filename_prefix}. Saved {len(saved_files)} files."

    return file_display, save_status, prefix_options
 
# Callback to update the variable dropdown based on selected prefixes 
@app.callback(
    Output('variable-dropdown', 'options'),
    [Input('prefix-dropdown', 'value')]
)
def update_variable_dropdown(selected_prefixes):
    """Build the variable dropdown options for the chosen prefixes.

    Every selected prefix is paired with every name in ``variable_names``.
    The option value is ``"<prefix>_<variable>"``. The label is
    ``"<prefix>: <display name>"``, where the display name comes from
    ``variable_display_names`` and falls back to the raw variable name.

    Returns:
        A list of ``{'label', 'value'}`` dicts, or ``[]`` when no prefix is
        selected.
    """
    if not selected_prefixes:
        return []

    return [
        {
            'label': f"{batch}: {variable_display_names.get(name, name)}",
            'value': f"{batch}_{name}",
        }
        for batch in selected_prefixes
        for name in variable_names
    ]

# Callback to handle dynamic Y-axis scaling based on units and downsample data to 1-minute intervals
@app.callback(
    Output('variable-graph', 'figure'),
    [Input('variable-dropdown', 'value'),  # Existing variables
     Input('new-variable-dropdown', 'value'),  # New variables
     Input('time-mode-switch', 'value')]
)
def update_graph(selected_variables, new_variables, time_mode):
    """Draw the selected logged variables and table variables in one figure.

    Logged variables (``selected_variables``, values of the form
    ``"<prefix>_<variable>"``) are read from the CSV files in
    ``output_directory``, passed through ``remove_outliers`` and, unless listed
    in ``skip_variables``, averaged into 1-minute bins with gaps forward-filled.
    They are drawn as lines. Table variables (``new_variables``) are looked up
    through ``column_mapping`` in ``table1``/``table2`` and drawn as bars.

    Series are kept apart per (variable, prefix) pair, so the same variable
    chosen from two prefixes gives two traces. The prefix is added to the
    legend entry only when more than one prefix is plotted.

    The first unit encountered is placed on the primary y-axis; every other
    unit shares the secondary y-axis, whose title lists all of them.

    Args:
        selected_variables: Selected ``"<prefix>_<variable>"`` values, or None.
        new_variables: Selected display names of table variables, or None.
        time_mode: ``'elapsed'`` plots minutes since each series' first
            sample; any other value plots absolute timestamps.

    Returns:
        A Plotly figure; a placeholder figure with an explanatory title when
        nothing is selected or no data could be loaded.
    """
    selected_variables = selected_variables or []
    new_variables = new_variables or []

    if not selected_variables and not new_variables:
        return px.line(title='Please select variables to display.')

    frames = []  # one frame per series; concatenated once below
    variable_unit_map = variable_units.copy()  # Start with existing units

    # Process existing (logged) variables
    for var in selected_variables:
        try:
            prefix, var_name = var.split('_', 1)
        except ValueError:
            # Value has no "<prefix>_" part, so it cannot be mapped to a file.
            continue

        file_path = os.path.join(output_directory, f"{prefix}_{var_name}.csv")
        if not os.path.exists(file_path):
            continue

        df = pd.read_csv(file_path)
        if 'TimeString' not in df.columns or 'VarValue' not in df.columns:
            continue

        df['TimeString'] = pd.to_datetime(df['TimeString'], format='%d-%m-%Y %H:%M:%S', errors='coerce')
        if df['TimeString'].isnull().all():
            continue

        df['ElapsedTime'] = (df['TimeString'] - df['TimeString'].min()).dt.total_seconds() / 60

        # Values read as text use a decimal comma; convert them to floats.
        if df['VarValue'].dtype == 'object':
            df['VarValue'] = df['VarValue'].str.replace(',', '.').astype(float)

        df = remove_outliers(df, 'VarValue')

        if var_name not in skip_variables:
            # Downsample to 1-minute means and carry the last value across
            # empty bins.
            df_resampled = (
                df.set_index('TimeString')[['VarValue', 'ElapsedTime']]
                .resample('1min')
                .mean()
                .ffill()
                .reset_index()
            )
        else:
            df_resampled = df.reset_index()

        df_resampled['Variable'] = var_name
        # Keep the prefix so identical variables from different prefixes stay
        # separate series.
        df_resampled['Batch'] = prefix
        frames.append(df_resampled)

    # Process new (table) variables
    for display_name in new_variables:
        if display_name not in column_mapping:
            continue
        table_name, col_name = column_mapping[display_name]
        source_table = table1 if table_name == 'table1' else table2

        df_var = source_table[['DateTime', col_name, 'SAMPLE I.D']].copy()
        df_var = df_var.rename(columns={col_name: 'VarValue', 'DateTime': 'TimeString'})
        df_var['Variable'] = display_name
        df_var['Batch'] = ''  # table variables do not belong to a prefix

        df_var['TimeString'] = pd.to_datetime(df_var['TimeString'])
        df_var['ElapsedTime'] = (df_var['TimeString'] - df_var['TimeString'].min()).dt.total_seconds() / 60
        frames.append(df_var)

        # Assign the correct unit to the variable
        variable_unit_map[display_name] = new_variable_units.get(display_name, 'Value')

    if not frames:
        return px.line(title='No data to display.')

    all_data = pd.concat(frames, ignore_index=True)
    if all_data.empty:
        return px.line(title='No data to display.')

    if time_mode == 'elapsed':
        x_axis = 'ElapsedTime'
        x_label = 'Elapsed Time (minutes)'
    else:
        x_axis = 'TimeString'
        x_label = 'Time (Absolute)'

    # Only mention the prefix in the legend when it is needed to tell series apart.
    batches_shown = {batch for batch in all_data['Batch'].unique() if batch}
    label_with_batch = len(batches_shown) > 1

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # The figure has two y-axes: the first unit seen gets the primary axis and
    # all remaining units share the secondary one.
    primary_unit = None
    axis_units = {False: [], True: []}  # secondary_y flag -> units on that axis

    for (var_name, batch), df_group in all_data.groupby(['Variable', 'Batch']):
        # Get the display name from the dictionary, default to the raw variable name if not found
        display_name = variable_display_names.get(var_name, var_name)
        trace_name = f"{batch}: {display_name}" if (batch and label_with_batch) else display_name

        unit = variable_unit_map.get(var_name, 'Value')
        if primary_unit is None:
            primary_unit = unit
        secondary_y = unit != primary_unit
        if unit not in axis_units[secondary_y]:
            axis_units[secondary_y].append(unit)

        if not batch:
            # Table variable: drawn as semi-transparent bars with the sample id on hover.
            fig.add_trace(
                go.Bar(
                    x=df_group[x_axis],
                    y=df_group['VarValue'],
                    name=trace_name,
                    hovertext=df_group['SAMPLE I.D'] if 'SAMPLE I.D' in df_group.columns else None,
                    opacity=0.5
                ),
                secondary_y=secondary_y
            )
        else:
            fig.add_trace(
                go.Scatter(
                    x=df_group[x_axis],
                    y=df_group['VarValue'],
                    mode='lines',
                    name=trace_name,
                ),
                secondary_y=secondary_y
            )

    # Title each axis with every unit plotted on it, so the secondary title is
    # not just whichever unit happened to be processed last.
    for secondary_y, units in axis_units.items():
        if units:
            fig.update_yaxes(title_text=' / '.join(units), secondary_y=secondary_y)

    fig.update_xaxes(title_text=x_label)
    fig.update_layout(
        title='Selected Variables Over Time',
        xaxis=dict(rangeslider=dict(visible=True), type="date" if time_mode != 'elapsed' else "linear"),
        bargap=0.9
    )

    return fig

# The update_file_list callback reads and appends to this list as a module
# global, so it is defined at module level rather than only when the file is
# executed directly; importing the module must leave the callback usable.
filename_prefixes = extract_prefixes_from_saved_files(output_directory)

if __name__ == '__main__':
    # Start the Dash development server (debug mode) on port 8066.
    app.run_server(debug=True, port=8066)