In [10]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, Markdown
import datetime

def choose_time_window(csv_file, date_col='created_at', date_format='%d/%m/%y'):
    """Display an interactive UI for filtering a CSV by a UTC time window.

    The CSV is loaded, ``date_col`` is parsed as UTC timestamps (rows whose
    value cannot be parsed are dropped), and a widget panel is displayed with
    date pickers, hour/minute/second dropdowns, a "Filter" button and a
    "Save to CSV" button. The filtered rows are held inside the callbacks and
    written out only when the user presses "Save to CSV".

    Parameters
    ----------
    csv_file : str or path-like
        Anything accepted by ``pandas.read_csv``.
    date_col : str, default 'created_at'
        Name of the column holding the timestamps.
    date_format : str or None, default '%d/%m/%y'
        ``strftime`` pattern written back into ``date_col`` of the filtered
        result. The default keeps only the calendar date, so time-of-day is
        not present in the displayed/saved column. Pass ``None`` to leave the
        column exactly as it was read from the CSV.

    Returns
    -------
    None
        The UI is shown via ``display``; nothing is returned.

    Raises
    ------
    KeyError
        If ``date_col`` is not a column of the CSV.
    ValueError
        If no row has a parseable timestamp in ``date_col``.
    """
    ts_col = '__date_filter__'  # helper column, removed again before display/save

    df = pd.read_csv(csv_file)
    if date_col not in df.columns:
        raise KeyError(
            f"Column {date_col!r} not found in {csv_file!r}; "
            f"available columns: {list(df.columns)}"
        )

    df[ts_col] = pd.to_datetime(df[date_col], utc=True, errors='coerce')
    df = df.dropna(subset=[ts_col])  # Drop invalid date rows
    if df.empty:
        # Without this guard min()/max() are NaT and formatting the data
        # range below fails with an unhelpful error.
        raise ValueError(
            f"No parseable timestamps in column {date_col!r} of {csv_file!r}"
        )

    # Get the full range of data recording
    min_time = df[ts_col].min()
    max_time = df[ts_col].max()

    display(Markdown(f"**Data range:** {min_time.strftime('%Y-%m-%d %H:%M:%S %Z')} to {max_time.strftime('%Y-%m-%d %H:%M:%S %Z')}"))

    # Time dropdown options
    hour_options = [f"{h:02d}" for h in range(24)]
    minute_options = [f"{m:02d}" for m in range(60)]
    second_options = [f"{s:02d}" for s in range(60)]

    start_date = widgets.DatePicker(description='Start Date', value=min_time.date())
    end_date = widgets.DatePicker(description='End Date', value=max_time.date())

    start_hour = widgets.Dropdown(description='Hour', options=hour_options, value='00')
    start_minute = widgets.Dropdown(description='Minute', options=minute_options, value='00')
    start_second = widgets.Dropdown(description='Second', options=second_options, value='00')

    end_hour = widgets.Dropdown(description='Hour', options=hour_options, value='23')
    end_minute = widgets.Dropdown(description='Minute', options=minute_options, value='59')
    end_second = widgets.Dropdown(description='Second', options=second_options, value='59')

    filter_button = widgets.Button(description='Filter', button_style='primary')
    save_button = widgets.Button(description='Save to CSV', button_style='success')
    filename_input = widgets.Text(description='Filename:', value='filtered_data.csv')

    output = widgets.Output()
    confirmation = widgets.Output()

    # Store filtered data between callbacks
    filtered_df = None

    def selected_timestamp(date_widget, hour_widget, minute_widget, second_widget):
        """Combine one row of picker widgets into a UTC timestamp."""
        stamp = f"{date_widget.value} {hour_widget.value}:{minute_widget.value}:{second_widget.value}"
        return pd.to_datetime(stamp).tz_localize('UTC')

    def on_filter(b):
        nonlocal filtered_df
        # Invalidate the previous result first, so a rejected or failed filter
        # can never leave stale rows behind for "Save to CSV".
        filtered_df = None
        with output:
            output.clear_output()
            confirmation.clear_output()

            # A DatePicker that the user has cleared reports None.
            if start_date.value is None or end_date.value is None:
                display(Markdown(" **Please choose both a start date and an end date.**"))
                return

            start_ts = selected_timestamp(start_date, start_hour, start_minute, start_second)
            end_ts = selected_timestamp(end_date, end_hour, end_minute, end_second)

            if start_ts > end_ts:
                display(Markdown(" **The start time is after the end time. Please adjust the range.**"))
                return

            # The pickers have one-second resolution, so the chosen end second
            # is included in full: a record at HH:MM:SS.5 still belongs to it.
            end_exclusive = end_ts + pd.Timedelta(seconds=1)
            mask = (df[ts_col] >= start_ts) & (df[ts_col] < end_exclusive)
            selection = df.loc[mask].copy()

            # Format date column
            if date_format is not None:
                selection[date_col] = selection[ts_col].dt.strftime(date_format)
            filtered_df = selection.drop(columns=[ts_col])

            display(Markdown(f"**Filtered {len(filtered_df)} records:**"))
            display(filtered_df)

    def on_save(b):
        with confirmation:
            confirmation.clear_output()
            if filtered_df is None or len(filtered_df) == 0:
                display(Markdown(" **No data to save! Please filter first.**"))
                return

            filename = filename_input.value.strip()
            if not filename:
                # An empty name would otherwise be written as the hidden file ".csv".
                display(Markdown(" **Please enter a filename.**"))
                return
            if not filename.lower().endswith('.csv'):
                filename += '.csv'

            try:
                filtered_df.to_csv(filename, index=False)
                display(Markdown(f" **Successfully saved {len(filtered_df)} records to {filename}**"))
            except Exception as e:
                # Report the failure in the UI rather than raising from a
                # button callback.
                display(Markdown(f" **Error saving file:** {str(e)}"))

    filter_button.on_click(on_filter)
    save_button.on_click(on_save)

    ui = widgets.VBox([
        widgets.HTML("<h3>Time Range Selection</h3>"),
        widgets.HBox([start_date, start_hour, start_minute, start_second]),
        widgets.HBox([end_date, end_hour, end_minute, end_second]),
        widgets.HBox([filter_button, save_button, filename_input]),
        output,
        confirmation
    ])

    display(ui)

In [9]:
# Open the time-window picker for this CSV, reading timestamps from 'created_at'.
choose_time_window(csv_file='2881821.csv', date_col='created_at')


**Data range:** 2025-03-18 06:54:26 UTC to 2025-03-19 07:02:42 UTC

VBox(children=(HTML(value='<h3>Time Range Selection</h3>'), HBox(children=(DatePicker(value=datetime.date(2025…