In [3]:
import pandas as pd
from datetime import datetime

In [4]:
# Read the processed export, parse 'created_at' into UTC-aware timestamps and
# keep only the rows whose timestamp parsed (errors='coerce' yields NaT otherwise).
df = (
    pd.read_csv('processed_data.csv')
    .assign(datetime_parsed=lambda raw: pd.to_datetime(raw['created_at'], utc=True, errors='coerce'))
    .dropna(subset=['datetime_parsed'])
)

# Summarise what was loaded: row count and the time span covered.
print(f"Loaded {len(df)} records from processed_data.csv")
print(f"Data range: {df['datetime_parsed'].min()} to {df['datetime_parsed'].max()}")
print("=" * 60)

Loaded 8000 records from processed_data.csv
Data range: 2025-03-19 15:01:59+00:00 to 2025-03-22 10:51:07+00:00


In [5]:
def get_time_window(start_date, start_hour, start_minute, start_second,
                   end_date, end_hour, end_minute, end_second):
    """
    Filter the module-level ``df`` by an inclusive UTC time window and print a summary.

    Parameters:
    - start_date: 'YYYY-MM-DD' format
    - start_hour, start_minute, start_second: integers
    - end_date: 'YYYY-MM-DD' format
    - end_hour, end_minute, end_second: integers

    Returns: Filtered DataFrame in display form ('created_at' rewritten as
    'DD/MM/YY' text, 'datetime_parsed' removed, index renumbered from 25).
    When no row matches, a message is printed and an empty, column-less
    DataFrame is returned.

    Note: a later cell of this notebook defines another ``get_time_window``
    that returns ``(DataFrame, count)`` without printing; whichever
    definition was executed last is the one in effect.
    """
    start_dt_str = f"{start_date} {start_hour:02d}:{start_minute:02d}:{start_second:02d}"
    end_dt_str = f"{end_date} {end_hour:02d}:{end_minute:02d}:{end_second:02d}"

    start_dt = pd.to_datetime(start_dt_str, utc=True)
    end_dt = pd.to_datetime(end_dt_str, utc=True)

    # Both bounds are inclusive.
    mask = (df['datetime_parsed'] >= start_dt) & (df['datetime_parsed'] <= end_dt)
    filtered_df = df[mask]

    # Build the display frame in one non-mutating chain: assign() returns a new
    # frame, so neither df nor filtered_df is modified.
    # NOTE(review): '%d/%m/%y' keeps only the date, so the time of day is not
    # visible in the result even though the window is second-precise -- confirm
    # this is intended.
    display_df = (
        filtered_df
        .assign(created_at=filtered_df['datetime_parsed'].dt.strftime('%d/%m/%y'))
        .drop(columns=['datetime_parsed'])
        .reset_index(drop=True)
    )
    # NOTE(review): the displayed index starts at 25; the reason for this offset
    # is not visible in this notebook -- confirm before changing it.
    display_df.index = display_df.index + 25

    found = len(filtered_df)
    total = len(df)
    # An empty dataset reports 0.0% instead of raising ZeroDivisionError.
    coverage = (found / total * 100) if total else 0.0

    print(f"Time Window: {start_dt} to {end_dt}")
    print(f"Records Found: {found} of {total} total")
    print(f"Coverage: {coverage:.1f}% of dataset")

    if 'was_interpolated' in display_df.columns:
        interpolated = display_df['was_interpolated'].sum()
        if interpolated > 0:
            print(f"  {interpolated} records were interpolated")

    print("-" * 60)

    if found > 0:
        return display_df

    print(" No data found in this time window")
    return pd.DataFrame()


In [6]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
from datetime import datetime

# Load the processed export again for this cell (same steps as the earlier load
# cell): parse 'created_at' as UTC timestamps and drop rows that failed to parse.
df = (
    pd.read_csv('processed_data.csv')
    .assign(
        datetime_parsed=lambda loaded: pd.to_datetime(
            loaded['created_at'], utc=True, errors='coerce'
        )
    )
    .dropna(subset=['datetime_parsed'])
)

# Row count and covered time span of the usable records.
print(f"Loaded {len(df)} records from processed_data.csv")
print(f"Data range: {df['datetime_parsed'].min()} to {df['datetime_parsed'].max()}")
print("=" * 60)

def get_time_window(start_date, start_hour, start_minute, start_second,
                   end_date, end_hour, end_minute, end_second):
    """
    Select the rows of the module-level ``df`` that fall inside a UTC time window.

    Parameters:
    - start_date, end_date: 'YYYY-MM-DD' format
    - start_hour, start_minute, start_second: integers
    - end_hour, end_minute, end_second: integers

    Returns: (display DataFrame, number of matching rows). In the display
    frame 'created_at' holds 'DD/MM/YY' text, 'datetime_parsed' is removed
    and the index is renumbered starting at 25. Both window bounds are
    inclusive.
    """
    # Format both bounds first, then parse them as UTC timestamps.
    bound_texts = (
        f"{start_date} {start_hour:02d}:{start_minute:02d}:{start_second:02d}",
        f"{end_date} {end_hour:02d}:{end_minute:02d}:{end_second:02d}",
    )
    lower, upper = [pd.to_datetime(text, utc=True) for text in bound_texts]

    stamps = df['datetime_parsed']
    selected = df[(stamps >= lower) & (stamps <= upper)]

    # assign() produces a new frame, so df itself is left as it was.
    shown = (
        selected
        .assign(created_at=selected['datetime_parsed'].dt.strftime('%d/%m/%y'))
        .drop(columns=['datetime_parsed'])
        .reset_index(drop=True)
    )
    shown.index = shown.index + 25

    return shown, len(selected)


Loaded 8000 records from processed_data.csv
Data range: 2025-03-19 15:01:59+00:00 to 2025-03-22 10:51:07+00:00


In [7]:
def create_time_window_interface():
    """
    Build the interactive time-window filter form.

    The form holds a date picker plus hour/minute/second dropdowns for the
    start and for the end of the window, a "Filter" button and an output
    area. Clicking the button calls ``get_time_window`` (it must be the
    definition that returns ``(DataFrame, count)``), prints a summary and
    displays the matching rows in the output area.

    The initial selection starts at the minute of the earliest timestamp in
    the module-level ``df`` and ends three minutes later.

    Returns: the ``widgets.VBox`` that contains the whole form.
    """
    # Default window derived from the data instead of hard-coded clock values.
    default_start = df['datetime_parsed'].min().floor('min')
    default_end = default_start + pd.Timedelta(minutes=3)

    def _date_picker(description, selected):
        """Date picker with the wide label used for the date fields."""
        return widgets.DatePicker(
            description=description,
            value=selected,
            style={'description_width': '100px'}
        )

    def _two_digit_dropdown(description, size, selected):
        """Dropdown offering the zero-padded values 00 .. size-1."""
        return widgets.Dropdown(
            description=description,
            options=[f"{v:02d}" for v in range(size)],
            value=f"{selected:02d}",
            style={'description_width': '60px'}
        )

    start_date = _date_picker('Start Date:', default_start.date())
    start_hour = _two_digit_dropdown('Hour:', 24, default_start.hour)
    start_minute = _two_digit_dropdown('Minute:', 60, default_start.minute)
    start_second = _two_digit_dropdown('Second:', 60, default_start.second)

    end_date = _date_picker('End Date:', default_end.date())
    end_hour = _two_digit_dropdown('Hour:', 24, default_end.hour)
    end_minute = _two_digit_dropdown('Minute:', 60, default_end.minute)
    end_second = _two_digit_dropdown('Second:', 60, default_end.second)

    filter_button = widgets.Button(
        description='Filter',
        button_style='info',
        layout=widgets.Layout(width='100px', height='35px')
    )

    output_area = widgets.Output()

    def on_filter_click(button):
        """Run the filter with the current widget values and render the result."""
        with output_area:
            output_area.clear_output()

            # A DatePicker's value is None once the user clears the field;
            # report that instead of failing on None.strftime.
            if start_date.value is None or end_date.value is None:
                print(" Please select both a start date and an end date")
                return

            start_date_val = start_date.value.strftime('%Y-%m-%d')
            start_hour_val = int(start_hour.value)
            start_minute_val = int(start_minute.value)
            start_second_val = int(start_second.value)

            end_date_val = end_date.value.strftime('%Y-%m-%d')
            end_hour_val = int(end_hour.value)
            end_minute_val = int(end_minute.value)
            end_second_val = int(end_second.value)

            display_df, count = get_time_window(
                start_date_val, start_hour_val, start_minute_val, start_second_val,
                end_date_val, end_hour_val, end_minute_val, end_second_val
            )

            start_dt_str = f"{start_date_val} {start_hour_val:02d}:{start_minute_val:02d}:{start_second_val:02d}"
            end_dt_str = f"{end_date_val} {end_hour_val:02d}:{end_minute_val:02d}:{end_second_val:02d}"

            total = len(df)
            # An empty dataset reports 0.0% instead of raising ZeroDivisionError.
            coverage = (count / total * 100) if total else 0.0

            print(f" Time Window: {start_dt_str} to {end_dt_str}")
            print(f" Records Found: {count} of {total} total")
            print(f" Coverage: {coverage:.1f}% of dataset")

            if 'was_interpolated' in display_df.columns:
                interpolated = display_df['was_interpolated'].sum()
                if interpolated > 0:
                    print(f" {interpolated} records were interpolated")

            print("-" * 60)

            if count > 0:
                display(display_df)
            else:
                print(" No data found in this time window")

    filter_button.on_click(on_filter_click)

    interface = widgets.VBox([
        widgets.HTML("<h3>Time Window Filter Interface</h3>"),
        widgets.HTML("<strong>Start Time:</strong>"),
        widgets.HBox([start_date, start_hour, start_minute, start_second]),
        widgets.HTML("<strong>End Time:</strong>"),
        widgets.HBox([end_date, end_hour, end_minute, end_second]),
        widgets.HBox([filter_button]),
        output_area
    ])

    return interface
# Print a heading, then build the filter form and render it as this cell's output.
print("Interactive Time Window Filter:")
display(create_time_window_interface())

Interactive Time Window Filter:


VBox(children=(HTML(value='<h3>Time Window Filter Interface</h3>'), HTML(value='<strong>Start Time:</strong>')…