In [23]:
# Imports first, so the cell reads top-down: dependencies, then data, then the frame.
import pandas as pd
from datetime import datetime, timedelta

# Column labels for the event tuples below, in tuple order.
event_columns = ["timestamp", "station_id", "state"]

# Sample station events. Each tuple is (ISO-8601 timestamp string with a "Z" suffix,
# station identifier, state), where state is either "start" or "end".
events = [
    ("2025-04-26T23:50:00Z", "A", "start"),
    ("2025-04-29T00:30:00Z", "A", "end"),
    ("2025-04-29T09:00:00Z", "B", "start"),
    ("2025-04-29T10:15:00Z", "B", "end"),
    ("2025-04-29T18:00:00Z", "A", "start"),
    ("2025-04-29T19:00:00Z", "A", "end"),
]

# One row per event; timestamps are still plain strings at this point.
df = pd.DataFrame(events, columns=event_columns)

In [24]:
# Parse the timestamp strings into timezone-aware (UTC) datetimes. errors='raise'
# makes any value that does not match the expected format fail loudly.
parsed_timestamps = pd.to_datetime(
    df['timestamp'],
    format="%Y-%m-%dT%H:%M:%SZ",
    utc=True,
    errors='raise',
)
df['timestamp'] = parsed_timestamps

# Split every timestamp into its time-of-day and calendar-date components;
# the per-day aggregation further down works on these two columns.
df['time'] = parsed_timestamps.dt.time
df['day'] = parsed_timestamps.dt.date

print(df)

                  timestamp station_id  state      time         day
0 2025-04-26 23:50:00+00:00          A  start  23:50:00  2025-04-26
1 2025-04-29 00:30:00+00:00          A    end  00:30:00  2025-04-29
2 2025-04-29 09:00:00+00:00          B  start  09:00:00  2025-04-29
3 2025-04-29 10:15:00+00:00          B    end  10:15:00  2025-04-29
4 2025-04-29 18:00:00+00:00          A  start  18:00:00  2025-04-29
5 2025-04-29 19:00:00+00:00          A    end  19:00:00  2025-04-29


In [25]:
# Station ids in order of first appearance in the event table.
unique_stations = df['station_id'].unique()

# One independent copy of the event rows per station, in the same order as
# unique_stations (station_dfs[k] belongs to unique_stations[k]).
station_dfs = [
    df[df['station_id'] == station_id].copy()
    for station_id in unique_stations
]

print(station_dfs[0].head())


                  timestamp station_id  state      time         day
0 2025-04-26 23:50:00+00:00          A  start  23:50:00  2025-04-26
1 2025-04-29 00:30:00+00:00          A    end  00:30:00  2025-04-29
4 2025-04-29 18:00:00+00:00          A  start  18:00:00  2025-04-29
5 2025-04-29 19:00:00+00:00          A    end  19:00:00  2025-04-29


In [27]:
def minutes_since_midnight(tim):
    """Return the whole minutes elapsed since 00:00 for a time-of-day value.

    Parameters
    ----------
    tim : object exposing ``hour`` and ``minute`` attributes
        For example a ``datetime.time``.

    Returns
    -------
    int
        ``hour * 60 + minute``. Seconds and smaller units are not considered.
    """
    hours, minutes = tim.hour, tim.minute
    return hours * 60 + minutes


# Number of minutes in one calendar day.
MINUTES_PER_DAY = 1440


def busy_minutes_per_day(station_df):
    """Compute, per calendar day, how many minutes one station was in use.

    Parameters
    ----------
    station_df : pandas.DataFrame
        Events of a single station, already sorted chronologically. Only the
        columns 'day' (datetime.date), 'time' (datetime.time) and 'state' are
        read. A state of 'start' opens a session; any other value is treated
        as the end of a session.

    Returns
    -------
    dict
        Maps each day to the busy minutes accumulated on that day. Days that
        lie entirely inside a session spanning several midnights are filled
        with MINUTES_PER_DAY. Days without any activity are not listed. An
        empty frame yields an empty dict.

    Notes
    -----
    If the very first event is an 'end' (the session began before the data
    starts), the station is counted as busy from midnight of that day up to
    the 'end' time, since the real start is unknown. A trailing 'start' with
    no matching 'end' contributes no minutes.
    """
    busy = {}
    curr_day = None        # calendar day currently being accumulated
    curr_time = 0          # busy minutes accumulated so far for curr_day
    starting_hour = None   # time of day of the most recent 'start' event

    for day, time_of_day, state in zip(station_df['day'], station_df['time'], station_df['state']):
        if day != curr_day:
            if state == 'start':
                # A new day opens with a fresh session: close out the previous day.
                if curr_day is not None:
                    busy[curr_day] = curr_time
                curr_time = 0
                starting_hour = time_of_day
            else:
                # An 'end' on a new day means the session crossed at least one midnight.
                # starting_hour is only dereferenced when an earlier day exists, so a
                # leading 'end' event no longer fails on a missing start.
                if curr_day is not None:
                    remaining_time_before = MINUTES_PER_DAY - minutes_since_midnight(starting_hour)
                    busy[curr_day] = curr_time + remaining_time_before
                    # Every day strictly between the start day and the end day was fully busy.
                    fill_day = curr_day + timedelta(days=1)
                    while fill_day < day:
                        busy[fill_day] = MINUTES_PER_DAY
                        fill_day += timedelta(days=1)
                # On the end day itself the station was busy from midnight until the 'end'.
                curr_time = minutes_since_midnight(time_of_day)
            curr_day = day
        elif state == 'start':
            starting_hour = time_of_day
        else:
            # Session opened and closed on the same day.
            curr_time += minutes_since_midnight(time_of_day) - minutes_since_midnight(starting_hour)

    # Flush the last day being accumulated (nothing to flush for an empty frame).
    if curr_day is not None:
        busy[curr_day] = curr_time
    return busy


full_dic = {}
for station_id, station_df in zip(unique_stations, station_dfs):
    # Sorted in place on purpose: station_dfs keeps holding the chronologically
    # ordered frames, exactly as before, in case later cells rely on that.
    station_df.sort_values(by='timestamp', inplace=True)
    full_dic[station_id] = busy_minutes_per_day(station_df)

print(full_dic)





{'A': {datetime.date(2025, 4, 26): 10, datetime.date(2025, 4, 27): 1440, datetime.date(2025, 4, 28): 1440, datetime.date(2025, 4, 29): 90}, 'B': {datetime.date(2025, 4, 29): 75}}
