In [83]:
import json
import pandas as pd


In [84]:
# Notebook-wide pandas display configuration, applied option by option.
DISPLAY_OPTIONS = {
    "display.max_rows": 10000,           # row limit when rendering a frame
    "display.max_columns": 500,          # column limit when rendering a frame
    "display.max_colwidth": 0,           # per-cell width setting
    "display.width": 1000,               # overall display width in characters
    "styler.format.precision": 10,       # float precision used by Styler output
}

for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

In [85]:
# Load the raw spaces records into `file` (read by the following cell).
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's default locale encoding.
with open("../data/raw/spaces_dummy_data.json", mode="r", encoding="utf-8") as f:
    file = json.load(f)


In [86]:
# Flatten the "available_dates" records into one row per availability slot,
# carrying the owning record's "id" along as a column.
spaces_details = pd.json_normalize(file, record_path=["available_dates"], meta=["id"], errors="ignore")

# Change columns type
spaces_details["start"] = spaces_details["start"].astype("datetime64[ns]")
spaces_details["end"] = spaces_details["end"].astype("datetime64[ns]")
spaces_details["cancellable"] = spaces_details["cancellable"].astype(int)

# Split date from time: calendar day on which the slot starts
spaces_details["date"] = spaces_details["start"].dt.to_period("D")

# Get available time span for each slot, in whole hours (floored, kept as float).
# Computed from total seconds because astype("timedelta64[h]") is not
# supported by newer pandas releases.
slot_length = spaces_details["end"] - spaces_details["start"]
spaces_details["time_span"] = slot_length.dt.total_seconds() // 3600

# Correct time span to be 24 for whole day: a slot such as 00:00-23:59 floors
# to 23 hours. NOTE(review): any slot whose floored length is exactly 23 hours
# is bumped, not only whole-day slots.
spaces_details["time_span"] = spaces_details["time_span"].mask(spaces_details["time_span"] == 23, 24)

# Cancellable hours of each slot as a fraction of the total hours over ALL rows.
# NOTE(review): the denominator is the global sum, not the per-"id" sum, and the
# value is a 0-1 fraction despite the "percent" name - confirm this is intended.
spaces_details["cancellable_percent"] = spaces_details["cancellable"] * spaces_details["time_span"]
spaces_details["cancellable_percent"] /= spaces_details["time_span"].sum()

# Extract time from datetime columns (the date part now lives in "date")
spaces_details["start"] = spaces_details["start"].dt.time
spaces_details["end"] = spaces_details["end"].dt.time

spaces_details

  spaces_details.groupby(["id", "date"]).sum()


Unnamed: 0,start,end,cancellable,id,date,time_span,cancellable_percent
0,08:00:00,11:00:00,1,A1,2022-01-01,3.0,0.016304
1,15:00:00,17:00:00,0,A1,2022-01-01,2.0,0.0
2,00:00:00,23:59:59,0,A1,2022-01-02,24.0,0.0
3,10:00:00,15:00:00,0,A1,2022-01-03,5.0,0.0
4,00:00:00,23:59:00,0,A1,2022-01-04,24.0,0.0
5,08:00:00,14:00:00,1,A1,2022-01-05,6.0,0.032609
6,00:00:00,23:59:00,0,A2,2022-01-01,24.0,0.0
7,09:00:00,14:00:00,1,A2,2022-01-02,5.0,0.027174
8,00:00:00,23:59:00,1,A2,2022-01-03,24.0,0.130435
9,11:00:00,17:00:00,0,A2,2022-01-04,6.0,0.0


In [91]:
def _hour_flags(start, end):
    """Return a list of 24 0/1 flags, one per hour of the day.

    Hours in ``[start.hour, stop)`` are set to 1, where ``stop`` is
    ``end.hour`` except that an end falling in hour 23 (e.g. 23:59) is
    treated as running to the end of the day (``stop == 24``).
    """
    stop = 24 if end.hour == 23 else end.hour
    return [int(start.hour <= hour < stop) for hour in range(24)]


# Add encoded column: one 24-element availability list per slot.
# Each list is built directly instead of being filled in through iterrows(),
# which only worked because the row copies shared the same list objects.
spaces_details["enc"] = [
    _hour_flags(slot_start, slot_end)
    for slot_start, slot_end in zip(spaces_details["start"], spaces_details["end"])
]

# Per-space totals of the numeric columns. numeric_only=True states explicitly
# that the time, period and list columns are excluded from the sum.
spaces_details.groupby(["id"]).sum(numeric_only=True)

  spaces_details.groupby(["id"]).sum()


Unnamed: 0_level_0,cancellable,time_span,cancellable_percent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A1,2,64.0,0.048913
A2,2,83.0,0.157609
A3,3,37.0,0.059783


In [88]:
# Sum the numeric columns per space id. numeric_only=True makes the exclusion
# of non-numeric columns explicit instead of relying on pandas dropping them.
spaces_details_group = spaces_details.groupby(["id"]).sum(numeric_only=True)

# Give every space its own all-zero list of 24 hourly slots
# (a fresh list per row, so the rows do not share one object).
spaces_details_group["enc"] = [[0] * 24 for _ in spaces_details_group.index]

spaces_details_group


  spaces_details_group = spaces_details.groupby(["id"]).sum()


Unnamed: 0_level_0,cancellable,time_span,cancellable_percent,enc
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A1,2,64.0,0.048913,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
A2,2,83.0,0.157609,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
A3,3,37.0,0.059783,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
