In [1]:
import pandas as pd
import numpy as np
import os, tarfile, shutil
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from dateutil.tz import gettz

from prophet import Prophet
from ortools.sat.python import cp_model



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# IANA time zone name used to localize every timestamp defined in this cell.
TZ = "America/New_York"

# Week being scheduled; week_end is exactly seven days after week_start.
week_start = pd.Timestamp("2025-11-03 00:00", tz=TZ)  # Monday start
week_end   = week_start + pd.Timedelta(days=7)

# Fixed events (cannot move). Start/end are written as wall-clock strings
# and localized to TZ right after the frame is built.
fixed_events = pd.DataFrame([
    # id, start, end, label
    ("class-os", "2025-11-03 12:50", "2025-11-03 14:45", "OS Class"),
    ("team-sync", "2025-11-04 18:30", "2025-11-04 19:30", "Team Sync"),
    ("iprd", "2025-11-06 13:00", "2025-11-06 14:30", "Profs Mtg"),
], columns=["id","start","end","label"])
fixed_events["start"] = pd.to_datetime(fixed_events["start"]).dt.tz_localize(TZ)
fixed_events["end"]   = pd.to_datetime(fixed_events["end"]).dt.tz_localize(TZ)

# Dynamic tasks to schedule (movable)
tasks = pd.DataFrame([
    # id, label, duration_hours, priority (1-3), latest (optional deadline)
    ("dw1", "Deep Work: Project", 3, 3, "2025-11-07 17:00"),
    ("study1", "Study: OS", 2, 2, "2025-11-08 23:59"),
    ("gym", "Gym", 1, 1, None),
], columns=["id","label","dur_h","priority","latest"])
# Localize each deadline to TZ; a task without a deadline keeps a null value.
tasks["latest"] = tasks["latest"].apply(lambda x: pd.to_datetime(x).tz_localize(TZ) if pd.notnull(x) else None)

# User preferences (simple, can become regressors)
PREF = {
    "slot_minutes": 60,          # choose 30 or 60
    "avoid_evenings_after": 20,  # 24h clock; hours >= this are flagged as late
    "prefer_morning_start": 8,   # first preferred hour (inclusive)
    "prefer_morning_end": 11,    # last preferred hour (inclusive: compared with <=)
    "weekend_ok": False,         # when False, weekend slots are blocked outright
}


In [3]:
# Slot length in minutes, taken from the preferences.
slot_minutes = PREF["slot_minutes"]
slot_len = pd.Timedelta(minutes=slot_minutes)

# One row per slot start across the week; week_end itself is excluded.
slots = pd.DataFrame({
    "ds": pd.date_range(week_start, week_end, freq=f"{slot_minutes}min", inclusive="left")
})
# NOTE(review): no working-hours restriction is applied in this cell, so
# night-time slots stay schedulable — confirm that is intended.
slots["hour"] = slots["ds"].dt.hour
slots["weekday"] = slots["ds"].dt.weekday  # 0=Mon
slots["is_weekend"] = (slots["weekday"] >= 5).astype(int)

# Block every slot that overlaps a fixed event.
# A slot covers [ds, ds + slot_len); it collides with an event when it starts
# before the event ends AND ends after the event starts. Testing only the slot
# start would leave e.g. the 12:00-13:00 slot free for an event starting 12:50.
slot_end = slots["ds"] + slot_len
blocked = np.zeros(len(slots), dtype=bool)
for ev_start, ev_end in zip(fixed_events["start"], fixed_events["end"]):
    overlaps = (slots["ds"] < ev_end) & (slot_end > ev_start)
    blocked |= overlaps.to_numpy()

# Soft preference features (become regressors)
slots["prefer_morning"] = ((slots["hour"]>=PREF["prefer_morning_start"]) & (slots["hour"]<=PREF["prefer_morning_end"])).astype(int)
slots["avoid_late"] = (slots["hour"]>=PREF["avoid_evenings_after"]).astype(int)

# Weekends are a hard exclusion unless the preferences allow them.
if not PREF["weekend_ok"]:
    blocked |= (slots["is_weekend"]==1).values


In [4]:
# Cold start: fabricate four weeks of history that encodes the priors below,
# so the forecaster has something to fit before any real feedback exists.
hist_end   = week_start
hist_start = hist_end - pd.Timedelta(days=28)
hist_times = pd.date_range(hist_start, hist_end, freq=f"{slot_minutes}min", inclusive="left")
hist = pd.DataFrame({"ds": hist_times})

# Same calendar features as the scheduling slots.
hist["hour"] = hist["ds"].dt.hour
hist["weekday"] = hist["ds"].dt.weekday
morning_flag = hist["hour"].between(PREF["prefer_morning_start"], PREF["prefer_morning_end"])
hist["prefer_morning"] = morning_flag.astype(int)
hist["avoid_late"] = (hist["hour"] >= PREF["avoid_evenings_after"]).astype(int)
hist["is_weekend"] = (hist["weekday"] >= 5).astype(int)

# Prior utility: 0.45 baseline, +0.20 in the preferred morning window,
# -0.15 for late-evening hours and -0.15 on weekends.
base = 0.45 + 0.20*hist["prefer_morning"] - 0.15*hist["avoid_late"] - 0.15*hist["is_weekend"]
# Keep the target strictly inside (0, 1).
hist["y"] = base.clip(lower=0.05, upper=0.95)
# The prior itself is used as the fitting target (one value per timestamp);
# the model is then expected to smooth it into daily/weekly patterns.


In [5]:
def add_regressors(df):
    """Return a copy of ``df`` with the three binary regressor columns set.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-like ``ds`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with integer 0/1 columns ``prefer_morning``,
        ``avoid_late`` and ``is_weekend`` added (or overwritten). The hour
        thresholds are read from the module-level ``PREF`` dict.
    """
    enriched = df.copy()
    hour_of_day = enriched["ds"].dt.hour
    day_of_week = enriched["ds"].dt.weekday

    # Morning window is inclusive on both ends.
    in_morning = hour_of_day.between(PREF["prefer_morning_start"], PREF["prefer_morning_end"])
    is_late = hour_of_day >= PREF["avoid_evenings_after"]

    enriched["prefer_morning"] = in_morning.astype(int)
    enriched["avoid_late"] = is_late.astype(int)
    enriched["is_weekend"] = (day_of_week >= 5).astype(int)  # 5=Sat, 6=Sun
    return enriched

# Names of the extra regressors fed to the model alongside its seasonalities.
regressor_names = ["prefer_morning", "avoid_late", "is_weekend"]

# Training frame: ds/y plus the regressors recomputed from ds.
hist_reg = add_regressors(hist[["ds", "y"]])
# Strip the timezone, keeping local wall-clock times (Prophet rejects tz-aware ds).
hist_reg["ds"] = hist_reg["ds"].dt.tz_localize(None)

prophet_options = {
    "daily_seasonality": True,
    "weekly_seasonality": True,
    "yearly_seasonality": False,
    "seasonality_mode": "additive",
}
m = Prophet(**prophet_options)
for reg in regressor_names:
    m.add_regressor(reg)

m.fit(hist_reg)

# Score the week's slots with the same features and the same tz-naive ds.
future = add_regressors(slots[["ds"]])
future["ds"] = future["ds"].dt.tz_localize(None)

forecast = m.predict(future)
# Forecast rows line up with the slots row-for-row; clamp predictions to [0, 1].
slots["utility_base"] = forecast["yhat"].clip(lower=0, upper=1).to_numpy()



15:33:48 - cmdstanpy - INFO - Chain [1] start processing
15:33:48 - cmdstanpy - INFO - Chain [1] done processing


In [None]:
# must be defined before the CP-SAT code
def task_slot_utility(task_row, slot_row):
    """Score how well one task fits one slot, as a float in [0, 1].

    Parameters
    ----------
    task_row : object with a ``label`` attribute
        The task being placed; only its label is inspected.
    slot_row : object with ``utility_base`` and ``hour`` attributes
        The candidate slot: forecast base utility and hour of day.

    Returns
    -------
    float
        The base utility, scaled by label-specific rules and clamped to [0, 1].
    """
    score = float(slot_row.utility_base)
    name = str(task_row.label).lower()
    hr = int(slot_row.hour)

    # Deep work is discounted whenever the hour is outside 8..17 (inclusive).
    outside_core_hours = hr < 8 or hr > 17
    if outside_core_hours and "deep work" in name:
        score *= 0.8

    # Gym gets a bonus for hours 6..9 (inclusive).
    if 6 <= hr <= 9 and "gym" in name:
        score *= 1.2

    # Clamp: cap at 1.0 first, then floor at 0.0.
    capped = min(1.0, score)
    return max(0.0, capped)


# Helper to check if a start index is feasible for a task
def feasible_starts(task, blocked_mask, slots_df=None, slot_len_minutes=None):
    """Return every slot position at which ``task`` could start.

    A start position ``i`` is feasible when none of the ``dur_slots``
    consecutive slots beginning at ``i`` is blocked and, if the task has a
    deadline, the end of its last slot is not later than that deadline.

    Parameters
    ----------
    task : object with ``dur_h`` and ``latest`` attributes
        ``dur_h`` is the duration in hours; ``latest`` is the deadline or a
        null value (None/NaT) when there is none.
    blocked_mask : array-like of bool
        One flag per slot, True where the slot is unavailable.
    slots_df : pandas.DataFrame, optional
        Frame with a ``ds`` column of slot start times. Defaults to the
        module-level ``slots``.
    slot_len_minutes : int, optional
        Slot length in minutes. Defaults to the module-level ``slot_minutes``.

    Returns
    -------
    list of int
        Feasible start positions in ascending order. Empty when the task is
        shorter than one slot (duration is floored to whole slots) or longer
        than the whole grid.
    """
    if slots_df is None:
        slots_df = slots
    if slot_len_minutes is None:
        slot_len_minutes = slot_minutes

    dur_slots = int(task.dur_h * 60 // slot_len_minutes)
    # A zero-slot task would produce out-of-range starts and a lookup at
    # position -1 in the deadline check; treat it as unschedulable instead.
    if dur_slots <= 0:
        return []

    blocked_arr = np.asarray(blocked_mask, dtype=bool)
    # Positional access so the result does not depend on slots_df's index labels.
    slot_starts = slots_df["ds"].reset_index(drop=True)
    slot_len = pd.Timedelta(minutes=slot_len_minutes)
    has_deadline = pd.notnull(task.latest)

    cand = []
    for i in range(len(slot_starts) - dur_slots + 1):
        # Any blocked slot inside the window rules this start out.
        if blocked_arr[i:i + dur_slots].any():
            continue
        if has_deadline:
            # The task ends when its last slot ends.
            end_time = slot_starts.iloc[i + dur_slots - 1] + slot_len
            if end_time > task.latest:
                continue
        cand.append(i)
    return cand

# Build and solve the placement model: one Boolean per (task, feasible start).
model = cp_model.CpModel()
x = {}  # x[(t_idx, start_idx)] = 1 if task t starts at start_idx

# Materialize the task rows once; t_idx is the row's POSITION, so the lists
# below stay aligned even if `tasks` does not carry a default 0..n-1 index.
task_rows = [row for _label, row in tasks.iterrows()]

dur_slots_list = []    # duration of each task in whole slots
feasible_by_task = []  # feasible start positions of each task
unschedulable = []     # ids of tasks that have no feasible start at all

for t_idx, task in enumerate(task_rows):
    dur_slots = int(task.dur_h * 60 // slot_minutes)
    # A task shorter than one slot floors to zero slots and cannot be placed.
    cand = feasible_starts(task, blocked) if dur_slots > 0 else []
    dur_slots_list.append(dur_slots)
    feasible_by_task.append(cand)
    if not cand:
        # No feasible location — in a real app, add a "skip" var with a penalty.
        unschedulable.append(task.id)
        continue
    for i in cand:
        x[(t_idx, i)] = model.NewBoolVar(f"x_t{t_idx}_i{i}")
    # Each schedulable task starts exactly once.
    model.Add(sum(x[(t_idx, i)] for i in cand) == 1)

# No overlaps: for every slot, at most one chosen placement may cover it.
covers_by_slot = {}
for (t_idx, i), var in x.items():
    for s in range(i, i + dur_slots_list[t_idx]):
        covers_by_slot.setdefault(s, []).append(var)
for s in sorted(covers_by_slot):
    model.Add(sum(covers_by_slot[s]) <= 1)

# Objective: maximize total (utility × priority).
# NOTE(review): the coefficients are floats; confirm the installed OR-Tools
# release accepts floating-point objective coefficients.
objective_terms = []
for t_idx, task in enumerate(task_rows):
    dur = dur_slots_list[t_idx]
    slot_utility = {}  # utility of this task per slot, computed once and reused
    for i in feasible_by_task[t_idx]:
        u = 0.0
        for s in range(i, i + dur):
            if s not in slot_utility:
                slot_utility[s] = task_slot_utility(task, slots.iloc[s])
            u += slot_utility[s]
        objective_terms.append(u * float(task.priority) * x[(t_idx, i)])
model.Maximize(sum(objective_terms))

solver = cp_model.CpSolver()
solver.parameters.max_time_in_seconds = 10
status = solver.Solve(model)
# Variable values are only meaningful when the solver actually found a solution.
if status not in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    raise RuntimeError(
        f"CP-SAT found no schedule (status: {solver.StatusName(status)})"
    )

# Read back the chosen start of every task and convert it to wall-clock times.
slot_len = pd.Timedelta(minutes=slot_minutes)
scheduled = []
for (t_idx, i), var in x.items():
    if solver.Value(var) == 1:
        task = task_rows[t_idx]
        dur = dur_slots_list[t_idx]
        start = slots["ds"].iloc[i]
        end = slots["ds"].iloc[i + dur - 1] + slot_len
        scheduled.append((task.id, task.label, start, end, task.priority))
scheduled_df = pd.DataFrame(scheduled, columns=["id","label","start","end","priority"]).sort_values("start")

# Surface tasks that were left out instead of dropping them silently.
if unschedulable:
    print("No feasible slot for:", ", ".join(str(task_id) for task_id in unschedulable))
scheduled_df


In [2]:
# Load the raw events export; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv("whats-happening-la-calendar-dataset.csv")

In [3]:
# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,Event Name,Event Description,Fee Required,Type of Event,Subject Matter,Age Groupings,Event by Service Area,Event Sponsor,Event Date & Time Start,Event Date & Time Ends,...,Elected Official Office Name,Elected Officials Name,Neighborhood Council,City Reference ID,Zip Codes,Census Tracts,Precinct Boundaries,LA Specific Plans,Council Districts,Neighborhood Councils (Certified)
0,West Hills Neighborhood Council Joint Special ...,,No,Neighborhood Council,Government,,,ITA,2025-05-18T19:15:00.000,,...,,,,95811.0,,,,,,
1,"Mar Vista Community Council (MVCC) Planning, L...",,No,Neighborhood Council,Government,,,ITA,2025-05-18T19:00:00.000,,...,,,,95808.0,,,,,,
2,Office of Community Beautification ~ West Adam...,Community Clean-Up,NO,Volunteerism,,,,BPW OCB,2017-05-06T08:00:00.000,2017-05-06T13:00:00.000,...,,,,,22724.0,681.0,1282.0,,11.0,8.0
3,Office of Community Beautification ~ West Adam...,Community Clean-Up,NO,Volunteerism,,,,BPW OCB,2017-04-08T08:00:00.000,2017-04-08T13:00:00.000,...,,,,,22724.0,681.0,1282.0,,11.0,8.0
4,Office of Community Beautification ~ West Adam...,Community Clean-Up,NO,Volunteerism,,,,BPW OCB,2017-03-04T08:00:00.000,2017-03-04T13:00:00.000,...,,,,,22724.0,681.0,1282.0,,11.0,8.0


In [4]:
# Summary: column names, non-null counts and dtypes.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29519 entries, 0 to 29518
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Event Name                         29469 non-null  object 
 1   Event Description                  15383 non-null  object 
 2   Fee Required                       28709 non-null  object 
 3   Type of Event                      28449 non-null  object 
 4   Subject Matter                     14691 non-null  object 
 5   Age Groupings                      8917 non-null   object 
 6   Event by Service Area              121 non-null    object 
 7   Event Sponsor                      13625 non-null  object 
 8   Event Date & Time Start            29478 non-null  object 
 9   Event Date & Time Ends             2253 non-null   object 
 10  Location Common Name               16014 non-null  object 
 11  Location Address                   16131 non-null  obj

In [5]:
# Shape of the dataset as (rows, columns).
print("Shape of dataset:", df.shape)


Shape of dataset: (29519, 26)


In [6]:
# Data type of every column, before any conversion is applied.
print("Data types of each column:")
print(df.dtypes)


Data types of each column:
Event Name                            object
Event Description                     object
Fee Required                          object
Type of Event                         object
Subject Matter                        object
Age Groupings                         object
Event by Service Area                 object
Event Sponsor                         object
Event Date & Time Start               object
Event Date & Time Ends                object
Location Common Name                  object
Location Address                      object
Contact Name                          object
Contact Number                        object
Contact Email                         object
Information Website                   object
Elected Official Office Name          object
Elected Officials Name                object
Neighborhood Council                  object
City Reference ID                     object
Zip Codes                            float64
Census Tracts               

In [7]:
# Basic descriptive statistics (count, mean, std, quantiles) of the numeric columns.
df.describe()


Unnamed: 0,Zip Codes,Census Tracts,Precinct Boundaries,LA Specific Plans,Council Districts,Neighborhood Councils (Certified)
count,15965.0,15930.0,15924.0,4651.0,15903.0,15620.0
mean,21024.065393,567.956999,808.59583,11.312621,8.514746,46.804481
std,5359.045234,350.402978,353.702335,10.286973,3.859111,29.478497
min,375.0,1.0,81.0,1.0,1.0,1.0
25%,19729.0,371.0,516.0,7.0,6.0,21.0
50%,23079.0,546.0,793.0,9.0,9.0,38.0
75%,23668.0,730.0,1105.0,10.0,11.0,76.0
max,30059.0,2342.0,1560.0,54.0,15.0,95.0


In [8]:
# Check whether the frame contains any missing value at all.
df.isnull().values.any()


True

In [9]:
# Number of missing values in each column.
df.isnull().sum()


Event Name                              50
Event Description                    14136
Fee Required                           810
Type of Event                         1070
Subject Matter                       14828
Age Groupings                        20602
Event by Service Area                29398
Event Sponsor                        15894
Event Date & Time Start                 41
Event Date & Time Ends               27266
Location Common Name                 13505
Location Address                     13388
Contact Name                         28379
Contact Number                       27702
Contact Email                        28672
Information Website                   9966
Elected Official Office Name         28432
Elected Officials Name               28471
Neighborhood Council                 29470
City Reference ID                    16906
Zip Codes                            13554
Census Tracts                        13589
Precinct Boundaries                  13595
LA Specific

In [10]:
# Convert the event start/end columns to datetime dtype.
# errors="coerce" turns unparseable strings into NaT; report how many values
# were lost that way so the coercion is not silent. Re-running the cell is
# safe: already-parsed columns yield a count of zero.
datetime_columns = ["Event Date & Time Start", "Event Date & Time Ends"]
for col in datetime_columns:
    parsed = pd.to_datetime(df[col], errors="coerce")
    # Values that were present before parsing but are NaT afterwards.
    n_unparseable = int(parsed.isna().sum() - df[col].isna().sum())
    if n_unparseable:
        print(f"{col}: {n_unparseable} unparseable value(s) set to NaT")
    df[col] = parsed

In [11]:
# Convert the "Type of Event" column from object to the categorical dtype.
df["Type of Event"] = df["Type of Event"].astype("category")


In [12]:
# Re-inspect non-null counts and dtypes after the dtype conversions.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29519 entries, 0 to 29518
Data columns (total 26 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   Event Name                         29469 non-null  object        
 1   Event Description                  15383 non-null  object        
 2   Fee Required                       28709 non-null  object        
 3   Type of Event                      28449 non-null  category      
 4   Subject Matter                     14691 non-null  object        
 5   Age Groupings                      8917 non-null   object        
 6   Event by Service Area              121 non-null    object        
 7   Event Sponsor                      13625 non-null  object        
 8   Event Date & Time Start            29476 non-null  datetime64[ns]
 9   Event Date & Time Ends             2249 non-null   datetime64[ns]
 10  Location Common Name              

In [13]:
# Drop columns that are irrelevant to this analysis.
# One call with errors="ignore" keeps the cell safe to re-run: dropping each
# column separately in place raises KeyError on a second execution because
# the columns are already gone.
irrelevant_columns = [
    "Fee Required",
    "Elected Official Office Name",
    "Elected Officials Name",
    "Neighborhood Council",
    "City Reference ID",
    "Zip Codes",
    "Census Tracts",
    "Precinct Boundaries",
    "LA Specific Plans",
    "Council Districts",
    "Neighborhood Councils (Certified)",
]
df = df.drop(columns=irrelevant_columns, errors="ignore")




In [14]:
# Confirm which columns remain after the drop.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29519 entries, 0 to 29518
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Event Name               29469 non-null  object        
 1   Event Description        15383 non-null  object        
 2   Type of Event            28449 non-null  category      
 3   Subject Matter           14691 non-null  object        
 4   Age Groupings            8917 non-null   object        
 5   Event by Service Area    121 non-null    object        
 6   Event Sponsor            13625 non-null  object        
 7   Event Date & Time Start  29476 non-null  datetime64[ns]
 8   Event Date & Time Ends   2249 non-null   datetime64[ns]
 9   Location Common Name     16014 non-null  object        
 10  Location Address         16131 non-null  object        
 11  Contact Name             1140 non-null   object        
 12  Contact Number           1817 no