In [22]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import pandas as pd
import numpy as np

data = pd.read_csv("./viewership_tv_aug.csv")

### Data Preprocessing

In [1]:

# Clean the raw TRP column into a numeric value and add a log-scaled copy.
# Everything is stringified first so that numeric and text cells are handled alike.
data["TRP_raw"] = data["TRP"].astype(str).str.strip()

# Treat empty strings as missing
data.loc[data["TRP_raw"] == "", "TRP_raw"] = np.nan

# Drop thousands separators, then every character that is not a digit or a dot
data["TRP_clean"] = (
    data["TRP_raw"]
    .str.replace(",", "", regex=False)
    .str.replace(r"[^0-9.]", "", regex=True)
)

# Anything still unparseable (e.g. "" or "1.2.3") becomes NaN
data["TRP"] = pd.to_numeric(data["TRP_clean"], errors="coerce")

# Drop rows without a usable TRP. The explicit .copy() makes the filtered frame
# independent of the original, so the column assignment below cannot trigger
# SettingWithCopyWarning.
data = data.dropna(subset=["TRP"]).copy()

# Compress the heavy right tail: log(1 + TRP)
data["TRP_log"] = np.log1p(data["TRP"])


data["TRP_log"].describe()

count    4464.000000
mean        9.963461
std         0.978572
min         6.282267
25%         9.430640
50%        10.136423
75%        10.677016
max        11.517893
Name: TRP_log, dtype: float64

In [2]:
import pandas as pd
import datetime as dt


# Parse the calendar date; unparseable entries become NaT.
data["Date"] = pd.to_datetime(data["Date"], errors="coerce")

# Parse "HH:MM" strings (invalid values are coerced) and keep only the time of day.
parsed_time = pd.to_datetime(data["Time"], format="%H:%M", errors="coerce")
data["Time_dt"] = parsed_time.dt.time

# Rows aired before 06:00 are counted towards the previous TV day.
tv_day_start = dt.time(6, 0)
mask_early = data["Time_dt"] < tv_day_start

# TVDate = calendar date, moved back by one day for the early-morning rows.
early_offset = pd.to_timedelta(mask_early.astype(int), unit="D")
data["TVDate"] = data["Date"] - early_offset

# Day-of-week features are derived from the TV date, not the calendar date.
data["DayName"] = data["TVDate"].dt.day_name()
is_weekend = data["DayName"].isin(["Saturday", "Sunday"])
data["Weekend"] = is_weekend.astype(int)

# Make a 30-min slot label like "20:00-20:30"
def make_slot_label(t):
    """Return the 30-minute slot containing ``t`` as ``"HH:MM-HH:MM"``.

    ``t`` may be a time string or a ``datetime.time``; it is stringified and
    parsed with ``pd.to_datetime``. The start is floored to :00 or :30 and the
    end is 30 minutes later, wrapping past midnight ("23:30-00:00").
    """
    parsed = pd.to_datetime(str(t)).time()

    # Slot start expressed as minutes since midnight, floored to the half hour.
    start_minutes = parsed.hour * 60 + (30 if parsed.minute >= 30 else 0)
    # The end wraps around the 24-hour clock.
    end_minutes = (start_minutes + 30) % (24 * 60)

    start = f"{start_minutes // 60:02d}:{start_minutes % 60:02d}"
    end = f"{end_minutes // 60:02d}:{end_minutes % 60:02d}"
    return f"{start}-{end}"

data['Slot'] = data['Time'].apply(make_slot_label)
data.columns

Index(['Unnamed: 0', 'Date', 'Time', 'ProgramName', 'Genre', 'Episode', 'TRP',
       'TRP_raw', 'TRP_clean', 'TRP_log', 'Time_dt', 'TVDate', 'DayName',
       'Weekend', 'Slot'],
      dtype='object')

In [3]:
# Load the program metadata and align its key column name with the viewership data.
# NOTE(review): both paths end in ".xlsx" but are read with pd.read_csv — confirm the
# files really contain CSV text; a genuine Excel workbook would need pd.read_excel.
train_metadata = pd.read_csv("./Data/Train Programs.xlsx").rename(columns={"program_name": "ProgramName"})
test_metadata = pd.read_csv("./Data/Test Programs.xlsx").rename(columns={"program_name": "ProgramName"})
# pd.merge defaults to an inner join: only ProgramName values present in both frames survive.
merged_train = pd.merge(train_metadata,data, on="ProgramName")

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

df = merged_train.copy()


In [5]:
import ast
import pandas as pd

def to_list(x):
    """Coerce a multi-label cell into a list of labels.

    Accepted inputs:
      - a list: returned unchanged;
      - a tuple: converted to a list;
      - a string holding a Python list literal, e.g. "['A', 'B']";
      - a comma-separated string, e.g. "A, B, C";
      - a single label string, e.g. "Drama".

    Blank strings and every other value (NaN, None, numbers, ...) yield ``[]``.
    A bracketed string that is not a valid literal (e.g. "[A, B]") is handled
    by stripping the brackets and splitting on commas.
    """
    if isinstance(x, list):
        return x

    if isinstance(x, tuple):
        return list(x)

    # Non-string scalars (including NaN / None) carry no labels.
    if not isinstance(x, str):
        return []

    text = x.strip()
    if not text:
        return []

    # Case 1: Python list formatted as string: "['A','B']"
    if text.startswith("[") and text.endswith("]"):
        try:
            return ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid literal: drop the brackets and parse the inside as plain text.
            text = text[1:-1].strip()
            if not text:
                return []

    # Case 2: Comma-separated string: "A, B, C"
    if "," in text:
        return [part.strip() for part in text.split(",") if part.strip()]

    # Case 3: Single label string: "Drama"
    return [text]

multi_label_cols = ["subgenres", "themes", "tone", "suitable_slots"]

# Parse the multi-label columns into real lists: training frame first, then test metadata.
for frame in (df, test_metadata):
    for col in multi_label_cols:
        frame[col] = frame[col].apply(to_list)

### Model Preparation

In [6]:
feature_cols = [
    "base_genre",
    "subgenres",
    "themes",
    "tone",
    "pacing",
    "target_audience",
    "violence_level",
    "sexual_content_level",
    "language_intensity",
    "DayName",
    "Weekend",
    "Slot"
]

X = df[feature_cols]
y = df["TRP_log"]

multi_label_cols = ["subgenres", "themes", "tone"]

cat_single = [
    "base_genre",
    "pacing",
    "target_audience",
    "DayName",
    "Slot",
]

num_features = [
    "violence_level",
    "sexual_content_level",
    "language_intensity",
    "Weekend",
]

In [7]:
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, root_mean_squared_error, make_scorer
from catboost import CatBoostRegressor
import numpy as np
from sklearn.preprocessing import StandardScaler

from Helpers import MultiLabelBinarizerTransformer  

# assumes these are already defined:
# multi_label_cols, cat_single, num_features

# Feature preprocessing: one-hot encode the single-valued categoricals and
# standardise the numeric columns.
# NOTE(review): the multi-label transformers below are commented out and
# ColumnTransformer drops unlisted columns by default (remainder="drop"), so any
# column of X that is not in cat_single / num_features (e.g. subgenres, themes,
# tone) never reaches the regressor.
preprocess = ColumnTransformer(
    transformers=[
        # *[
        #     (f"mlb_{col}", MultiLabelBinarizerTransformer(), [col])
        #     for col in multi_label_cols
        # ],
        # handle_unknown="ignore": categories unseen during fit encode as all zeros
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_single),
        ("num_scaled", StandardScaler(), num_features)
    ]
)



# The "regressor" step is a placeholder: every entry of param_grid supplies its
# own estimator for this step during the grid search.
base_pipeline = Pipeline(
    steps=[
        ("preprocess", preprocess),
        ("regressor", RandomForestRegressor(random_state=42, n_jobs=-1)),
    ]
)

param_grid = [

    # RANDOM FOREST
    {
        "regressor": [RandomForestRegressor(random_state=42, n_jobs=-1)],
        "regressor__n_estimators": [200, 400, 800],
        "regressor__max_depth": [None, 10, 20, 30],
        "regressor__min_samples_split": [2, 5],
        "regressor__min_samples_leaf": [1, 2, 4],
        "regressor__max_features": ["sqrt", "log2"],
    },

    # GRADIENT BOOSTING
    {
        "regressor": [GradientBoostingRegressor(random_state=42)],
        "regressor__n_estimators": [200, 400],
        "regressor__learning_rate": [0.03, 0.05, 0.1],
        "regressor__max_depth": [2, 3],
        "regressor__subsample": [0.7, 1.0],
        "regressor__min_samples_leaf": [1, 2, 4],
    },

    # RIDGE (baseline linear model)
    {
        "regressor": [Ridge()],
        "regressor__alpha": [0.01, 0.1, 1.0, 10.0, 100.0],
        # "regressor__fit_intercept": [True, False],
    },

    # PLAIN LINEAR REGRESSION (no regularization)
    {
        "regressor": [LinearRegression()],
        "regressor__fit_intercept": [True, False],
        # no other real hyperparams to tune here
    },

    # CATBOOST (after preprocessing – all numeric)
    {
        "regressor": [CatBoostRegressor(verbose=False, random_seed=42)],
        "regressor__depth": [4, 6, 8],
        "regressor__learning_rate": [0.02, 0.03, 0.05],
        "regressor__n_estimators": [300, 500, 800],
        "regressor__l2_leaf_reg": [1, 3, 5],
        "regressor__random_strength": [1, 2],
    },
]



# Track R² and RMSE during the search. These built-in scorer names are the
# ready-made equivalents of wrapping r2_score / root_mean_squared_error;
# the RMSE scorer is negated so that "greater is better" holds.
scorers = {
    "r2": "r2",
    "rmse": "neg_root_mean_squared_error",
}

# Exhaustive 5-fold search over all model families; the winner by R² is refit
# on the full training split.
search = GridSearchCV(
    base_pipeline,
    param_grid,
    scoring=scorers,
    refit="r2",
    cv=5,
    n_jobs=-1,
    verbose=0,
)
search.fit(X_train, y_train)

print("Best Params:", search.best_params_)
print("Best CV R²:", search.best_score_)

# Hold-out evaluation of the refit pipeline.
best_model = search.best_estimator_
y_pred = best_model.predict(X_val)

val_r2 = r2_score(y_val, y_pred)
val_rmse = root_mean_squared_error(y_val, y_pred)
print("Val R²:", val_r2)
print("Val RMSE:", val_rmse)


Best Params: {'regressor': GradientBoostingRegressor(random_state=42), 'regressor__learning_rate': 0.05, 'regressor__max_depth': 3, 'regressor__min_samples_leaf': 4, 'regressor__n_estimators': 200, 'regressor__subsample': 0.7}
Best CV R²: 0.17693101357775215
Val R²: 0.1647361946774979
Val RMSE: 0.9022495063358341


In [9]:
results = search.cv_results_

# Output column name -> key in cv_results_
summary_columns = {
    "rank": "rank_test_r2",
    "mean_r2": "mean_test_r2",
    "std_r2": "std_test_r2",
    "mean_rmse": "mean_test_rmse",
    "std_rmse": "std_test_rmse",
    "params": "params",
}
cv_summary = pd.DataFrame({name: results[key] for name, key in summary_columns.items()})

# The RMSE scorer is negated for the search; flip it back to a positive error.
cv_summary["mean_rmse"] = -cv_summary["mean_rmse"]

cv_summary.sort_values(by="mean_rmse")

Unnamed: 0,rank,mean_r2,std_r2,mean_rmse,std_rmse,params
188,1,0.176931,0.032362,0.884096,0.013560,{'regressor': GradientBoostingRegressor(random...
166,2,0.176241,0.034792,0.884410,0.013103,{'regressor': GradientBoostingRegressor(random...
174,3,0.175666,0.033155,0.884749,0.012579,{'regressor': GradientBoostingRegressor(random...
178,4,0.174827,0.032794,0.885204,0.012386,{'regressor': GradientBoostingRegressor(random...
162,5,0.174408,0.035694,0.885384,0.013764,{'regressor': GradientBoostingRegressor(random...
...,...,...,...,...,...,...
19,381,-0.014647,0.071870,0.980869,0.019676,"{'regressor': RandomForestRegressor(n_jobs=-1,..."
2,382,-0.014961,0.073056,0.980994,0.020307,"{'regressor': RandomForestRegressor(n_jobs=-1,..."
1,383,-0.015160,0.073491,0.981083,0.020482,"{'regressor': RandomForestRegressor(n_jobs=-1,..."
18,384,-0.016318,0.071786,0.981672,0.019229,"{'regressor': RandomForestRegressor(n_jobs=-1,..."


### Prediction

In [23]:
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
weekend_days = ["Saturday", "Sunday"]

# 48 half-hour slot labels in TV-day order: 06:00-06:30 first, 05:30-06:00 last.
# `m` counts minutes from midnight and runs past 24:00, hence the "% 24" on the hours.
slots = [
    f"{(m // 60) % 24:02d}:{m % 60:02d}-{((m + 30) // 60) % 24:02d}:{(m + 30) % 60:02d}"
    for m in range(6 * 60, 30 * 60, 30)
]

# Reverse lookup: slot label -> position within the TV day.
slot_index = {label: position for position, label in enumerate(slots)}

In [24]:
new_program_names = [
    "Seinfeld", "Arrested Development", "Brooklyn Nine-Nine",
    "The Crown", "The Wire", "Mad Men",
    "The Mandalorian", "The Boys", "Prison Break", "Outlander",
]

# Keep one metadata row per requested program.
is_new_program = test_metadata["ProgramName"].isin(new_program_names)
new_programs_df = test_metadata[is_new_program].drop_duplicates(subset=["ProgramName"])

# Program name -> its metadata row (a pandas Series).
prog_meta = {row["ProgramName"]: row for _, row in new_programs_df.iterrows()}

In [25]:
import numpy as np
import pandas as pd

def predict_program_trp_for_day(
    program_row,
    dayname,
    start_slot,
    num_slots,
    model,
    slots,
    slot_index,
    trp_log_min,
    trp_log_max,
):
    """
    Sum the predicted raw TRP of one program on one day when it runs for
    `num_slots` consecutive slots beginning at `start_slot`.

    Parameters:
      - `program_row`: mapping / Series with the program's metadata fields.
      - `model`: object whose `predict` returns log1p(TRP) for a feature frame.
      - `slots`: ordered list of slot labels; `slot_index` maps label -> position.
      - `trp_log_min`, `trp_log_max`: bounds used to clip the log predictions
        before they are turned back into raw TRP.

    Returns `(total_trp, used_slots)`: the summed raw TRP as a float and the
    list of slot labels that were scored.
    """
    first = slot_index[start_slot]
    used_slots = slots[first : first + num_slots]

    weekend_flag = int(dayname in ("Saturday", "Sunday"))

    def feature_row(slot_label):
        # One model input row; only "Slot" differs between rows of the same call.
        return {
            "base_genre": program_row["base_genre"],
            "subgenres_str": program_row.get("subgenres_str", ""),
            "themes_str": program_row.get("themes_str", ""),
            "tone_str": program_row.get("tone_str", ""),
            "pacing": program_row["pacing"],
            "target_audience": program_row["target_audience"],
            "violence_level": program_row["violence_level"],
            "sexual_content_level": program_row["sexual_content_level"],
            "language_intensity": program_row["language_intensity"],
            "DayName": dayname,
            "Weekend": weekend_flag,
            "Slot": slot_label,
        }

    X_candidate = pd.DataFrame([feature_row(label) for label in used_slots])

    # Predict in log space, clip to the allowed range, then undo log1p.
    log_preds = np.clip(model.predict(X_candidate), trp_log_min, trp_log_max)
    raw_preds = np.expm1(log_preds)

    return float(raw_preds.sum()), used_slots

In [26]:
from joblib import Parallel, delayed
from tqdm import tqdm
import os

def _candidates_for_schedule(
    row,
    schedule_type,
    days,
    num_slots,
    model,
    slots,
    slot_index,
    trp_log_min,
    trp_log_max,
    tqdm_desc,
):
    """Score every feasible start slot for one schedule type of one program.

    For each start position that leaves room for `num_slots` consecutive slots,
    the predicted TRP is summed over all `days` and one candidate dict is built.
    """
    candidates = []

    # Iterate only over valid start positions so the progress bar runs to completion.
    last_start = len(slots) - num_slots
    for start_idx in tqdm(range(last_start + 1), desc=tqdm_desc, leave=False):
        start_slot = slots[start_idx]

        total_trp = 0.0
        occupied = []

        for d in days:
            trp_d, used_slots = predict_program_trp_for_day(
                program_row=row,
                dayname=d,
                start_slot=start_slot,
                num_slots=num_slots,
                model=model,
                slots=slots,
                slot_index=slot_index,
                trp_log_min=trp_log_min,
                trp_log_max=trp_log_max,
            )
            total_trp += trp_d
            occupied.append({
                "day": d,
                "slot_indices": [slot_index[s] for s in used_slots],
                "genre": row["base_genre"],
            })

        candidates.append({
            "ProgramName": row["ProgramName"],
            "schedule_type": schedule_type,
            "start_slot": start_slot,
            "start_index": start_idx,
            "num_slots": num_slots,
            "total_pred_trp": total_trp,
            "occupied": occupied,
            "genre": row["base_genre"],
        })

    return candidates


def build_candidates_with_local_tqdm(
    row,
    model,
    slots,
    slot_index,
    trp_log_min,
    trp_log_max,
):
    """Build all candidate placements for one program.

    Two schedule types are enumerated, weekday candidates first:
      - "weekday": 2 consecutive slots, same start on Monday to Friday;
      - "weekend": 4 consecutive slots, same start on Saturday and Sunday.

    Returns a list of candidate dicts with keys ProgramName, schedule_type,
    start_slot, start_index, num_slots, total_pred_trp, occupied and genre.
    """
    prog_name = row["ProgramName"]
    tqdm_desc = f"CPU {os.getpid()} | {prog_name}"

    # (schedule_type, days covered, consecutive slots per day)
    schedule_specs = (
        ("weekday", ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"], 2),
        ("weekend", ["Saturday", "Sunday"], 4),
    )

    candidates = []
    for schedule_type, days, num_slots in schedule_specs:
        candidates.extend(
            _candidates_for_schedule(
                row=row,
                schedule_type=schedule_type,
                days=days,
                num_slots=num_slots,
                model=model,
                slots=slots,
                slot_index=slot_index,
                trp_log_min=trp_log_min,
                trp_log_max=trp_log_max,
                tqdm_desc=tqdm_desc,
            )
        )

    return candidates

In [27]:

# 1) Clamp range for predictions: the span of log-TRP present in the training frame
trp_log_min = df["TRP_log"].min()
trp_log_max = df["TRP_log"].max()

# 2) Candidate generation, one threaded task per metadata row
# NOTE(review): this iterates every row of test_metadata, not the de-duplicated
# new_programs_df built earlier — confirm that is intended.
build_for_program = delayed(build_candidates_with_local_tqdm)
results = Parallel(n_jobs=10, prefer="threads")(
    build_for_program(
        row=row,
        model=best_model,
        slots=slots,
        slot_index=slot_index,
        trp_log_min=trp_log_min,
        trp_log_max=trp_log_max,
    )
    for _, row in test_metadata.iterrows()
)

# 3) Flatten the per-program lists into a single list of candidates
candidate_schedules = []
for program_candidates in results:
    candidate_schedules.extend(program_candidates)


CPU 1964744 | Arrested Development:   0%|          | 0/48 [00:00<?, ?it/s][A


CPU 1964744 | The Crown:   0%|          | 0/48 [00:00<?, ?it/s][A[A[A

CPU 1964744 | Seinfeld:   0%|          | 0/48 [00:00<?, ?it/s]<?, ?it/s][A[A



CPU 1964744 | The Mandalorian:   0%|          | 0/48 [00:00<?, ?it/s][A[A[A[A
CPU 1964744 | Arrested Development:  12%|█▎        | 6/48 [00:00<00:00, 50.96it/s][A


CPU 1964744 | The Crown:  10%|█         | 5/48 [00:00<00:01, 41.24it/s][A[A[A




CPU 1964744 | The Wire:   0%|          | 0/48 [00:00<?, ?it/s][A[A[A[A[A





CPU 1964744 | Outlander:   0%|          | 0/48 [00:00<?, ?it/s][A[A[A[A[A[A






CPU 1964744 | Mad Men:   0%|          | 0/48 [00:00<?, ?it/s][A[A[A[A[A[A[A







CPU 1964744 | Prison Break:   0%|          | 0/48 [00:00<?, ?it/s][A[A[A[A[A[A[A[A

CPU 1964744 | Seinfeld:  12%|█▎        | 6/48 [00:00<00:00, 52.37it/s] 8.64it/s][A[A



CPU 1964744 | The Mandalorian:   8%|▊         | 4/48 [00:00<00:01

In [28]:
df_pred = pd.DataFrame(candidate_schedules)

In [29]:
df_pred[df_pred["ProgramName"] == "Seinfeld" ]

Unnamed: 0,ProgramName,schedule_type,start_slot,start_index,num_slots,total_pred_trp,occupied,genre
0,Seinfeld,weekday,06:00-06:30,0,2,241140.861434,"[{'day': 'Monday', 'slot_indices': [0, 1], 'ge...",Comedy
1,Seinfeld,weekday,06:30-07:00,1,2,232689.719428,"[{'day': 'Monday', 'slot_indices': [1, 2], 'ge...",Comedy
2,Seinfeld,weekday,07:00-07:30,2,2,235922.045753,"[{'day': 'Monday', 'slot_indices': [2, 3], 'ge...",Comedy
3,Seinfeld,weekday,07:30-08:00,3,2,245099.353188,"[{'day': 'Monday', 'slot_indices': [3, 4], 'ge...",Comedy
4,Seinfeld,weekday,08:00-08:30,4,2,243720.944576,"[{'day': 'Monday', 'slot_indices': [4, 5], 'ge...",Comedy
...,...,...,...,...,...,...,...,...
87,Seinfeld,weekend,02:00-02:30,40,4,95473.641427,"[{'day': 'Saturday', 'slot_indices': [40, 41, ...",Comedy
88,Seinfeld,weekend,02:30-03:00,41,4,88250.484777,"[{'day': 'Saturday', 'slot_indices': [41, 42, ...",Comedy
89,Seinfeld,weekend,03:00-03:30,42,4,87904.213023,"[{'day': 'Saturday', 'slot_indices': [42, 43, ...",Comedy
90,Seinfeld,weekend,03:30-04:00,43,4,89401.795844,"[{'day': 'Saturday', 'slot_indices': [43, 44, ...",Comedy


### Optimization

In [17]:
!pip install pulp

/bin/bash: line 1: pip: command not found


In [18]:
import pulp
from collections import defaultdict
import pandas as pd

In [19]:
import pulp
from collections import defaultdict
import pandas as pd

# df_pred is your candidate_schedules DataFrame
# Ensure it has columns: ['ProgramName', 'schedule_type', 'total_pred_trp', 'occupied', 'genre', 'start_index']

# 0) Collect candidate IDs (the row labels of df_pred; they also key the x variables)
candidate_ids = df_pred.index.tolist()

# 1) Define problem (a maximisation)
prob = pulp.LpProblem("TV_Schedule_Optimization", pulp.LpMaximize)

# 2) Decision variables: x[i] = 1 if candidate i is chosen
x = pulp.LpVariable.dicts("x", candidate_ids, lowBound=0, upBound=1, cat="Binary")

# 3) Objective: maximize total predicted TRP
prob += pulp.lpSum(df_pred.loc[i, "total_pred_trp"] * x[i] for i in candidate_ids)
# 3b) Constraint: at least two of the chosen candidates must be weekend schedules
prob += pulp.lpSum(x[i] for i in df_pred.index if df_pred.loc[i, "schedule_type"] == "weekend") >= 2, "at_least_two_weekend"

# 4) Constraint: each program must be placed exactly once
#    (exactly one of its candidates, weekday or weekend, is selected)
for prog_name, group in df_pred.groupby("ProgramName"):
    prob += pulp.lpSum(x[i] for i in group.index) == 1, f"one_schedule_for_{prog_name}"

# 5) Constraint: no overlapping slots in the grid
# Build mapping (day, slot_index) -> list of candidate indices that occupy it
slot_to_candidates = defaultdict(list)

for i, row in df_pred.iterrows():
    for occ in row["occupied"]:          # occ: {"day": ..., "slot_indices": [...], "genre": ...}
        day = occ["day"]
        for si in occ["slot_indices"]:
            slot_to_candidates[(day, si)].append(i)

# At most one selected candidate may cover any given (day, slot) cell
for (day, si), cand_list in slot_to_candidates.items():
    prob += pulp.lpSum(x[i] for i in cand_list) <= 1, f"no_overlap_{day}_{si}"

# 6) Constraint: consecutive hours should have different genres (for different programs)
# We treat each pair of hours (h, h+1) on same day and forbid two different programs
# of the same genre occupying those hours.

def hour_block(slot_index: int) -> int:
    """Return the hour block of a slot position: positions 0-1 -> 0, 2-3 -> 1, and so on."""
    block, _ = divmod(slot_index, 2)
    return block

# Build mapping: (day, hour_block) -> list of (candidate_index, ProgramName, genre)
# A candidate is appended once per occupied slot, so it can appear more than once
# in the same hour block; the duplicates are filtered out via added_pairs below.
day_hour_to_candidates = defaultdict(list)

for i, row in df_pred.iterrows():
    prog = row["ProgramName"]
    genre = row["genre"]
    for occ in row["occupied"]:
        day = occ["day"]
        for si in occ["slot_indices"]:
            hb = hour_block(si)
            day_hour_to_candidates[(day, hb)].append((i, prog, genre))

# Now create adjacency constraints:
# For each day, hour h and h+1, for each pair of candidates (i,j) with:
#   - same genre
#   - different ProgramName
#   - i occupies hour h, j occupies hour h+1
# enforce: x[i] + x[j] <= 1
# The dedup key is the sorted candidate pair without day or hour, so each pair
# is constrained once overall, regardless of direction or day.
added_pairs = set()

all_days = sorted({k[0] for k in day_hour_to_candidates.keys()})
max_hour = max(h for (_, h) in day_hour_to_candidates.keys()) if day_hour_to_candidates else 0

for day in all_days:
    for h in range(max_hour):  # pair (h, h+1)
        cand_h  = day_hour_to_candidates.get((day, h), [])
        cand_h1 = day_hour_to_candidates.get((day, h + 1), [])

        for (i, prog_i, genre_i) in cand_h:
            for (j, prog_j, genre_j) in cand_h1:
                if genre_i == genre_j and prog_i != prog_j:
                    # avoid duplicate constraints
                    key = tuple(sorted((i, j)))
                    if key in added_pairs:
                        continue
                    added_pairs.add(key)

                    prob += x[i] + x[j] <= 1, f"adjacent_genre_{day}_h{h}_cand{i}_{j}"

# 7) Solve the model
prob.solve(pulp.PULP_CBC_CMD(msg=False))  # msg=False silences the CBC solver log (msg=True shows it)

# Confirm the status is "Optimal" before reading variable values
print("Status:", pulp.LpStatus[prob.status])
# print("Optimal total predicted TRP:", pulp.value(prob.objective))

Status: Optimal


In [20]:
# A binary variable counts as selected when the solver value is above 0.5.
selected_ids = [i for i in candidate_ids if pulp.value(x[i]) > 0.5]
chosen_rows = [df_pred.loc[i] for i in selected_ids]

chosen_df = pd.DataFrame(chosen_rows).reset_index(drop=True)
summary_cols = ["ProgramName", "schedule_type", "start_slot", "total_pred_trp"]
print(chosen_df[summary_cols])

            ProgramName schedule_type   start_slot  total_pred_trp
0              Seinfeld       weekday  20:00-20:30   337646.480249
1  Arrested Development       weekday  14:00-14:30   254128.295108
2    Brooklyn Nine-Nine       weekday  16:00-16:30   326715.196251
3             The Crown       weekday  19:00-19:30   448681.930291
4              The Wire       weekday  21:00-21:30   340598.709527
5               Mad Men       weekend  19:00-19:30   154704.366751
6       The Mandalorian       weekday  17:00-17:30   313294.942367
7              The Boys       weekend  16:00-16:30   136615.640183
8          Prison Break       weekday  22:00-22:30   325847.449228
9             Outlander       weekday  10:00-10:30   303937.238358


In [21]:
# Week layout for the grid columns
ordered_days = [
    "Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
    "Saturday", "Sunday"
]

# Start from an empty cell for every (day, slot) combination
grid_data = {}
for day in ordered_days:
    grid_data[day] = dict.fromkeys(slots, "")

# Write each chosen program into the cells it occupies
for _, row in chosen_df.iterrows():
    label = f"{row['ProgramName']} ({row['genre']}, {row['schedule_type']})"
    for occ in row["occupied"]:
        day_cells = grid_data[occ["day"]]
        for si in occ["slot_indices"]:
            day_cells[slots[si]] = label

# Slots as rows, days as columns in calendar order
grid_df = pd.DataFrame(grid_data)[ordered_days]
grid_df.index.name = "Slot"
grid_df



Unnamed: 0_level_0,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
Slot,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
06:00-06:30,,,,,,,
06:30-07:00,,,,,,,
07:00-07:30,,,,,,,
07:30-08:00,,,,,,,
08:00-08:30,,,,,,,
08:30-09:00,,,,,,,
09:00-09:30,,,,,,,
09:30-10:00,,,,,,,
10:00-10:30,"Outlander (Romance, weekday)","Outlander (Romance, weekday)","Outlander (Romance, weekday)","Outlander (Romance, weekday)","Outlander (Romance, weekday)",,
10:30-11:00,"Outlander (Romance, weekday)","Outlander (Romance, weekday)","Outlander (Romance, weekday)","Outlander (Romance, weekday)","Outlander (Romance, weekday)",,
