In [9]:
import random
import pandas as pd
import matplotlib.pyplot as plt
from vowpalwabbit import pyvw
from sklearn.model_selection import train_test_split
import subprocess

In [10]:
def generate_data(n=1000, seed=42):
    random.seed(seed)
    data = []
    for _ in range(n):
        event_type = random.choice(["study", "relax", "sleep", "hobby", "gym"])
        hours_until_due = round(random.uniform(1, 72), 2)
        daily_free_time = round(random.uniform(1, 8), 1)
        min_time, max_time = 9.0, 20.0
        # Simulate best time to act: earlier if due soon, later if it's relax
        if event_type in ["study", "gym"]:
            ideal_time = max(min_time, min(max_time, 9 + (1 - min(hours_until_due/72, 1)) * 6))
        elif event_type == "relax":
            ideal_time = max(min_time, min(max_time, 18 - (daily_free_time/8)*3))
        else:
            ideal_time = random.uniform(min_time, max_time)
        
        cost = abs(random.gauss(ideal_time, 0.5) - ideal_time)
        action = round(ideal_time + random.uniform(-1, 1), 2)
        cost = abs(cost + random.gauss(0, 0.5))  # Add some noise

        data.append({
            "cost": cost,
            "min_time": min_time,
            "max_time": max_time,
            "action": action,
            "event_type": event_type,
            "hours_until_due": hours_until_due,
            "daily_free_time": daily_free_time
        })
    return pd.DataFrame(data)

In [11]:
def to_vw_line(row):
    return f"{row['cost']}:[{row['min_time']},{row['max_time']}]:{row['action']} " \
           f"|event_type {row['event_type']} hours_until_due:{row['hours_until_due']} daily_free_time:{row['daily_free_time']}"

In [12]:
def save_vw_file(df, path):
    with open(path, "w") as f:
        for _, row in df.iterrows():
            f.write(to_vw_line(row) + "\n")


In [13]:
def train_with_cli(train_file: str, model_out: str, min_time=9.0, max_time=20.0, passes=10):
    cmd = [
        "vw", "--cb_continuous", str(min_time), str(max_time),
        "-d", train_file,
        "-f", model_out,
        "--passes", str(passes),
        "--cache_file", "train.cache",
        "--quiet"
    ]
    subprocess.run(cmd, check=True)

In [14]:
def predict_with_cli(test_file: str, model_path: str, pred_output: str):
    cmd = [
        "vw", "-t", "-i", model_path,
        "-d", test_file,
        "-p", pred_output,
        "--quiet"
    ]
    subprocess.run(cmd, check=True)

In [15]:
def evaluate_predictions(test_file: str, pred_output: str):
    predicted = []
    with open(pred_output, "r") as f:
        for line in f:
            predicted.append(float(line.strip()))

    true = []
    with open(test_file, "r") as f:
        for line in f:
            true_action = float(line.split(":")[2].split()[0])
            true.append(true_action)

    errors = [abs(p - t) for p, t in zip(predicted, true)]
    return sum(errors) / len(errors)

In [16]:
def plot_validation_costs(costs):
    plt.plot(range(1, len(costs)+1), costs, marker='o')
    plt.title("Validation Cost over Training Passes")
    plt.xlabel("Training Pass")
    plt.ylabel("Average Validation Cost")
    plt.grid(True)
    plt.show()

In [17]:
# Save train/test files
save_vw_file(train_df, "train.vw")
save_vw_file(test_df, "test.vw")

# Train
train_with_cli("train.vw", "model.vw", passes=10)

# Predict
predict_with_cli("test.vw", "model.vw", "preds.txt")

# Evaluate
avg_error = evaluate_predictions("test.vw", "preds.txt")
print(f"Final Avg Error: {avg_error:.4f}")

[critical] vw (options_cli.cc:451): unrecognised option '--cb_continuous'


CalledProcessError: Command '['vw', '--cb_continuous', '9.0', '20.0', '-d', 'train.vw', '-f', 'model.vw', '--passes', '10', '--cache_file', 'train.cache', '--quiet']' returned non-zero exit status 1.