In [3]:
import csv
import random
import os
import matplotlib.pyplot as plt
from vowpalwabbit import pyvw
from sklearn.model_selection import train_test_split

CA_MIN, CA_MAX = 9.0, 20.0  # action range
PDF_UNIFORM = 1.0 / (CA_MAX - CA_MIN)


def convert_to_vw_format(row):
    label = f"ca {row['action_time']}:{row['cost']}:{PDF_UNIFORM}"
    features = f"event_type={row['event_type']} hours_until_due:{row['hours_until_due']} daily_free_time:{row['daily_free_time']}"
    return f"{label} | {features}"



In [4]:
def load_and_prepare(csv_path):
    with open(csv_path, newline='') as f:
        reader = list(csv.DictReader(f))
        train_rows, test_rows = train_test_split(reader, test_size=0.2, random_state=42)
        return train_rows, test_rows

In [5]:
def write_vw_file(rows, path):
    with open(path, 'w') as f:
        for row in rows:
            f.write(convert_to_vw_format(row) + '\n')

In [6]:
def train_model(train_file, model_path):
    model = pyvw.vw(f"--ca {CA_MIN} {CA_MAX} -l 0.5 -f {model_path} --passes 10 --cache_file cache --kill_cache --quiet")
    with open(train_file, 'r') as f:
        for line in f:
            model.learn(line.strip())
    model.finish()


In [7]:
def evaluate(model_path, test_rows):
    model = pyvw.vw(f"-i {model_path} -t --ca {CA_MIN} {CA_MAX} --quiet")
    actual, predicted = [], []
    for row in test_rows:
        features = f"| event_type={row['event_type']} hours_until_due:{row['hours_until_due']} daily_free_time:{row['daily_free_time']}"
        pred = model.predict(features)
        actual.append(float(row['action_time']))
        predicted.append(pred)
    return actual, predicted

In [8]:
def plot_results(actual, predicted):
    errors = [abs(a - p) for a, p in zip(actual, predicted)]
    plt.plot(errors, label="Absolute Error")
    plt.xlabel("Test Example")
    plt.ylabel("Prediction Error (hours)")
    plt.title("Prediction Error on Test Set")
    plt.legend()
    plt.show()

In [9]:
csv_path = "schedule.csv"
train_file, test_file = "train.vw", "test.vw"
model_path = "model.vw"

train_rows, test_rows = load_and_prepare(csv_path)
write_vw_file(train_rows, train_file)
write_vw_file(test_rows, test_file)
train_model(train_file, model_path)

actual, predicted = evaluate(model_path, test_rows)
plot_results(actual, predicted)

  model = pyvw.vw(f"--ca {CA_MIN} {CA_MAX} -l 0.5 -f {model_path} --passes 10 --cache_file cache --kill_cache --quiet")


RuntimeError: unrecognised option '--ca'