In [None]:
import datasets
from src import data_loading

# Fetch the arena human-preference dataset with `datasets.load_dataset`,
# convert its "train" split via `.to_pandas()`, and pass the result to the
# project loader to build `training_data`.
DATASET_ID = "lmarena-ai/arena-human-preference-140k"

dataset = datasets.load_dataset(DATASET_ID)
train_frame = dataset["train"].to_pandas()
training_data = data_loading.load_training_data(train_frame)

# Report how many entries the loader produced.
print(f"Successfully loaded {len(training_data.entries)} entries")

In [None]:
from src.models.dense_network_model import DenseNetworkModel

# Constructor and training arguments are collected up front so everything a
# reader might want to tune sits in one place.
model_config = {
    "hidden_dims": [512, 512, 256, 256, 128, 128, 64, 64],
    "model_id_embedding_dim": 64,
    "learning_rate": 5e-6,
}
train_config = {
    "epochs": 2_500,
    "batch_size": 512,
}

# Build the network, then fit it on the data loaded in the previous cell.
model = DenseNetworkModel(**model_config)
model.train(training_data, **train_config)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Plot the natural log of the recorded training loss together with a moving
# average of it, and mark the lowest value the moving average reaches.
SMOOTHING_WINDOW = 10  # points per moving-average window; also shown in the legend

history = model.get_history()
t = np.arange(len(history.total_loss))
log_loss = np.log(history.total_loss)
plt.plot(t, log_loss, label="Training loss")

# With fewer than SMOOTHING_WINDOW points, np.convolve(mode='valid') swaps its
# operands and returns an array that no longer lines up with `t`, which makes
# plt.plot raise. In that case only the raw curve is drawn.
if len(log_loss) >= SMOOTHING_WINDOW:
    kernel = np.ones(SMOOTHING_WINDOW) / SMOOTHING_WINDOW
    sliding_mean = np.convolve(log_loss, kernel, mode='valid')
    # The first complete window ends at index SMOOTHING_WINDOW - 1, so the
    # smoothed curve is plotted against t from that index onwards.
    plt.plot(t[SMOOTHING_WINDOW - 1:], sliding_mean, label=f"Sliding mean ({SMOOTHING_WINDOW})")

    min_sliding_mean = np.min(sliding_mean)
    plt.axhline(y=min_sliding_mean, color='r', linestyle='--', label=f'Min sliding mean: {min_sliding_mean:.3f}')

plt.xlabel("Index in loss history")
plt.ylabel("log(total loss)")
plt.legend()
plt.show()

In [None]:
from src.data_models.data_models import InputData

# Score every training prompt for every model name held by the model's encoder,
# then print the models ranked by mean score (highest first) with the standard
# deviation of their scores.
example_prompts = [e.user_prompt for e in training_data.entries]
results = model.predict(
    InputData(
        prompts=example_prompts,
        # NOTE(review): reaches into a private attribute of the model; presumably
        # the names of the models seen during training - confirm, and prefer a
        # public accessor if one exists.
        model_names=model._model_encoder.names,
    ),
    batch_size=512,
).scores

# One (name, mean, std) tuple per model, sorted by mean score, descending.
sorted_results = sorted(
    [(name, float(np.mean(scores)), float(np.std(scores))) for name, scores in results.items()],
    key=lambda x: x[1],
    reverse=True,
)

# Width of the "name:" column. Computed once instead of once per printed row;
# `default=0` keeps this line safe when there are no results.
label_width = max((len(name) for name, _, _ in sorted_results), default=0) + 1

for name, mean_score, std_score in sorted_results:
    # Build the label outside the f-string: nesting the same quote character
    # inside an f-string is a SyntaxError before Python 3.12.
    label = name + ":"
    # \u00b1 is the plus-minus sign, written as an escape so it survives any
    # re-encoding of the notebook file.
    print(f"{label:<{label_width}s} {mean_score:.6f} \u00b1 {std_score:.6f}")