### Import Packages

In [None]:
from llm import OpenAIChat, DeepInfraChat, QwenChat
from loader import Crux, LiveCodeBench
from runner.runner import Runner

### Load Datasets

In [None]:
# Load the perturbed benchmark datasets used by the experiments below.
# The first argument to load_perturb looks like a perturbation identifier
# ("ALL", "MHC", "VAN") and the second like the prediction task
# ("output" / "input") — presumably; confirm against the loader module.
crux_all_output = Crux.load_perturb("ALL", "output")
crux_mhc_input = Crux.load_perturb("MHC", "input")
# NOTE(review): no task argument is passed here, although the variable name
# suggests the "output" task — confirm that LiveCodeBench.load_perturb does
# not need (or defaults) it.
lcb_van_output = LiveCodeBench.load_perturb("VAN")

### Load Models (API)

In [None]:
# Request settings shared by the API-backed chat models constructed below.
temperature = 0.2
max_tokens = 2000
timeout = 300

# NOTE(review): the variable and folder name say "GPT-4o", but the model id
# requested is "gpt-4o-mini" — confirm which one is intended.
gpt_4o = OpenAIChat(
    "gpt-4o-mini",
    folder_name="GPT-4o",
    temperature=temperature,
    max_tokens=max_tokens,
    timeout=timeout,
)

llama31_70b = DeepInfraChat(
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    folder_name="LLaMA-3.1-70B-Instruct",
    temperature=temperature,
    max_tokens=max_tokens,
    timeout=timeout,
)

deepseek_v3 = DeepInfraChat(
    "deepseek-ai/DeepSeek-V3",
    folder_name="DeepSeek-V3",
    temperature=temperature,
    max_tokens=max_tokens,
    timeout=timeout,
)

# Unlike the models above, no max_tokens is passed here, and streaming is on.
deepseek_r1 = DeepInfraChat(
    "deepseek-ai/DeepSeek-R1",
    folder_name="DeepSeek-R1",
    temperature=temperature,
    timeout=timeout,
    stream=True,
)

# Uses its own, much larger token budget (50000) instead of the shared
# max_tokens, with streaming and the reasoning flag enabled.
qwq = QwenChat(
    "qwq-32b",
    folder_name="QwQ-32B",
    temperature=temperature,
    max_tokens=50000,
    timeout=timeout,
    stream=True,
    reasoning=True,
)


### Set Up Runner Server

In [None]:
# Each experiment builds a fresh Runner from one dataset and the gpt_4o model,
# then calls run() on it. `runner` is rebound each time, so only the last
# Runner stays referenced afterwards. The first argument to run() looks like
# a label for the run — presumably used to name its results; confirm in Runner.

# CRUX, "ALL" perturbation, output mode, chain-of-thought disabled.
runner = Runner(crux_all_output, gpt_4o)
runner.run(
    "crux_ALL_output_direct",
    max_workers=10,
    n=2,
    mode="output",
    cot=False,
)

# CRUX, "MHC" perturbation, input mode, chain-of-thought enabled.
runner = Runner(crux_mhc_input, gpt_4o)
runner.run(
    "crux_MHC_input_cot",
    max_workers=10,
    n=1,
    mode="input",
    cot=True,
)

# LiveCodeBench, "VAN" perturbation, output mode, chain-of-thought disabled.
runner = Runner(lcb_van_output, gpt_4o)
runner.run(
    "lcb_VAN_output_direct",
    max_workers=10,
    n=2,
    mode="output",
    cot=False,
)