# Multitask RLVR using GRPO
### *HSE, "Data Science Specialist" Final Project*
Sukhotin Oleg, DS-19, 2026

В данном ноутбуке проводится оценка моделей на LLM-бенчмарках с помощью библиотеки lm-evaluation-harness (https://github.com/EleutherAI/lm-evaluation-harness), а также с помощью собственных прогонов на GSM8K и Spider.

In [None]:
%%capture
import os
# Install uv first; the `uv pip install` commands below depend on it.
!pip install --upgrade -qqq uv
# Colab is detected by looking for "COLAB_" in the concatenated names of all
# environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    # If you're not in Colab, just use pip install!
    !pip install unsloth vllm
else:
    # Pin numpy/pillow to the versions that are already importable (presumably
    # so the --upgrade install below does not replace them); fall back to the
    # unpinned package names when the import fails.
    try: import numpy, PIL; get_numpy = f"numpy=={numpy.__version__}"; get_pil = f"pillow=={PIL.__version__}"
    except: get_numpy = "numpy"; get_pil = "pillow"
    # Look for "Tesla T4" in the nvidia-smi output; any failure means "not a T4".
    try: import subprocess; is_t4 = "Tesla T4" in str(subprocess.check_output(["nvidia-smi"]))
    except: is_t4 = False
    # A T4 gets the older vllm/triton pins, every other GPU the newer vllm.
    get_vllm, get_triton = ("vllm==0.9.2", "triton==3.2.0") if is_t4 else ("vllm==0.10.2", "triton")
    !uv pip install -qqq --upgrade \
        unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers
    !uv pip install -qqq {get_triton}
# Installed in both branches: pinned transformers/trl plus dataset and
# math-verification helpers.
!uv pip install transformers==4.56.2
!uv pip install --no-deps trl==0.22.2
!pip install -q datasets huggingface_hub tqdm pandas
!pip install -q math_verify latex2sympy2_extended

# Fresh clone of the Spider evaluator; the Spider cell later runs
# test-suite-sql-eval/evaluation.py from this checkout.
!rm -rf test-suite-sql-eval
!git clone -q https://github.com/taoyds/test-suite-sql-eval


In [None]:
!pip install -q git+https://github.com/EleutherAI/lm-evaluation-harness.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.8/100.8 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.1/91.1 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for lm_eval (pyproject.toml) ... [?25l[?25hdone
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Building wheel for sqlitedict (setup.py) ... [?25l[?25hdone
  Building wheel for word2number (setup.py) ... [?25l[?25hdone


In [None]:
from unsloth import FastLanguageModel, PatchFastRL
# Execute the Patch
PatchFastRL("GRPO", FastLanguageModel)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 02-08 14:48:00 [__init__.py:216] Automatically detected platform cuda.
🦥 Unsloth Zoo will now patch everything to make training faster!
Unsloth: UnslothAlignPropTrainer is already patched.
Unsloth: UnslothBCOTrainer is already patched.
Unsloth: UnslothCPOTrainer is already patched.
Unsloth: UnslothDDPOTrainer is already patched.
Unsloth: UnslothDPOTrainer is already patched.
Unsloth: UnslothGKDTrainer is already patched.
Unsloth: UnslothGRPOTrainer is already patched.
Unsloth: UnslothIterativeSFTTrainer is already patched.
Unsloth: UnslothKTOTrainer is already patched.
Unsloth: UnslothNashMDTrainer is already patched.
Unsloth: UnslothOnlineDPOTrainer is already patched.
Unsloth: UnslothORPOTrainer is already patched.
Unsloth: UnslothPPOTrainer is already patched.
Unsloth: UnslothPRMTrainer is already patched.
Unsloth: UnslothRewardTrainer is already patched.
Unsloth: UnslothRLOOTrainer is already patched.
Uns

In [None]:
# Checkpoint id handed to FastLanguageModel.from_pretrained in load_base().
BASE_MODEL = "unsloth/qwen2.5-coder-7b-instruct-bnb-4bit"
# Adapter id handed to PeftModel.from_pretrained in load_lora().
LORA_ADAPTER = "uavleeva/grpo_mixed_run_001"

# Forwarded to from_pretrained as max_seq_length / fast_inference.
MAX_SEQ_LENGTH = 2048
FAST_INFERENCE = True

# Number of leading examples evaluated by eval_gsm8k / run_spider.
GSM8K_LIMIT = 200
SPIDER_LIMIT = 200

# Passed to model.generate as max_new_tokens in generate().
# NOTE(review): equal to MAX_SEQ_LENGTH, so prompt + completion can exceed the
# configured sequence length — confirm this is intended.
MAX_NEW_TOKENS = 2048


In [None]:
import re, torch, pandas as pd
from tqdm.auto import tqdm

# System message sent with every request by generate(). It asks for a
# <reasoning>…</reasoning> block followed by an <answer>…</answer> block;
# extract_answer() reads the final output from the <answer> block.
SYSTEM_PROMPT = (
    """
    You are a helpful assistant.

    You MUST answer in exactly this structure (no text outside these tags):

    <reasoning>
    ...your step-by-step reasoning...
    </reasoning>
    <answer>
    ...final output only (see task-specific rules)...
    </answer>

    HARD REQUIREMENTS (apply to ALL tasks):
    - <reasoning> is REQUIRED AND MUST be CLOSED with </reasoning>. Do not omit it.
    - <answer> is REQUIRED AND MUST be CLOSED with </answer>.
    - Do not output anything before <reasoning> or after </answer>.
    - Do not use extra wrapper tags besides <reasoning> and <answer>.

    Task-specific rules (content inside <answer>):
    - SQL: output ONLY ONE SQL query. No explanation, no markdown/code fences.
    - MATH: output ONLY the final result (no extra text).
    - CODE (Python): output ONLY ONE fenced code block:
        ```python
        ...
        ```
      Nothing else inside <answer>. Read only with sys.stdin.read(), write only in stdout.
    """
)

# Non-greedy capture of the first <answer>…</answer> pair; DOTALL lets the
# payload span several lines.
ANSWER_RE = re.compile(r"<answer>\s*(.*?)\s*</answer>", re.DOTALL)

def extract_answer(t):
    """Return the stripped contents of the first <answer> block in *t*.

    Returns "" when *t* is empty/None or contains no complete
    <answer>…</answer> pair.
    """
    found = ANSWER_RE.search(t if t else "")
    if found is None:
        return ""
    return found.group(1).strip()


In [None]:
from unsloth import FastLanguageModel
from peft import PeftModel

def load_base():
    """Load BASE_MODEL in 4-bit through Unsloth and prepare it for inference.

    Returns:
        The (model, tokenizer) pair produced by
        FastLanguageModel.from_pretrained, after the model has been passed
        through FastLanguageModel.for_inference.
    """
    load_kwargs = {
        "max_seq_length": MAX_SEQ_LENGTH,
        "load_in_4bit": True,
        "fast_inference": FAST_INFERENCE,
    }
    model, tok = FastLanguageModel.from_pretrained(BASE_MODEL, **load_kwargs)
    FastLanguageModel.for_inference(model)
    return model, tok

def load_lora():
    """Load the base model and wrap it with the LORA_ADAPTER weights.

    Returns:
        (model, tokenizer), where the model is the PeftModel built on top of
        the freshly loaded base model and the tokenizer is the base one.
    """
    base, tok = load_base()
    adapted = PeftModel.from_pretrained(base, LORA_ADAPTER)
    return adapted, tok

#base_model, tok = load_base()
lora_model, tok_lora = load_lora()


INFO 02-08 14:48:14 [vllm_utils.py:702] Unsloth: Patching vLLM v1 graph capture
INFO 02-08 14:48:14 [vllm_utils.py:731] Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2026.1.4: Fast Qwen2 patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/qwen2.5-coder-7b-instruct-bnb-4bit with actual GPU utilization = 49.71%
Unsloth: Your GPU has CUDA compute capability 8.0 with VRAM = 79.32 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 2048. Num Sequences = 96.
Unsloth: vLLM's KV Cache can use up to 33.3 GB. Also swap space = 6 GB.
Unsloth: Disabling `di

`torch_dtype` is deprecated! Use `dtype` instead!


INFO 02-08 14:48:45 [__init__.py:1815] Using max model len 2048
INFO 02-08 14:48:49 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=8192.
Unsloth: vLLM Bitsandbytes config using kwargs = {'load_in_8bit': False, 'load_in_4bit': True, 'bnb_4bit_compute_dtype': 'bfloat16', 'bnb_4bit_quant_storage': 'uint8', 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'llm_int8_skip_modules': [], 'llm_int8_threshold': 6.0}


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/265 [00:00<?, ?B/s]

INFO 02-08 14:48:54 [core.py:76] Initializing a V1 LLM engine (v0.10.2) with config: model='unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', speculative_config=None, tokenizer='unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=2048, download_dir=None, load_format=bitsandbytes, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=bitsandbytes, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=unsloth/qwen2.5-coder-7b-instruct-bnb-4bit, enable_prefix_caching=

model.safetensors:   0%|          | 0.00/5.55G [00:00<?, ?B/s]

INFO 02-08 14:49:11 [weight_utils.py:369] Time spent downloading weights for unsloth/qwen2.5-coder-7b-instruct-bnb-4bit: 13.311652 seconds
INFO 02-08 14:49:11 [weight_utils.py:406] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 02-08 14:49:13 [punica_selector.py:19] Using PunicaWrapperGPU.
INFO 02-08 14:49:15 [gpu_model_runner.py:2392] Model loading took 5.5145 GiB and 17.532353 seconds
INFO 02-08 14:49:26 [backends.py:539] Using cache directory: /root/.cache/vllm/torch_compile_cache/e0b9df8805/rank_0_0/backbone for vLLM's torch.compile
INFO 02-08 14:49:26 [backends.py:550] Dynamo bytecode transform time: 10.46 s


Unsloth: Compiling kernels: 100%|██████████| 6/6 [00:00<00:00, 380.85it/s, triton_poi_fused_view_5]

INFO 02-08 14:49:30 [backends.py:194] Cache the graph for dynamic shape for later use



Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 29.56it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 537.97it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 583.58it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 572.37it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 610.03it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 581.09it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 548.96it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 560.11it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 565.97it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 543.48it/s, trito

INFO 02-08 14:50:05 [backends.py:215] Compiling a graph for dynamic shape takes 37.55 s
INFO 02-08 14:50:18 [monitor.py:34] torch.compile takes 48.01 s in total
INFO 02-08 14:50:20 [gpu_worker.py:298] Available KV cache memory: 32.56 GiB
INFO 02-08 14:50:21 [kv_cache_utils.py:864] GPU KV cache size: 609,680 tokens
INFO 02-08 14:50:21 [kv_cache_utils.py:868] Maximum concurrency for 2,048 tokens per request: 297.70x
INFO 02-08 14:50:21 [vllm_utils.py:707] Unsloth: Running patched vLLM v1 `capture_model`.


Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 27/27 [00:15<00:00,  1.73it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 15/15 [00:03<00:00,  3.83it/s]

INFO 02-08 14:50:41 [gpu_model_runner.py:3118] Graph capturing finished in 20 secs, took 0.89 GiB
INFO 02-08 14:50:41 [vllm_utils.py:714] Unsloth: Patched vLLM v1 graph capture finished in 20 secs.





INFO 02-08 14:50:42 [gpu_worker.py:391] Free memory on device (78.79/79.32 GiB) on startup. Desired GPU memory utilization is (0.4971440091908808, 39.43 GiB). Actual usage is 5.51 GiB for weight, 1.34 GiB for peak activation, 0.02 GiB for non-torch memory, and 0.89 GiB for CUDAGraph memory. Replace gpu_memory_utilization config with `--kv-cache-memory=33852623872` to fit into requested memory, or `--kv-cache-memory=76113316864` to fully utilize gpu memory. Current kv cache memory in use is 34962017280 bytes.
INFO 02-08 14:50:43 [core.py:218] init engine (profile, create kv cache, warmup model) took 88.06 seconds
INFO 02-08 14:50:44 [llm.py:295] Supported_tasks: ('generate',)
INFO 02-08 14:50:44 [__init__.py:36] No IOProcessor plugins requested by the model
Unsloth: Just some info: will skip parsing ['layer_norm1', 'input_layernorm', 'q_norm', 'pre_feedforward_layernorm', 'ffn_norm', 'attention_norm', 'k_norm', 'norm1', 'norm2', 'layer_norm2', 'norm', 'post_attention_layernorm', 'post_f

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

adapter_config.json: 0.00B [00:00, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/646M [00:00<?, ?B/s]

In [None]:
@torch.inference_mode()
def generate(model, tok, user):
    """Generate one completion for *user* under SYSTEM_PROMPT.

    Args:
        model: Causal LM exposing ``.device`` and ``.generate``.
        tok: Tokenizer providing ``apply_chat_template`` and ``decode``.
        user: Content of the user turn.

    Returns:
        The decoded completion only (prompt tokens removed, special tokens
        skipped).
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user},
    ]

    # return_dict=True yields the attention mask alongside input_ids. Passing it
    # explicitly avoids the "attention mask is not set and cannot be inferred"
    # warning, which appears because the pad token equals the EOS token.
    enc = tok.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    prompt_len = enc["input_ids"].shape[1]
    out = model.generate(
        input_ids=enc["input_ids"],
        attention_mask=enc["attention_mask"],
        max_new_tokens=MAX_NEW_TOKENS,
    )
    # generate() returns prompt + completion; keep only the new tokens.
    gen = out[0][prompt_len:]
    return tok.decode(gen, skip_special_tokens=True)


In [None]:
from datasets import load_dataset

def parse_gold(a):
    """Extract the reference answer that follows the ``####`` marker.

    The number is returned in canonical string form so it can be compared by
    equality: thousands separators and a leading "+" are removed, and a
    fractional part made only of zeros is dropped ("3.00" -> "3").

    Args:
        a: Full reference solution text (may be None or empty).

    Returns:
        The canonical number as a string, or "" when no marker is found.
    """
    import re

    m = re.search(r"####\s*([-+]?\d[\d,]*(?:\.\d+)?)", a or "")
    if not m:
        return ""
    num = m.group(1).replace(",", "").lstrip("+")
    # Only trim zeros when there is a decimal point, otherwise "100" -> "1".
    if "." in num:
        num = num.rstrip("0").rstrip(".")
    return num

def _last_number(text):
    """Return the last number found in *text* in canonical form, or "".

    Thousands separators are removed and an all-zero fractional part is
    dropped, so "1,200" -> "1200" and "18.00" -> "18".
    """
    import re

    found = re.findall(r"-?\d+(?:,\d{3})*(?:\.\d+)?", text or "")
    if not found:
        return ""
    num = found[-1].replace(",", "")
    # Only trim zeros when there is a decimal point, otherwise "100" -> "1".
    if "." in num:
        num = num.rstrip("0").rstrip(".")
    return num


def eval_gsm8k(model, tok):
    """Measure exact-match accuracy on the first GSM8K_LIMIT GSM8K test items.

    Each question is sent through generate() with a "TASK: MATH" prefix. The
    last number inside the <answer> block is compared with the gold answer
    returned by parse_gold().

    Args:
        model: Model handed to generate().
        tok: Tokenizer handed to generate().

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when no example is evaluated.
    """
    ds = load_dataset("openai/gsm8k", "main", split="test")
    # None means "whole split"; never ask for more rows than the split holds.
    limit = len(ds) if GSM8K_LIMIT is None else min(GSM8K_LIMIT, len(ds))
    ds = ds.select(range(limit))

    correct = 0

    for ex in tqdm(ds):
        pred_raw = generate(model, tok, "TASK: MATH\n" + ex["question"])
        pred_num = _last_number(extract_answer(pred_raw))

        gold = parse_gold(ex["answer"])

        # An unparsable gold ("") must not match an empty prediction.
        if gold and pred_num == gold:
            correct += 1

    acc = correct / len(ds) if len(ds) else 0.0
    print("accuracy:", acc)
    return acc

#print("BASE:")
#eval_gsm8k(base_model, tok)

print("LORA:")
eval_gsm8k(lora_model, tok_lora)


LORA:


README.md: 0.00B [00:00, ?B/s]

main/train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

main/test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

  0%|          | 0/200 [00:00<?, ?it/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


accuracy: 0.835


0.835

In [None]:
# Settings intended for the lm_eval runs.
# NOTE(review): the lm_eval shell cells visible in this notebook hard-code
# their own tasks, limits and prompts and do not read these names — confirm
# whether they are still needed.
LM_EVAL_TASKS = ["hendrycks_math", "aime", "humaneval", "mbpp", "arc_challenge", "hellaswag", "mmlu", "mmlu-pro-plus"]
LM_EVAL_LIMIT = 200      # for a quick run; None = full evaluation
LM_EVAL_NUM_FEWSHOT = 0

# One system prompt for both Hendrycks MATH and HumanEval to avoid "all-zeros" from format issues
LM_EVAL_SYSTEM_INSTRUCTION = (
    "You are a strict evaluation engine. Do not provide explanations. "
    "If the prompt contains 'Answer:' then output ONLY the final answer after it (no extra text). "
    "If the prompt is Python code (e.g., a function signature + docstring), output ONLY the code continuation: "
    "write just the function body with correct indentation, do NOT repeat the function signature, "
    "and NEVER use markdown fences."
)

# Conservative generation settings to reduce chatty/incomplete outputs on generative tasks
LM_EVAL_GEN_KWARGS = "do_sample=False,temperature=0.0,max_gen_toks=512"


In [None]:
from google.colab import drive
drive.mount("/content/drive")

ValueError: Mountpoint must not already contain files

In [None]:
# Baseline (no adapter) on hendrycks_math_algebra: zero-shot, limit 200, chat
# template applied, sampling disabled, at most 64 generated tokens, stopping at
# a newline or "Problem:".
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",trust_remote_code=True \
  --tasks hendrycks_math_algebra \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --apply_chat_template \
  --system_instruction "Return ONLY the final answer after 'Answer:'. No explanation. No extra text." \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":64,"until":["\n","Problem:"]}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_baseline_hendrycks_math_algebra.json"


2026-02-06:00:52:32 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-06:00:52:39 INFO     [_cli.run:376] Selected Tasks: ['hendrycks_math_algebra']
2026-02-06:00:52:39 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-06:00:52:39 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'trust_remote_code': True}
2026-02-06:00:52:42 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-06:00:52:44 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-06 00:52:45.101516: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770339165.122304  

In [None]:
# Baseline (no adapter) on humaneval: zero-shot, limit 200, sampling disabled,
# at most 512 generated tokens. Unlike the math cells, this run passes neither
# --apply_chat_template nor --system_instruction.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",trust_remote_code=True \
  --tasks humaneval \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --confirm_run_unsafe_code \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":512}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_baseline_humaneval.json"

2026-02-06:00:54:35 INFO     [_cli.run:376] Selected Tasks: ['humaneval']
        setting `apply_chat_template` (optionally `fewshot_as_multiturn`).
2026-02-06:00:54:35 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-06:00:54:35 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'trust_remote_code': True}
2026-02-06:00:54:39 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-06:00:54:41 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-06 00:54:42.126144: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770339282.148702   15556 cuda_dnn.cc:8579] Unable to re

In [None]:
lora_model, tok_lora = load_lora()

INFO 02-06 01:06:59 [vllm_utils.py:702] Unsloth: Patching vLLM v1 graph capture
INFO 02-06 01:06:59 [vllm_utils.py:731] Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2026.1.4: Fast Qwen2 patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.318 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/qwen2.5-coder-7b-instruct-bnb-4bit with actual GPU utilization = 24.58%
Unsloth: Your GPU has CUDA compute capability 8.0 with VRAM = 79.32 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 2048. Num Sequences = 64.
Unsloth: vLLM's KV Cache can use up to 13.36 GB. Also swap space = 6 GB.
Unsloth: Disabling `d

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 02-06 01:07:22 [gpu_model_runner.py:2392] Model loading took 5.5066 GiB and 3.369530 seconds
INFO 02-06 01:07:32 [backends.py:539] Using cache directory: /root/.cache/vllm/torch_compile_cache/10caad18e8/rank_0_0/backbone for vLLM's torch.compile
INFO 02-06 01:07:32 [backends.py:550] Dynamo bytecode transform time: 9.26 s


Unsloth: Compiling kernels: 100%|██████████| 6/6 [00:00<00:00, 400.99it/s, triton_poi_fused_view_5]

INFO 02-06 01:07:35 [backends.py:194] Cache the graph for dynamic shape for later use



Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 425.18it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 554.46it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 630.21it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 591.26it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 597.83it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 602.60it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 605.59it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 619.40it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 650.22it/s, triton_poi_fused_view_9]
Unsloth: Compiling kernels: 100%|██████████| 10/10 [00:00<00:00, 641.63it/s, trit

INFO 02-06 01:08:10 [backends.py:215] Compiling a graph for dynamic shape takes 36.48 s





INFO 02-06 01:08:16 [monitor.py:34] torch.compile takes 45.74 s in total
INFO 02-06 01:08:18 [gpu_worker.py:298] Available KV cache memory: 12.95 GiB
INFO 02-06 01:08:19 [kv_cache_utils.py:864] GPU KV cache size: 242,528 tokens
INFO 02-06 01:08:19 [kv_cache_utils.py:868] Maximum concurrency for 2,048 tokens per request: 118.42x
INFO 02-06 01:08:19 [vllm_utils.py:707] Unsloth: Running patched vLLM v1 `capture_model`.
INFO 02-06 01:08:19 [vllm_utils.py:707] Unsloth: Running patched vLLM v1 `capture_model`.


Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 19/19 [00:05<00:00,  3.80it/s]
Capturing CUDA graphs (decode, FULL): 100%|██████████| 11/11 [00:02<00:00,  3.70it/s]

INFO 02-06 01:08:27 [gpu_model_runner.py:3118] Graph capturing finished in 8 secs, took 0.51 GiB
INFO 02-06 01:08:27 [vllm_utils.py:714] Unsloth: Patched vLLM v1 graph capture finished in 8 secs.
INFO 02-06 01:08:27 [vllm_utils.py:714] Unsloth: Patched vLLM v1 graph capture finished in 8 secs.





INFO 02-06 01:08:29 [gpu_worker.py:391] Free memory on device (38.99/79.32 GiB) on startup. Desired GPU memory utilization is (0.24577064812484947, 19.49 GiB). Actual usage is 5.51 GiB for weight, 1.03 GiB for peak activation, 0.0 GiB for non-torch memory, and 0.51 GiB for CUDAGraph memory. Replace gpu_memory_utilization config with `--kv-cache-memory=13207441920` to fit into requested memory, or `--kv-cache-memory=34139050496` to fully utilize gpu memory. Current kv cache memory in use is 13907890688 bytes.
INFO 02-06 01:08:29 [core.py:218] init engine (profile, create kv cache, warmup model) took 67.02 seconds
INFO 02-06 01:08:31 [llm.py:295] Supported_tasks: ('generate',)
INFO 02-06 01:08:31 [__init__.py:36] No IOProcessor plugins requested by the model
Unsloth: Just some info: will skip parsing ['q_norm', 'norm1', 'layer_norm1', 'attention_norm', 'post_attention_layernorm', 'norm', 'input_layernorm', 'pre_feedforward_layernorm', 'k_norm', 'norm2', 'layer_norm2', 'ffn_norm', 'post_f

In [None]:
# Mixed-task adapter (uavleeva/grpo_mixed_run_001) on arc_challenge and
# hellaswag: zero-shot, limit 200, chat template applied, default generation
# settings.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_mixed_run_001",trust_remote_code=True \
  --tasks arc_challenge,hellaswag \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --apply_chat_template \
  --confirm_run_unsafe_code \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_mix_lora.json"


2026-02-08:15:30:29 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-08:15:30:36 INFO     [_cli.run:376] Selected Tasks: ['arc_challenge', 'hellaswag']
2026-02-08:15:30:36 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-08:15:30:36 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_mixed_run_001', 'trust_remote_code': True}
2026-02-08:15:30:39 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-08:15:30:41 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-08 15:30:42.360025: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been

In [None]:
# SQL adapter (uavleeva/grpo_sql_run_002) on arc_challenge and hellaswag:
# zero-shot, limit 200, chat template applied, default generation settings.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_sql_run_002",trust_remote_code=True \
  --tasks arc_challenge,hellaswag \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --apply_chat_template \
  --confirm_run_unsafe_code \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_sql_lora.json"


2026-02-06:04:58:17 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-06:04:58:24 INFO     [_cli.run:376] Selected Tasks: ['arc_challenge', 'hellaswag']
2026-02-06:04:58:24 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-06:04:58:24 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_sql_run_002', 'trust_remote_code': True}
2026-02-06:04:58:27 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-06:04:58:29 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-06 04:58:30.677415: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been r

In [None]:
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_code_run_001",trust_remote_code=True \
  --tasks hendrycks_math_algebra \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --apply_chat_template \
  --system_instruction "Return ONLY the final answer after 'Answer:'. No explanation. No extra text." \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":64,"until":["\n","Problem:"]}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_baseline_hendrycks_math_code.json"


2026-02-06:19:54:25 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-06:19:54:31 INFO     [_cli.run:376] Selected Tasks: ['hendrycks_math_algebra']
2026-02-06:19:54:31 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-06:19:54:31 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_code_run_001', 'trust_remote_code': True}
2026-02-06:19:54:34 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-06:19:54:36 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-06 19:54:37.844368: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been regi

In [None]:
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_mixed_run_001",trust_remote_code=True \
  --tasks hendrycks_math_algebra \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --apply_chat_template \
  --system_instruction "Return ONLY the final answer after 'Answer:'. No explanation. No extra text." \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":64,"until":["\n","Problem:"]}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_baseline_hendrycks_math_algebra.json"


2026-02-08:16:05:31 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-08:16:05:38 INFO     [_cli.run:376] Selected Tasks: ['hendrycks_math_algebra']
2026-02-08:16:05:38 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-08:16:05:38 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_mixed_run_001', 'trust_remote_code': True}
2026-02-08:16:05:41 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-08:16:05:43 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-08 16:05:45.012182: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been reg

In [None]:
# Code adapter (uavleeva/grpo_code_run_001) on humaneval: zero-shot, limit 200,
# sampling disabled, at most 512 generated tokens, no chat template.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_code_run_001",trust_remote_code=True \
  --tasks humaneval \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --confirm_run_unsafe_code \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":512}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_lora_humaneval_code.json"

2026-02-06:19:57:31 INFO     [_cli.run:376] Selected Tasks: ['humaneval']
        setting `apply_chat_template` (optionally `fewshot_as_multiturn`).
2026-02-06:19:57:31 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-06:19:57:31 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_code_run_001', 'trust_remote_code': True}
2026-02-06:19:57:34 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-06:19:57:36 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-06 19:57:37.581741: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770407857.604638 

In [None]:
# Mixed-task adapter (uavleeva/grpo_mixed_run_001) on mbpp_instruct: zero-shot,
# limit 200, chat template applied with a "code only" system instruction,
# sampling disabled, at most 512 generated tokens.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_mixed_run_001",trust_remote_code=True \
  --tasks mbpp_instruct \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --confirm_run_unsafe_code \
  --apply_chat_template \
  --system_instruction "Output ONLY Python code. No markdown fences. No explanation." \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":512}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_lora_mbpp_instruct.json"

2026-02-08:16:58:50 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-08:16:58:57 INFO     [_cli.run:376] Selected Tasks: ['mbpp_instruct']
2026-02-08:16:58:57 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-08:16:58:57 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_mixed_run_001', 'trust_remote_code': True}
2026-02-08:16:59:00 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-08:16:59:02 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-08 16:59:03.320829: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E

In [None]:
# Baseline (no adapter) on mbpp_instruct, with the same settings as the
# adapter run on this task: zero-shot, limit 200, chat template applied with a
# "code only" system instruction, sampling disabled, at most 512 generated
# tokens.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",trust_remote_code=True \
  --tasks mbpp_instruct \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --confirm_run_unsafe_code \
  --apply_chat_template \
  --system_instruction "Output ONLY Python code. No markdown fences. No explanation." \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":512}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_baseline_mbpp_instruct.json"

2026-02-08:07:25:30 INFO     [config.evaluate_config:301] Using default fewshot_as_multiturn=True.
2026-02-08:07:25:37 INFO     [_cli.run:376] Selected Tasks: ['mbpp_instruct']
2026-02-08:07:25:37 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-08:07:25:37 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'trust_remote_code': True}
2026-02-08:07:25:40 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-08:07:25:42 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-08 07:25:43.058869: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770535543.079401   19213 cu

In [None]:
# Mixed-task adapter (uavleeva/grpo_mixed_run_001) on humaneval: zero-shot,
# limit 200, sampling disabled, at most 512 generated tokens, no chat template.
!HF_ALLOW_CODE_EVAL=1 lm_eval \
  --model hf \
  --model_args pretrained="unsloth/qwen2.5-coder-7b-instruct-bnb-4bit",peft="uavleeva/grpo_mixed_run_001",trust_remote_code=True \
  --tasks humaneval \
  --num_fewshot 0 \
  --limit 200 \
  --batch_size 1 \
  --confirm_run_unsafe_code \
  --gen_kwargs '{"do_sample":false,"temperature":0.0,"max_gen_toks":512}' \
  --output_path "/content/drive/MyDrive/HSE GRPO/lm_eval_lora_humaneval.json"

2026-02-08:15:46:59 INFO     [_cli.run:376] Selected Tasks: ['humaneval']
        setting `apply_chat_template` (optionally `fewshot_as_multiturn`).
2026-02-08:15:46:59 INFO     [evaluator:211] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2026-02-08:15:46:59 INFO     [evaluator:236] Initializing hf model, with arguments: {'pretrained': 'unsloth/qwen2.5-coder-7b-instruct-bnb-4bit', 'peft': 'uavleeva/grpo_mixed_run_001', 'trust_remote_code': True}
2026-02-08:15:47:02 INFO     [models.huggingface:161] Using device 'cuda:0'
2026-02-08:15:47:04 INFO     [models.huggingface:423] Model parallel was set to False, max memory was not set, and device map was set to {'': 'cuda:0'}
2026-02-08 15:47:05.380005: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770565625.401019

In [None]:
import os, json, sqlite3, subprocess
from huggingface_hub import snapshot_download
import nltk, os

# Fetch the NLTK "punkt" / "punkt_tab" tokenizer resources into a fixed
# directory (presumably required by the Spider evaluation script - verify).
nltk_data_dir = "/root/nltk_data"
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.download("punkt", download_dir=nltk_data_dir)
nltk.download("punkt_tab", download_dir=nltk_data_dir)


# Download the Spider dataset repository into ./spider_data as real files.
SPIDER_DIR = snapshot_download(
    repo_id="prem-research/spider",
    repo_type="dataset",
    local_dir="spider_data",
    local_dir_use_symlinks=False,
)

# Load the validation examples; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open("spider_data/validation.json", encoding="utf-8") as _val_file:
    val = json.load(_val_file)

def schema(db):
    """Return the table definitions of a SQLite database as one string.

    Args:
        db: Path to the SQLite database file.

    Returns:
        The ``sql`` column of every ``type='table'`` row in ``sqlite_master``,
        joined with newlines. Rows whose ``sql`` is empty/NULL are skipped, so
        a database without tables yields an empty string.

    Raises:
        sqlite3.Error: If the file cannot be queried as a SQLite database.
    """
    con = sqlite3.connect(db)
    try:
        cur = con.execute("SELECT sql FROM sqlite_master WHERE type='table'")
        return "\n".join(sql for (sql,) in cur if sql)
    finally:
        # Release the connection even when the query raises.
        con.close()

def run_spider(model, tok, prefix):
    """Generate SQL for the Spider validation subset and write eval inputs.

    For each of the first ``SPIDER_LIMIT`` validation examples, one line is
    appended to ``<prefix>_gold.txt`` (gold query, a tab, then the db id) and
    one line to ``<prefix>_pred.txt`` (the predicted query up to its first
    ``;``).

    Args:
        model: Model object forwarded to ``generate``.
        tok: Tokenizer object forwarded to ``generate``.
        prefix: Filename prefix for the two output files.
    """
    # ``with`` guarantees both files are flushed and closed even if generation
    # fails part-way through the loop.
    with open(prefix + "_gold.txt", "w") as gold, open(prefix + "_pred.txt", "w") as pred:
        for ex in tqdm(val[:SPIDER_LIMIT]):
            gold.write(ex["query"].replace("\n", " ") + "\t" + ex["db_id"] + "\n")

            db = f"spider_data/database/{ex['db_id']}/{ex['db_id']}.sqlite"
            prompt = "TASK: SQL\n" + schema(db) + "\nQuestion:" + ex["question"]

            raw = generate(model, tok, prompt)
            sql = extract_answer(raw).split(";")[0]
            # The evaluator reads one prediction per line and splits on tabs,
            # so a multi-line or tab-containing query would shift or truncate
            # predictions relative to the gold file. Flatten it to one line.
            sql = sql.replace("\r", " ").replace("\n", " ").replace("\t", " ").strip()
            pred.write(sql + ";\n")

# Prefix of the gold/pred files to generate and score. Using one variable for
# both steps keeps the evaluator from being pointed at files that were never
# written (previously it scored "base_*" while only the "lora" run executed,
# which failed with FileNotFoundError).
EVAL_PREFIX = "lora"

#run_spider(base_model, tok, "base")
run_spider(lora_model, tok_lora, EVAL_PREFIX)

import textwrap

# Execution-accuracy scoring with the test-suite-sql-eval script.
cmd = [
    "python",
    "test-suite-sql-eval/evaluation.py",
    "--gold", f"{EVAL_PREFIX}_gold.txt",
    "--pred", f"{EVAL_PREFIX}_pred.txt",
    "--db", "spider_data/database",
    "--etype", "exec",
    "--plug_value",
]

p = subprocess.run(cmd, capture_output=True, text=True)
print("returncode:", p.returncode)

# Only the first 4000 characters of each stream are shown to keep output short.
print("\n--- STDOUT ---\n", p.stdout[:4000])
print("\n--- STDERR ---\n", p.stderr[:4000])

if p.returncode != 0:
    raise RuntimeError("Spider eval failed, see stderr above.")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


.gitattributes: 0.00B [00:00, ?B/s]

README.md:   0%|          | 0.00/289 [00:00<?, ?B/s]

academic.sqlite:   0%|          | 0.00/123k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

activity_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

aircraft.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

allergy_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

apartment_rentals.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

architecture.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

assets_maintenance.sqlite:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/baseball_1/baseball_1.sqlite:   0%|          | 0.00/29.8M [00:00<?, ?B/s]

database/baseball_1/schema.sql:   0%|          | 0.00/57.6M [00:00<?, ?B/s]

battle_death.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

behavior_monitoring.sqlite:   0%|          | 0.00/65.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/bike_1/bike_1.sqlite:   0%|          | 0.00/1.79M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

body_builder.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

book_2.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

browser_web.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

candidate_poll.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

car_1.json: 0.00B [00:00, ?B/s]

car_1.sql:   0%|          | 0.00/963 [00:00<?, ?B/s]

car_1.sqlite:   0%|          | 0.00/65.5k [00:00<?, ?B/s]

README.CARS.TXT: 0.00B [00:00, ?B/s]

car-makers.csv:   0%|          | 0.00/588 [00:00<?, ?B/s]

car-names.csv: 0.00B [00:00, ?B/s]

cars-data.csv: 0.00B [00:00, ?B/s]

cars.desc: 0.00B [00:00, ?B/s]

continents.csv:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

countries.csv:   0%|          | 0.00/248 [00:00<?, ?B/s]

model-list.csv:   0%|          | 0.00/569 [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

annotation.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

chinook_1.sqlite:   0%|          | 0.00/901k [00:00<?, ?B/s]

cinema.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

city_record.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

climbing.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

club_1.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

coffee_shop.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

TinyCollege.sql: 0.00B [00:00, ?B/s]

college_1.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

TextBookExampleSchema.sql: 0.00B [00:00, ?B/s]

database/college_2/college_2.sqlite:   0%|          | 0.00/2.12M [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

college_3.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

company_1.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/78.0 [00:00<?, ?B/s]

company_employee.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

company_office.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

concert_singer.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

county_public_safety.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

course_teach.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Doc_Control_Systems.sqlite:   0%|          | 0.00/81.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Doc_Template_Mgt.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Doc_Tracking_DB.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Docs_and_Epenses.sqlite:   0%|          | 0.00/41.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Drama_Workshop_Groups.sqlite:   0%|          | 0.00/147k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Theme_park.sqlite:   0%|          | 0.00/94.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

csu_1.sqlite:   0%|          | 0.00/102k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

culture_company.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customer_complaints.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customer_deliveries.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_and_addresses.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_and_invoices.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_and_products_contacts.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_campaigns_ecommerce.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_card_transactions.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

debate.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

decoration_competition.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

department_management.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

department_store.sqlite:   0%|          | 0.00/86.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

device.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

document_management.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

dog_kennels.sqlite:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

dorm_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

driving_school.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

e_government.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

e_learning.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

election.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

election_representative.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

employee_hire_evaluation.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

entertainment_awards.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

entrepreneur.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

epinions_1.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

farm.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

film_rank.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

flight_1.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

README.AIRLINES.txt: 0.00B [00:00, ?B/s]

airlines.csv:   0%|          | 0.00/481 [00:00<?, ?B/s]

airports100.csv: 0.00B [00:00, ?B/s]

flights.csv: 0.00B [00:00, ?B/s]

flight_2.json: 0.00B [00:00, ?B/s]

flight_2.sql:   0%|          | 0.00/502 [00:00<?, ?B/s]

flight_2.sqlite:   0%|          | 0.00/77.8k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

database/flight_4/flight_4.sqlite:   0%|          | 0.00/3.13M [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

sql.txt:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

flight_company.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

circuits.csv: 0.00B [00:00, ?B/s]

constructorResults.csv: 0.00B [00:00, ?B/s]

constructorStandings.csv: 0.00B [00:00, ?B/s]

constructors.csv: 0.00B [00:00, ?B/s]

driverStandings.csv: 0.00B [00:00, ?B/s]

drivers.csv: 0.00B [00:00, ?B/s]

database/formula_1/data_csv/lapTimes.csv:   0%|          | 0.00/12.1M [00:00<?, ?B/s]

pitStops.csv: 0.00B [00:00, ?B/s]

qualifying.csv: 0.00B [00:00, ?B/s]

races.csv: 0.00B [00:00, ?B/s]

results.csv: 0.00B [00:00, ?B/s]

seasons.csv: 0.00B [00:00, ?B/s]

status.csv: 0.00B [00:00, ?B/s]

formula_1.sql: 0.00B [00:00, ?B/s]

database/formula_1/formula_1.sqlite:   0%|          | 0.00/2.94M [00:00<?, ?B/s]

game_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

game_injury.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

gas_company.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

geo.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

gymnast.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

hospital_1.sqlite:   0%|          | 0.00/98.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

hr_1.sqlite:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

icfp_1.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

q.txt:   0%|          | 0.00/762 [00:00<?, ?B/s]

imdb.sqlite:   0%|          | 0.00/131k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

change_date.py:   0%|          | 0.00/696 [00:00<?, ?B/s]

README.INN.TXT: 0.00B [00:00, ?B/s]

Reservations.csv: 0.00B [00:00, ?B/s]

Reservations_t.csv: 0.00B [00:00, ?B/s]

Rooms.csv:   0%|          | 0.00/627 [00:00<?, ?B/s]

inn_1.sql:   0%|          | 0.00/428 [00:00<?, ?B/s]

inn_1.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt:   0%|          | 0.00/927 [00:00<?, ?B/s]

insurance_and_eClaims.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

insurance_fnol.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

insurance_policies.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

journal_committee.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

loan_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

local_govt_and_lot.sqlite:   0%|          | 0.00/65.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

local_govt_in_alabama.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

local_govt_mdm.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

machine_repair.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

manufactory_1.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

manufacturer.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

match_season.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

medicine_enzyme_interaction.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

mountain_photos.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

movie_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

museum_visit.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

music_1.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

music_2.sqlite:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

music_4.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

musical.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

network_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

network_2.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql:   0%|          | 0.00/785 [00:00<?, ?B/s]

new_concert_singer.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

new_orchestra.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

new_pets_1.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

news_report.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

orchestra.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

party_host.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

party_people.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

performance_attendance.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

perpetrator.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

pets_1.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

phone_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

phone_market.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

pilot_record.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

poker_player.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

product_catalog.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

products_for_hire.sqlite:   0%|          | 0.00/41.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

products_gen_characteristics.sqlite:   0%|          | 0.00/41.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

program_share.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

protein_institute.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

race_track.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

railway.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

real_estate_properties.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

restaurant_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

restaurants.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql:   0%|          | 0.00/589 [00:00<?, ?B/s]

riding_club.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

roller_coaster.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/sakila_1/sakila_1.sqlite:   0%|          | 0.00/3.71M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

scholar.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

school_bus.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

school_finance.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

school_player.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

scientist_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

ship_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

ship_mission.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

shop_membership.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

singer.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

small_bank_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

database/soccer_1/schema.sql:   0%|          | 0.00/322M [00:00<?, ?B/s]

database/soccer_1/soccer_1.sqlite:   0%|          | 0.00/317M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

soccer_2.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

solvency_ii.sqlite:   0%|          | 0.00/73.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

sports_competition.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

station_weather.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/store_1/store_1.sqlite:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

store_product.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

storm_record.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

README.STUDENTS.TXT: 0.00B [00:00, ?B/s]

list.csv: 0.00B [00:00, ?B/s]

teachers.csv:   0%|          | 0.00/323 [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

student_1.sql:   0%|          | 0.00/265 [00:00<?, ?B/s]

student_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

student_assessment.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

student_transcripts_tracking.sqlite:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

swimming.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

theme_gallery.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_grants_for_research.sqlite:   0%|          | 0.00/73.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_orders.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_share_transactions.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_software_problems.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

train_station.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tvshow.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

oracle-dialects.xml: 0.00B [00:00, ?B/s]

postgres-dialects.xml: 0.00B [00:00, ?B/s]

sqlserver-dialects.xml: 0.00B [00:00, ?B/s]

twitter_1.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

university_basketball.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

voter_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

voter_2.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

wedding.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

README.WINE.txt: 0.00B [00:00, ?B/s]

appellations.csv: 0.00B [00:00, ?B/s]

grapes.csv:   0%|          | 0.00/450 [00:00<?, ?B/s]

wine.csv: 0.00B [00:00, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

wine_1.sql:   0%|          | 0.00/574 [00:00<?, ?B/s]

wine_1.sqlite:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

workshop_paper.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

world_1.json: 0.00B [00:00, ?B/s]

world_1.sqlite:   0%|          | 0.00/319k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

wrestler.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

wta_1.sql: 0.00B [00:00, ?B/s]

database/wta_1/wta_1.sqlite:   0%|          | 0.00/105M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

yelp.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

train.json:   0%|          | 0.00/35.8M [00:00<?, ?B/s]

validation.json: 0.00B [00:00, ?B/s]

  0%|          | 0/200 [00:00<?, ?it/s]

returncode: 1

--- STDOUT ---
 

--- STDERR ---
  "YEAR\s*\(\s*CURDATE\s*\(\s*\)\s*\)\s*", "2020", query, flags=re.IGNORECASE
  float_nums = re.findall("[-+]?\d*\.\d+", query)
  int_nums = [i.strip() for i in re.findall("[^tT]\d+", query)]
  table = re.findall("[Tt]\d+\.", tok)
  for table, col, val1, val2 in re.findall('(?:([^\.\s]*)\.)?([^\.\s]+) between ([^\s;]+) and ([^\s;]+)', query, re.IGNORECASE):
Traceback (most recent call last):
  File "/content/test-suite-sql-eval/evaluation.py", line 938, in <module>
    evaluate(args.gold, args.pred, args.db, args.etype, kmaps, args.plug_value, args.keep_distinct, args.progress_bar_for_each_datapoint)
  File "/content/test-suite-sql-eval/evaluation.py", line 506, in evaluate
    with open(gold) as f:
         ^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'base_gold.txt'



RuntimeError: Spider eval failed, see stderr above.

In [None]:
# Score the LoRA predictions with the test-suite-sql-eval script
# (execution-match mode) and show the evaluator's exit code and output.
cmd = ["python", "test-suite-sql-eval/evaluation.py"]
cmd += ["--gold", "lora_gold.txt"]
cmd += ["--pred", "lora_pred.txt"]
cmd += ["--db", "spider_data/database"]
cmd += ["--etype", "exec"]
cmd += ["--plug_value"]

# Capture both streams as text so they can be printed (truncated) below.
p = subprocess.run(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

print("returncode:", p.returncode)
print("\n--- STDOUT ---\n", p.stdout[:4000])
print("\n--- STDERR ---\n", p.stderr[:4000])

returncode: 0

--- STDOUT ---
                      easy                 medium               hard                 extra                all                 
count                41                   84                   35                   40                   200                 
execution            0.951                0.786                0.743                0.500                0.755               


--- STDERR ---
 


In [None]:
import os, json, sqlite3, subprocess
from huggingface_hub import snapshot_download
import nltk, os

# Fetch the NLTK "punkt" / "punkt_tab" tokenizer resources into a fixed
# directory (presumably required by the Spider evaluation script - verify).
nltk_data_dir = "/root/nltk_data"
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.download("punkt", download_dir=nltk_data_dir)
nltk.download("punkt_tab", download_dir=nltk_data_dir)


# Download the Spider dataset repository into ./spider_data as real files.
SPIDER_DIR = snapshot_download(
    repo_id="prem-research/spider",
    repo_type="dataset",
    local_dir="spider_data",
    local_dir_use_symlinks=False,
)

# Load the validation examples; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open("spider_data/validation.json", encoding="utf-8") as _val_file:
    val = json.load(_val_file)

def schema(db):
    """Return the table definitions of a SQLite database as one string.

    Args:
        db: Path to the SQLite database file.

    Returns:
        The ``sql`` column of every ``type='table'`` row in ``sqlite_master``,
        joined with newlines. Rows whose ``sql`` is empty/NULL are skipped, so
        a database without tables yields an empty string.

    Raises:
        sqlite3.Error: If the file cannot be queried as a SQLite database.
    """
    con = sqlite3.connect(db)
    try:
        cur = con.execute("SELECT sql FROM sqlite_master WHERE type='table'")
        return "\n".join(sql for (sql,) in cur if sql)
    finally:
        # Release the connection even when the query raises.
        con.close()

def run_spider(model, tok, prefix):
    """Generate SQL for the Spider validation subset and write eval inputs.

    For each of the first ``SPIDER_LIMIT`` validation examples, one line is
    appended to ``<prefix>_gold.txt`` (gold query, a tab, then the db id) and
    one line to ``<prefix>_pred.txt`` (the predicted query up to its first
    ``;``).

    Args:
        model: Model object forwarded to ``generate``.
        tok: Tokenizer object forwarded to ``generate``.
        prefix: Filename prefix for the two output files.
    """
    # ``with`` guarantees both files are flushed and closed even if generation
    # fails part-way through the loop.
    with open(prefix + "_gold.txt", "w") as gold, open(prefix + "_pred.txt", "w") as pred:
        for ex in tqdm(val[:SPIDER_LIMIT]):
            gold.write(ex["query"].replace("\n", " ") + "\t" + ex["db_id"] + "\n")

            db = f"spider_data/database/{ex['db_id']}/{ex['db_id']}.sqlite"
            prompt = "TASK: SQL\n" + schema(db) + "\nQuestion:" + ex["question"]

            raw = generate(model, tok, prompt)
            sql = extract_answer(raw).split(";")[0]
            # The evaluator reads one prediction per line and splits on tabs,
            # so a multi-line or tab-containing query would shift or truncate
            # predictions relative to the gold file. Flatten it to one line.
            sql = sql.replace("\r", " ").replace("\n", " ").replace("\t", " ").strip()
            pred.write(sql + ";\n")

# Generate predictions with the LoRA model only; the base-model run is kept as
# a disabled line that can be re-enabled.
#run_spider(base_model, tok, "base")
run_spider(lora_model, tok_lora, "lora")

import textwrap

# Assemble the test-suite-sql-eval command for the LoRA gold/pred files.
cmd = ["python", "test-suite-sql-eval/evaluation.py"]
cmd += ["--gold", "lora_gold.txt"]
cmd += ["--pred", "lora_pred.txt"]
cmd += ["--db", "spider_data/database"]
cmd += ["--etype", "exec"]
cmd += ["--plug_value"]

# Capture both streams as text so they can be printed (truncated) below.
p = subprocess.run(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

print("returncode:", p.returncode)
print("\n--- STDOUT ---\n", p.stdout[:4000])
print("\n--- STDERR ---\n", p.stderr[:4000])

# A non-zero exit status means the evaluator itself failed; stop the cell.
if p.returncode:
    raise RuntimeError("Spider eval failed, see stderr above.")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


.gitattributes: 0.00B [00:00, ?B/s]

README.md:   0%|          | 0.00/289 [00:00<?, ?B/s]

academic.sqlite:   0%|          | 0.00/123k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

activity_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

aircraft.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

allergy_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

apartment_rentals.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

architecture.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

assets_maintenance.sqlite:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/baseball_1/baseball_1.sqlite:   0%|          | 0.00/29.8M [00:00<?, ?B/s]

database/baseball_1/schema.sql:   0%|          | 0.00/57.6M [00:00<?, ?B/s]

battle_death.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

behavior_monitoring.sqlite:   0%|          | 0.00/65.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/bike_1/bike_1.sqlite:   0%|          | 0.00/1.79M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

body_builder.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

book_2.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

browser_web.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

candidate_poll.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

car_1.json: 0.00B [00:00, ?B/s]

car_1.sql:   0%|          | 0.00/963 [00:00<?, ?B/s]

car_1.sqlite:   0%|          | 0.00/65.5k [00:00<?, ?B/s]

README.CARS.TXT: 0.00B [00:00, ?B/s]

car-makers.csv:   0%|          | 0.00/588 [00:00<?, ?B/s]

car-names.csv: 0.00B [00:00, ?B/s]

cars-data.csv: 0.00B [00:00, ?B/s]

cars.desc: 0.00B [00:00, ?B/s]

continents.csv:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

countries.csv:   0%|          | 0.00/248 [00:00<?, ?B/s]

model-list.csv:   0%|          | 0.00/569 [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

annotation.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

chinook_1.sqlite:   0%|          | 0.00/901k [00:00<?, ?B/s]

cinema.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

city_record.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

climbing.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

club_1.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

coffee_shop.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

TinyCollege.sql: 0.00B [00:00, ?B/s]

college_1.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

TextBookExampleSchema.sql: 0.00B [00:00, ?B/s]

database/college_2/college_2.sqlite:   0%|          | 0.00/2.12M [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

college_3.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

company_1.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/78.0 [00:00<?, ?B/s]

company_employee.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

company_office.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

concert_singer.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

county_public_safety.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

course_teach.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Doc_Control_Systems.sqlite:   0%|          | 0.00/81.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Doc_Template_Mgt.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Doc_Tracking_DB.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Docs_and_Epenses.sqlite:   0%|          | 0.00/41.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Drama_Workshop_Groups.sqlite:   0%|          | 0.00/147k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

cre_Theme_park.sqlite:   0%|          | 0.00/94.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

csu_1.sqlite:   0%|          | 0.00/102k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

culture_company.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customer_complaints.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customer_deliveries.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_and_addresses.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_and_invoices.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_and_products_contacts.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_campaigns_ecommerce.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

customers_card_transactions.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

debate.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

decoration_competition.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

department_management.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

department_store.sqlite:   0%|          | 0.00/86.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

device.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

document_management.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

dog_kennels.sqlite:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

dorm_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

driving_school.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

e_government.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

e_learning.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

election.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

election_representative.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

employee_hire_evaluation.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

entertainment_awards.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

entrepreneur.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

epinions_1.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

farm.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

film_rank.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

flight_1.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

README.AIRLINES.txt: 0.00B [00:00, ?B/s]

airlines.csv:   0%|          | 0.00/481 [00:00<?, ?B/s]

airports100.csv: 0.00B [00:00, ?B/s]

flights.csv: 0.00B [00:00, ?B/s]

flight_2.json: 0.00B [00:00, ?B/s]

flight_2.sql:   0%|          | 0.00/502 [00:00<?, ?B/s]

flight_2.sqlite:   0%|          | 0.00/77.8k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

database/flight_4/flight_4.sqlite:   0%|          | 0.00/3.13M [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

sql.txt:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

flight_company.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

circuits.csv: 0.00B [00:00, ?B/s]

constructorResults.csv: 0.00B [00:00, ?B/s]

constructorStandings.csv: 0.00B [00:00, ?B/s]

constructors.csv: 0.00B [00:00, ?B/s]

driverStandings.csv: 0.00B [00:00, ?B/s]

drivers.csv: 0.00B [00:00, ?B/s]

database/formula_1/data_csv/lapTimes.csv:   0%|          | 0.00/12.1M [00:00<?, ?B/s]

pitStops.csv: 0.00B [00:00, ?B/s]

qualifying.csv: 0.00B [00:00, ?B/s]

races.csv: 0.00B [00:00, ?B/s]

results.csv: 0.00B [00:00, ?B/s]

seasons.csv: 0.00B [00:00, ?B/s]

status.csv: 0.00B [00:00, ?B/s]

formula_1.sql: 0.00B [00:00, ?B/s]

database/formula_1/formula_1.sqlite:   0%|          | 0.00/2.94M [00:00<?, ?B/s]

game_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

game_injury.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

gas_company.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

geo.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

gymnast.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

hospital_1.sqlite:   0%|          | 0.00/98.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

hr_1.sqlite:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

icfp_1.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

q.txt:   0%|          | 0.00/762 [00:00<?, ?B/s]

imdb.sqlite:   0%|          | 0.00/131k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

change_date.py:   0%|          | 0.00/696 [00:00<?, ?B/s]

README.INN.TXT: 0.00B [00:00, ?B/s]

Reservations.csv: 0.00B [00:00, ?B/s]

Reservations_t.csv: 0.00B [00:00, ?B/s]

Rooms.csv:   0%|          | 0.00/627 [00:00<?, ?B/s]

inn_1.sql:   0%|          | 0.00/428 [00:00<?, ?B/s]

inn_1.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt:   0%|          | 0.00/927 [00:00<?, ?B/s]

insurance_and_eClaims.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

insurance_fnol.sqlite:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

insurance_policies.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

journal_committee.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

loan_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

local_govt_and_lot.sqlite:   0%|          | 0.00/65.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

local_govt_in_alabama.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

local_govt_mdm.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

machine_repair.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

manufactory_1.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

manufacturer.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

match_season.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

medicine_enzyme_interaction.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

mountain_photos.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

movie_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

museum_visit.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

music_1.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

music_2.sqlite:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

music_4.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

musical.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

network_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

network_2.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql:   0%|          | 0.00/785 [00:00<?, ?B/s]

new_concert_singer.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

new_orchestra.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

new_pets_1.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

news_report.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

orchestra.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

party_host.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

party_people.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

performance_attendance.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

perpetrator.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

pets_1.sqlite:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

phone_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

phone_market.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

pilot_record.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

poker_player.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

product_catalog.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

products_for_hire.sqlite:   0%|          | 0.00/41.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

products_gen_characteristics.sqlite:   0%|          | 0.00/41.0k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

program_share.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

protein_institute.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

race_track.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

railway.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

real_estate_properties.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

restaurant_1.sqlite:   0%|          | 0.00/24.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

restaurants.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql:   0%|          | 0.00/589 [00:00<?, ?B/s]

riding_club.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

roller_coaster.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/sakila_1/sakila_1.sqlite:   0%|          | 0.00/3.71M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

scholar.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

school_bus.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

school_finance.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

school_player.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

scientist_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

ship_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

ship_mission.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

shop_membership.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

singer.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

small_bank_1.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

database/soccer_1/schema.sql:   0%|          | 0.00/322M [00:00<?, ?B/s]

database/soccer_1/soccer_1.sqlite:   0%|          | 0.00/317M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

soccer_2.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

solvency_ii.sqlite:   0%|          | 0.00/73.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

sports_competition.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

station_weather.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

database/store_1/store_1.sqlite:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

store_product.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

storm_record.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

README.STUDENTS.TXT: 0.00B [00:00, ?B/s]

list.csv: 0.00B [00:00, ?B/s]

teachers.csv:   0%|          | 0.00/323 [00:00<?, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

student_1.sql:   0%|          | 0.00/265 [00:00<?, ?B/s]

student_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

student_assessment.sqlite:   0%|          | 0.00/57.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

student_transcripts_tracking.sqlite:   0%|          | 0.00/49.2k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

swimming.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

theme_gallery.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_grants_for_research.sqlite:   0%|          | 0.00/73.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_orders.sqlite:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_share_transactions.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tracking_software_problems.sqlite:   0%|          | 0.00/36.9k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

train_station.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

tvshow.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

oracle-dialects.xml: 0.00B [00:00, ?B/s]

postgres-dialects.xml: 0.00B [00:00, ?B/s]

sqlserver-dialects.xml: 0.00B [00:00, ?B/s]

twitter_1.sqlite:   0%|          | 0.00/45.1k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

university_basketball.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

voter_1.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

voter_2.sqlite:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

wedding.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

annotation.json: 0.00B [00:00, ?B/s]

README.WINE.txt: 0.00B [00:00, ?B/s]

appellations.csv: 0.00B [00:00, ?B/s]

grapes.csv:   0%|          | 0.00/450 [00:00<?, ?B/s]

wine.csv: 0.00B [00:00, ?B/s]

link.txt:   0%|          | 0.00/64.0 [00:00<?, ?B/s]

q.txt: 0.00B [00:00, ?B/s]

wine_1.sql:   0%|          | 0.00/574 [00:00<?, ?B/s]

wine_1.sqlite:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

workshop_paper.sqlite:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

world_1.json: 0.00B [00:00, ?B/s]

world_1.sqlite:   0%|          | 0.00/319k [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

wrestler.sqlite:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

wta_1.sql: 0.00B [00:00, ?B/s]

database/wta_1/wta_1.sqlite:   0%|          | 0.00/105M [00:00<?, ?B/s]

schema.sql: 0.00B [00:00, ?B/s]

yelp.sqlite:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

train.json:   0%|          | 0.00/35.8M [00:00<?, ?B/s]

validation.json: 0.00B [00:00, ?B/s]

  0%|          | 0/200 [00:00<?, ?it/s]

returncode: 0

--- STDOUT ---
                      easy                 medium               hard                 extra                all                 
count                41                   84                   35                   40                   200                 
execution            0.293                0.619                0.543                0.300                0.475               


--- STDERR ---
  "YEAR\s*\(\s*CURDATE\s*\(\s*\)\s*\)\s*", "2020", query, flags=re.IGNORECASE
  float_nums = re.findall("[-+]?\d*\.\d+", query)
  int_nums = [i.strip() for i in re.findall("[^tT]\d+", query)]
  table = re.findall("[Tt]\d+\.", tok)
  for table, col, val1, val2 in re.findall('(?:([^\.\s]*)\.)?([^\.\s]+) between ([^\s;]+) and ([^\s;]+)', query, re.IGNORECASE):



In [None]:
import subprocess
import sys
import textwrap

# Run the test-suite-sql-eval script (cloned into ./test-suite-sql-eval during
# setup) on the lora_gold.txt / lora_pred.txt query files and print its report.
# The evaluator is started with the kernel's own interpreter (sys.executable)
# instead of whatever "python" resolves to on PATH, so it runs in the same
# environment as this notebook. The flags are handed to evaluation.py as-is.
cmd = [
    sys.executable, "test-suite-sql-eval/evaluation.py",
    "--gold", "lora_gold.txt",
    "--pred", "lora_pred.txt",
    "--db", "spider_data/database",
    "--etype", "exec",
    "--plug_value",
]

# Capture both streams as text so they can be echoed (truncated) below.
p = subprocess.run(cmd, capture_output=True, text=True)
print("returncode:", p.returncode)

# stdout: keep the first 4000 characters of the evaluator's report.
print("\n--- STDOUT ---\n", p.stdout[:4000])
# stderr: keep the LAST 4000 characters. A Python traceback ends with the
# actual exception, so truncating from the front would hide the cause of a
# failure. For stderr shorter than 4000 characters the output is unchanged.
print("\n--- STDERR ---\n", p.stderr[-4000:])

# Stop the notebook here on a non-zero exit so later cells do not continue
# after a failed evaluation.
if p.returncode != 0:
    raise RuntimeError("Spider eval failed, see stderr above.")

returncode: 0

--- STDOUT ---
                      easy                 medium               hard                 extra                all                 
count                41                   84                   35                   40                   200                 
execution            0.293                0.619                0.543                0.300                0.475               


--- STDERR ---
 
