### Qwen2.5-7B-Instruct LLM run on Aristotle pipeline
### This notebook is run in a Runpod/remote Jupyter environment

In [None]:
# Refresh the apt package index before the system packages are installed in the next cell.
!apt update

In [None]:
# System packages, installed non-interactively (-y):
#   git-lfs            - used by the `git lfs install` / `git lfs pull` cells below
#   zip, unzip         - unzip extracts the llama.cpp tools archive later in this notebook
#   python-is-python3  - presumably so that `python` in the `!python ...` cells resolves to Python 3
!apt install git-lfs zip unzip python-is-python3 -y

In [None]:
# Set up Git LFS for the current user so the model repository's LFS files can be pulled later.
!git lfs install

In [None]:
# Python dependencies for the pipeline, upgraded to their latest releases.
# Original note: "Runpod is less strict" (presumably about installing straight into the
# base environment instead of a virtualenv - TODO confirm).
# %pip (rather than !pip) installs into the environment of the running kernel, so the
# packages end up where this notebook imports them even if a different kernel is selected.
%pip install --upgrade transformers safetensors sentencepiece huggingface-hub protobuf accelerate bitsandbytes tqdm openai backoff retrying ipykernel ipywidgets matplotlib cmake scikit-build-core setuptools

In [None]:
import os, sys, subprocess

# Process-wide environment settings. They are exported here, before any later cell
# starts a subprocess or loads a model, so everything launched afterwards inherits them.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "false",
        # Reduce thread contention: one OpenMP thread and one MKL thread.
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1",
        # Tells git-lfs not to download LFS file contents on clone/checkout; the
        # needed model files are fetched later with `git lfs pull --include`.
        "GIT_LFS_SKIP_SMUDGE": "1",
    }
)

In [None]:
# Clone the Qwen2.5-7B-Instruct GGUF repository into LLM_MODELS_GGUF/ (relative to the
# current working directory). GIT_LFS_SKIP_SMUDGE=1 was exported in the setup cell above,
# so the large LFS files are presumably not downloaded here; the q4_k_m files are pulled
# explicitly in a later cell.
!git clone https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GGUF LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF

In [None]:
# Install llama-cpp-python 0.3.4 with the project's wheel index for CUDA 12.4 (cu124)
# added as an extra package index.
# %pip (rather than !pip) targets the environment of the running kernel, so the package
# is importable from this notebook regardless of which `pip` is first on PATH.
%pip install llama-cpp-python==0.3.4 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124

In [None]:
# Download the prebuilt llama.cpp CLI tools (release b7134, Ubuntu x64 build) and make
# llama-gguf-split callable from later `!` cells; it is used to merge the sharded model.
print("Downloading llama.cpp CLI tools...")
!wget -q https://github.com/ggml-org/llama.cpp/releases/download/b7134/llama-b7134-bin-ubuntu-x64.zip -O llama_tools.zip

print("Extracting...")
!unzip -o -q llama_tools.zip -d llama_tools_bin

# Make the splitter executable
tool_path = os.path.abspath("llama_tools_bin/build/bin/llama-gguf-split")
tools_bin_dir = os.path.dirname(tool_path)
!chmod +x {tool_path}

# Add the tools directory to PATH exactly once. The check compares whole PATH entries
# against the absolute directory, so an unrelated entry that merely contains
# "llama_tools_bin/build/bin" cannot suppress the update, and an unset PATH does not
# raise KeyError.
current_path = os.environ.get("PATH", "")
if tools_bin_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        current_path + os.pathsep + tools_bin_dir if current_path else tools_bin_dir
    )

print(f"Installed tools to: {tools_bin_dir}")

print("\nVerifying llama-gguf-split version:")
!llama-gguf-split --help | head -n 5

In [None]:
# Inside the cloned repository, download only the LFS files whose names match *q4_k_m*
# (the quantization used by the rest of this notebook).
!cd LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF && git lfs pull --include "*q4_k_m*"

In [None]:
# Merge the split q4_k_m model into one GGUF file: the first argument is the first shard
# (-00001-of-00002), the second is the merged output file that the configuration cell
# below points the pipeline at.
# NOTE(review): llama-gguf-split may refuse to overwrite an existing output file, so
# re-running this cell after a successful merge can report an error - confirm.
!llama-gguf-split --merge LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m-00001-of-00002.gguf LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m.gguf

In [None]:
# venv_dir = "venv"
# kernel_name = "runner_pipeline"
# display_name = "Python (runner_pipeline venv)"

# #create venv only if it doesn't already exist
# pyvenv_cfg = os.path.join(venv_dir, "pyvenv.cfg")
# if not os.path.exists(pyvenv_cfg):
#     print("venv not found, creating", venv_dir)
#     subprocess.check_call([sys.executable, "-m", "venv", venv_dir])
# else:
#     print("venv already exists at", venv_dir)
# venv_python = os.path.join(venv_dir, "bin", "python")

# if not os.path.exists(venv_python):
#     raise FileNotFoundError(f"Couldn't find python inside venv at {venv_python}")

# print("Using venv python:", venv_python)

# #install/upgrade pip and packages into the venv
# packages = [
#     "transformers","safetensors","sentencepiece","huggingface-hub",
#     "accelerate","bitsandbytes","tqdm","openai","backoff","retrying",
#     "protobuf","matplotlib","ipykernel","ipywidgets"
# ]

# # upgrade pip first
# subprocess.check_call([venv_python, "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"])
# subprocess.check_call([venv_python, "-m", "pip", "install"] + packages)

# print("Installed packages into the venv.")

# #register the kernel (if already registered, ipykernel will usually overwrite/refresh)
# try:
#     subprocess.check_call([venv_python, "-m", "ipykernel", "install", "--user",
#                            "--name", kernel_name, "--display-name", display_name])
#     print("Registered Jupyter kernel:", display_name)
# except subprocess.CalledProcessError as e:
#     print("Kernel registration failed or already exists:", e)

# # 5) quick verification print
# print("Switch the notebook kernel to:", display_name)
# print("then 'import sys; print(sys.executable)' should show the venv python.")


In [None]:
# Select the inference backend and point the pipeline at the merged GGUF model file.
os.environ["LLM_BACKEND"] = "llamacpp"

# The model is cloned, pulled and merged under a path relative to the working directory
# (see the git clone / git lfs pull / llama-gguf-split cells), so the absolute path is
# derived from that same relative location instead of being hard-coded. On Runpod, with
# the notebook running from /workspace, this resolves to the former literal
# /workspace/LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m.gguf.
snapshot_path = os.path.abspath(
    "LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m.gguf"
)  # Runpod
#snapshot_path = "LLM_MODELS_GGUF/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m.gguf" # Local and DGX1 HF backend

# Non-fatal sanity check: the merged file only exists after the merge cell has run.
if not os.path.isfile(snapshot_path):
    print("WARNING: model file not found at", snapshot_path)

os.environ["LOCAL_MODEL_PATH"] = snapshot_path
os.environ["LLM_MODEL"] = snapshot_path

os.environ["LLM_LOAD_IN_4BIT"] = "1"  # or "0" to disable quantization

print("LOCAL_MODEL_PATH =", os.environ["LOCAL_MODEL_PATH"])
print("LLM_MODEL =", os.environ["LLM_MODEL"])
print("LLM_BACKEND =", os.environ["LLM_BACKEND"])

# Plain Python copies of the same values, in case the kernel does not pick up the
# environment variables: the `!python ...` cells below reference $LLM_MODEL, which
# IPython can expand from the notebook namespace.
LLM_MODEL = snapshot_path
LLM_BACKEND = os.environ.get("LLM_BACKEND")

# Show which interpreter (and therefore which venv, if any) the kernel is running.
print(sys.executable)

In [None]:
# Worker time limit in seconds. Change this for every process (translate, decompose,
# search_resolve); a different time limit may be needed for each.
LLM_WORKER_MAX_TIME = 300
os.environ["LLM_WORKER_MAX_TIME"] = str(LLM_WORKER_MAX_TIME)

# Universal command-line arguments.
# MAX_NEW_TOKENS only limits the number of generated tokens, while max_position_embeddings
# is the context length taken from the LLM's config.json.
# !!! input_length + MAX_NEW_TOKENS should be < context_length, otherwise the LLM breaks.
#   - Llama 3 only has an 8k context length / max_position_embeddings.
#   - SEALIONv3-Llama3-8B-IT uses RoPE; max_position_embeddings follows the RoPE limit (131k).
#   - Qwen2.5-7B-IT has a 32k context length.
#   - SahabatAIv1-Llama3-8B-IT has an 8k context length.
# Response sizes were counted per step from the notebook output cells with an online token
# counter: translation ~400 tokens, decomposition ~500 tokens, search_resolve ~700 tokens.
# 1200 was not enough, so a larger limit is used.
MAX_NEW_TOKENS = 2500
os.environ["MAX_NEW_TOKENS"] = str(MAX_NEW_TOKENS)

BATCH_NUM = 1
os.environ["BATCH_NUM"] = str(BATCH_NUM)

In [None]:
# Translation with original prompts
#!python translate_to_fol.py --data_path results_bahasa_translation --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file translation --split dev --save_path results_translated_translation/v3/prompts_original --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Translation with modified prompts
#!python translate_to_fol.py --data_path results_bahasa_translation --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file translation_modified --split dev --save_path results_translated_translation/v3/prompts_modified --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Translation with refined prompts
#!python translate_to_fol.py --data_path results_bahasa_translation --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file translation_refine --split dev --save_path results_translated_translation/v3/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Decomposition with refined prompts
#!python decompose_to_cnf.py --data_path results_translated_translation/v3/prompts_refine --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file and_or_decomposer_refine --save_path results_translated_decomposition/v2/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
#!python negate.py --dataset_name ProntoQA --save_path results_translated_decomposition/v2/prompts_refine --model_name $LLM_MODEL

In [None]:
# Logic Resolver with refined prompts (run with --negation False)
#!python search_resolve.py --data_path results_translated_decomposition/v2/prompts_refine --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file logic_resolver_refine --save_path results_translated_search_resolve/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --negation False --search_round 10 --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Logic Resolver with refined prompts (run with --negation True)
#!python search_resolve.py --data_path results_translated_decomposition/v2/prompts_refine --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file logic_resolver_refine --save_path results_translated_search_resolve/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --negation True --search_round 10 --max_new_tokens $MAX_NEW_TOKENS

In [None]:
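# NOTE(review): this save_path contains "v3/", but the search_resolve commands above save to results_translated_search_resolve/prompts_refine (no "v3/") - confirm which path is intended.
#!python evaluate.py --dataset_name ProntoQA --save_path results_translated_search_resolve/v3/prompts_refine --model_name $LLM_MODEL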

In [None]:
# # Naive prompting only requires True or False answer based on context
# os.environ["MAX_NEW_TOKENS"] = "600"
# MAX_NEW_TOKENS=600
# os.environ["BATCH_NUM"] = "1"
# BATCH_NUM=1

In [None]:
# Solving with naive prompting with explanations reasoning after answer
#!python naive_prompting.py --data_path results_bahasa_translation --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file naive_prompting_explanations_after_answer --split dev --save_path results_translated_naive_prompting/prompt_explanations_after_answer --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
#!python evaluate.py --dataset_name ProntoQA --save_path results_translated_naive_prompting/prompt_explanations_after_answer --model_name $LLM_MODEL --evaluation_method naive_prompting

In [None]:
# Solving with naive prompting with explanations reasoning before answer
#!python naive_prompting.py --data_path results_bahasa_translation --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file naive_prompting_explanations_before_answer --split dev --save_path results_translated_naive_prompting/prompt_explanations_before_answer --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
#!python evaluate.py --dataset_name ProntoQA --save_path results_translated_naive_prompting/prompt_explanations_before_answer --model_name $LLM_MODEL --evaluation_method naive_prompting

## Ablation

In [None]:
# Ablation: solve with FOL prompting, with the explanation/reasoning given before the answer.
# Reads results_translated_translation/v3/prompts_refine (the save path of the
# "Translation with refined prompts" command above), uses the translations_only_prompting
# prompt file, and writes to results_translated_fol_prompting/prompt_explanations_before_answer.
# NOTE(review): --sample_pct is 1 here, while the commented-out pipeline commands above use
# 100 - confirm this is intentional and not a leftover from a quick test run.
!python ablation_fol.py --data_path results_translated_translation/v3/prompts_refine --dataset_name ProntoQA --sample_pct 1 --prompts_folder manual_prompts_translated --prompts_file translations_only_prompting --save_path results_translated_fol_prompting/prompt_explanations_before_answer --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Ablation: solve with CNF prompting, with the explanation/reasoning given before the answer.
# Reads results_translated_decomposition/v2/prompts_refine (the save path of the
# "Decomposition with refined prompts" command above), uses the decompositions_only_prompting
# prompt file, and writes to results_translated_cnf_prompting/prompt_explanations_before_answer.
# NOTE(review): --sample_pct is 1 here, while the commented-out pipeline commands above use
# 100 - confirm this is intentional and not a leftover from a quick test run.
!python ablation_cnf.py --data_path results_translated_decomposition/v2/prompts_refine --dataset_name ProntoQA --sample_pct 1 --prompts_folder manual_prompts_translated --prompts_file decompositions_only_prompting --save_path results_translated_cnf_prompting/prompt_explanations_before_answer --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS