### Qwen2.5-7B-Instruct LLM run on Aristotle pipeline
### This is run in a Runpod/remote Jupyter environment

In [None]:
# Refresh the package index before installing system packages.
# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed to be stable for non-interactive/scripted use.
!apt-get update

In [None]:
# Install Git LFS (needed to fetch the large model weight files when cloning).
# -y answers the confirmation prompt automatically; a notebook cell has no
# interactive stdin, so without it the install can abort at the prompt.
!apt-get install -y git-lfs

In [None]:
!pip install transformers safetensors sentencepiece huggingface-hub accelerate bitsandbytes tqdm openai backoff retrying protobuf

In [None]:
# Initialise Git LFS for this user before cloning the model repository in the next cell.
!git lfs install

In [None]:
# Download the model snapshot into LLM_MODELS/ (relative to the notebook's working directory).
# The clone is skipped when a checkout already exists, so re-running the notebook
# does not fail with "destination path already exists".
# NOTE(review): the configuration cell below points at /workspace/LLM_MODELS/..., so this
# presumably has to be run from /workspace — confirm.
![ -d LLM_MODELS/Qwen2.5-7B-Instruct/.git ] || git clone https://huggingface.co/Qwen/Qwen2.5-7B-Instruct LLM_MODELS/Qwen2.5-7B-Instruct

In [1]:
import os

# Location of the model snapshot. Change this based on platform and model.
# Alternative kept from the original notebook: "Qwen/Qwen2.5-7B-Instruct".
snapshot_path = "/workspace/LLM_MODELS/Qwen2.5-7B-Instruct"

# Plain Python copies, in case the kernel does not recognise the values otherwise
# (later `!python ...` cells reference $LLM_MODEL).
LLM_MODEL = snapshot_path
LLM_WORKER_MAX_TIME = 450

os.environ.update(
    {
        # Disable tokenizer parallelism and keep OpenMP/MKL single-threaded
        # to reduce thread contention.
        "TOKENIZERS_PARALLELISM": "false",
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1",
        # Both variables point at the same snapshot.
        "LOCAL_MODEL_PATH": snapshot_path,
        "LLM_MODEL": snapshot_path,
        # "1" enables 4-bit quantised loading (bitsandbytes must be set up);
        # use "0" to disable quantisation.
        "LLM_LOAD_IN_4BIT": "1",
    }
)

# Echo the effective model settings so they are visible in the cell output.
print("LOCAL_MODEL_PATH =", os.environ["LOCAL_MODEL_PATH"])
print("LLM_MODEL =", os.environ["LLM_MODEL"])

LOCAL_MODEL_PATH = /workspace/LLM_MODELS/Qwen2.5-7B-Instruct
LLM_MODEL = /workspace/LLM_MODELS/Qwen2.5-7B-Instruct


In [None]:
# Check of the max_time generation stopping criterion: the same prompt and token
# budget are generated twice, once with a 3 s limit and once with a 10 s limit.
from llm_backends import HFBackend

hb = HFBackend(local_model_path=snapshot_path, quantize_4bit=True)

for time_limit in (3.0, 10.0):
    print(f"Calling short test ({time_limit:g}s max_time)...")
    res = hb.generate(
        "Write a long list of words and sentences: ",
        max_new_tokens=1024,
        max_time=time_limit,
    )
    print("Result length:", len(res))
    # Only show the first 1000 characters to keep the cell output short.
    print(res[:1000])

In [None]:
# Time limit in seconds. Change it for every process (translate, decompose,
# search_resolve), since each may need a different limit.
LLM_WORKER_MAX_TIME = 300

# Universal command-line arguments.
# MAX_NEW_TOKENS only limits how many tokens are generated, while
# max_position_embeddings in the model's config.json is the context length.
# !!! input_length + MAX_NEW_TOKENS should be < context_length, otherwise the LLM breaks.
# Context lengths noted by the author:
#   - Llama 3: 8k context length / max_position_embeddings
#   - SEALIONv3-Llama3-8B-IT: uses RoPE, max_position_embeddings follows the RoPE limit (131k)
#   - Qwen2.5-7B-IT: 32k
#   - SahabatAIv1-Llama3-8B-IT: 8k
# Response sizes, counted from the notebook output cells with an online token counter:
# translation ~400 tokens, decomposition ~500 tokens, search_resolve ~700 tokens.
MAX_NEW_TOKENS = 1500
BATCH_NUM = 1

# Mirror every setting into the environment as a string, so the Python variable
# and the environment variable always carry the same value.
os.environ.update(
    LLM_WORKER_MAX_TIME=str(LLM_WORKER_MAX_TIME),
    MAX_NEW_TOKENS=str(MAX_NEW_TOKENS),
    BATCH_NUM=str(BATCH_NUM),
)

In [None]:
# Translation with original prompts
#!python translate_to_fol.py --data_path manual_data_translated --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file translation --split dev --save_path results_translated_translation/v3/prompts_original --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Translation with modified prompts
#!python translate_to_fol.py --data_path manual_data_translated --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file translation_modified --split dev --save_path results_translated_translation/v3/prompts_modified --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Translation with refined prompts
#!python translate_to_fol.py --data_path manual_data_translated --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file translation_refine --split dev --save_path results_translated_translation/v3/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Decomposition with refined prompts
#!python decompose_to_cnf.py --data_path results_translated_translation/v3/prompts_refine --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file and_or_decomposer_refine --save_path results_translated_decomposition/v2/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --max_new_tokens $MAX_NEW_TOKENS

In [None]:
#!python negate.py --dataset_name ProntoQA --save_path results_translated_decomposition/v2/prompts_refine --model_name $LLM_MODEL

In [None]:
# Logic Resolver with refined prompts, run with --negation False
# (presumably the pass reported as "Non-negated" by evaluate.py — confirm).
# $LLM_MODEL, $BATCH_NUM and $MAX_NEW_TOKENS are substituted by IPython from the Python variables set in earlier cells.
# NOTE(review): results are saved to results_translated_search_resolve/prompts_refine, while the evaluate.py cell
# reads results_translated_search_resolve/v3/prompts_refine — confirm which path is intended.
!python search_resolve.py --data_path results_translated_decomposition/v2/prompts_refine --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file logic_resolver_refine --save_path results_translated_search_resolve/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --negation False --search_round 10 --max_new_tokens $MAX_NEW_TOKENS

In [None]:
# Logic Resolver with refined prompts, run with --negation True
# (presumably the pass reported as "Negated" by evaluate.py — confirm).
# $LLM_MODEL, $BATCH_NUM and $MAX_NEW_TOKENS are substituted by IPython from the Python variables set in earlier cells.
# NOTE(review): results are saved to results_translated_search_resolve/prompts_refine, while the evaluate.py cell
# reads results_translated_search_resolve/v3/prompts_refine — confirm which path is intended.
!python search_resolve.py --data_path results_translated_decomposition/v2/prompts_refine --dataset_name ProntoQA --sample_pct 100 --prompts_folder manual_prompts_translated --prompts_file logic_resolver_refine --save_path results_translated_search_resolve/prompts_refine --model_name $LLM_MODEL --batch_num $BATCH_NUM --negation True --search_round 10 --max_new_tokens $MAX_NEW_TOKENS

In [3]:
# Run evaluate.py on the ProntoQA results; its output lists, per example ID, the ground truth (GT)
# next to the negated and non-negated answers.
# NOTE(review): this reads results_translated_search_resolve/v3/prompts_refine, whereas the search_resolve
# cells save to results_translated_search_resolve/prompts_refine (no v3/) — confirm which path is intended.
!python evaluate.py --dataset_name ProntoQA --save_path results_translated_search_resolve/v3/prompts_refine --model_name $LLM_MODEL

ID: ProntoQA_102, GT: B, Negated: C, Non-negated: C
ID: ProntoQA_102, GT: B, Ans1: C, Ans2: C
ID: ProntoQA_111, GT: B, Negated: C, Non-negated: C
ID: ProntoQA_111, GT: B, Ans1: C, Ans2: C
ID: ProntoQA_112, GT: B, Negated: C, Non-negated: B
ID: ProntoQA_112, GT: B, Ans1: C, Ans2: B
ID: ProntoQA_113, GT: A, Negated: C, Non-negated: C
ID: ProntoQA_113, GT: A, Ans1: C, Ans2: C
ID: ProntoQA_115, GT: B, Negated: C, Non-negated: C
ID: ProntoQA_115, GT: B, Ans1: C, Ans2: C
ID: ProntoQA_120, GT: A, Negated: C, Non-negated: C
ID: ProntoQA_120, GT: A, Ans1: C, Ans2: C
ID: ProntoQA_126, GT: B, Negated: C, Non-negated: C
ID: ProntoQA_126, GT: B, Ans1: C, Ans2: C
ID: ProntoQA_13, GT: A, Negated: C, Non-negated: C
ID: ProntoQA_13, GT: A, Ans1: C, Ans2: C
ID: ProntoQA_14, GT: B, Negated: C, Non-negated: C
ID: ProntoQA_14, GT: B, Ans1: C, Ans2: C
ID: ProntoQA_141, GT: B, Negated: C, Non-negated: C
ID: ProntoQA_141, GT: B, Ans1: C, Ans2: C
ID: ProntoQA_143, GT: B, Negated: C, Non-negated: B
ID: ProntoQA