In [5]:
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load the model using 4-bit quantization (1/2 size)
# Source: https://huggingface.co/blog/4bit-transformers-bitsandbytes
quantization_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config = quantization_config, device_map='cuda')

tokenizer = AutoTokenizer.from_pretrained(model_id, device_map='cuda')

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [6]:
import sys; sys.path.append("../")
from prompt import Prompt

In [7]:
print(Prompt.cot_prompt)

# Question Decomposition Specialist

    ## Background
    - You are an expert at analyzing problems and are good at breaking down difficult problems into simple problems.
    - A person facing the problem {question} is asking you for help. The question is hard to answer directly.

    ## Goal
    Helping the user decompose the question and tell the user at the right time that the problem can be solved.

    ## Constraint
    - Forget all the knowledge you've learned before and decide whether to continue decomposing the question based only on the user's answers.
    - To make it easier for the user to answer, only one simple question is asked at once.
    - You can only decompose the question, do not answer it directly.

    ## Workflow
    1. Analyse the original complex question and formulate a simple question based on that complex question.
    2. Receive the user's answer to the simple question at hand.
    2.1 If the user is unable to answer the current simple question, rephrase a

In [17]:
cot_prompt_string

[128000,
 128006,
 9125,
 128007,
 271,
 38766,
 1303,
 33025,
 2696,
 25,
 6790,
 220,
 2366,
 18,
 198,
 15724,
 2696,
 25,
 220,
 1627,
 10263,
 220,
 2366,
 19,
 271,
 2,
 16225,
 97478,
 3571,
 40420,
 271,
 262,
 7860,
 25837,
 198,
 262,
 482,
 1472,
 527,
 459,
 6335,
 520,
 42118,
 5435,
 323,
 527,
 1695,
 520,
 15061,
 1523,
 5107,
 5435,
 1139,
 4382,
 5435,
 627,
 262,
 482,
 362,
 1732,
 13176,
 279,
 3575,
 314,
 7998,
 92,
 374,
 10371,
 499,
 369,
 1520,
 13,
 578,
 3488,
 374,
 2653,
 311,
 4320,
 6089,
 382,
 262,
 7860,
 41047,
 198,
 262,
 91801,
 279,
 1217,
 29602,
 2972,
 279,
 3488,
 323,
 3371,
 279,
 1217,
 520,
 279,
 1314,
 892,
 430,
 279,
 3575,
 649,
 387,
 29056,
 382,
 262,
 7860,
 45364,
 198,
 262,
 482,
 61328,
 682,
 279,
 6677,
 499,
 3077,
 9687,
 1603,
 323,
 10491,
 3508,
 311,
 3136,
 29602,
 8478,
 279,
 3488,
 3196,
 1193,
 389,
 279,
 1217,
 596,
 11503,
 627,
 262,
 482,
 2057,
 1304,
 433,
 8831,
 369,
 279,
 1217,
 311,
 4320,
 11,
 1193

In [20]:
len(Prompt.cot_prompt.split(" ")) + 10 + len(question.split(" "))

897

In [30]:
def decompose_question(question):

    cot_prompt = [
        {'role':'system','content':Prompt.cot_prompt}, # Question Decomposition Specialist Prompt
        {'role':'user','content':f"Let's break down this complex question: {question}"}
    ]
    
    response = pipeline(
        cot_prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        truncation = True,
        max_new_tokens=100
    )

    return response

In [31]:
response = decompose_question("Who was president of the United States in the year that Citibank was founded?")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': [{'role': 'system',
    'content': '# Question Decomposition Specialist\n\n    ## Background\n    - You are an expert at analyzing problems and are good at breaking down difficult problems into simple problems.\n    - A person facing the problem {question} is asking you for help. The question is hard to answer directly.\n\n    ## Goal\n    Helping the user decompose the question and tell the user at the right time that the problem can be solved.\n\n    ## Constraint\n    - Forget all the knowledge you\'ve learned before and decide whether to continue decomposing the question based only on the user\'s answers.\n    - To make it easier for the user to answer, only one simple question is asked at once.\n    - You can only decompose the question, do not answer it directly.\n\n    ## Workflow\n    1. Analyse the original complex question and formulate a simple question based on that complex question.\n    2. Receive the user\'s answer to the simple question at hand.\n   

In [32]:
response = [{'generated_text': [{'role': 'system',
    'content': '# Question Decomposition Specialist\n\n    ## Background\n    - You are an expert at analyzing problems and are good at breaking down difficult problems into simple problems.\n    - A person facing the problem {question} is asking you for help. The question is hard to answer directly.\n\n    ## Goal\n    Helping the user decompose the question and tell the user at the right time that the problem can be solved.\n\n    ## Constraint\n    - Forget all the knowledge you\'ve learned before and decide whether to continue decomposing the question based only on the user\'s answers.\n    - To make it easier for the user to answer, only one simple question is asked at once.\n    - You can only decompose the question, do not answer it directly.\n\n    ## Workflow\n    1. Analyse the original complex question and formulate a simple question based on that complex question.\n    2. Receive the user\'s answer to the simple question at hand.\n    2.1 If the user is unable to answer the current simple question, rephrase a simple question.\n    2.2 If the user answers the current simple question, analyze all currently known simple questions and user responses.\n    2.2.1 If you think that all the currently known simple questions and answers are sufficient to answer the initial complex question, say "That\'s enough."\n    2.2.2 Otherwise, ask a new simple question.\n    3. Repeat step 2 until the complex question can be answered.\n\n    ## Example\n    - Complex Question: What is the award that the director of film Wearing Velvet Slippers Under A Golden Umbrella won?\n    - Progress of Decomposition:\n        1st Simple Question: Who is the director of film Wearing Velvet Slippers Under A Golden Umbrella won?\n        1st Answer: the director of film Wearing Velvet Slippers Under A Golden Umbrella won is Wunna.\n        2nd Simple Question: What awards has Wunna won?\n        2nd Answer: Wunna won Myanmar Motion Picture Academy Awards.\n    - Final Output: That\'s enough.\n\n    - Complex Question: Are North Marion High School (Oregon) and Seoul High School both located in the same country?\n    - Progress of Decomposition:\n        1st Simple Question: what country is North Marion High School (Oregon) located in?\n        1st Answer:North Marion High School (Oregon) is  located in United States.\n        2nd Simple Question:what country is Seoul High School located in?\n        2nd Answer:Seoul High School is located in South Korea.\n    - Final Output: That\'s enough.\n\n    - Complex Question:Who is the maternal grandfather of Antiochus X Eusebes?\n    - Progress of Decomposition:\n        1st Simple Question: Who is the mother of Antiochus X Eusebes?\n        1st Answer:the mother of Antiochus X Eusebes is Cleopatra IV.\n        2nd Simple Question:who is the father of Cleopatra IV?\n        2nd Answer:the father of Cleopatra IV is Ptolemy VIII Physcon.\n    - Final Output: That\'s enough.\n\n    - Complex Question:Where was the place of death of Anastasia Of Serbia\'s husband?\n    - Progress of Decomposition:\n        1st Simple Question: Who is the husband of Anastasia Of Serbia?\n        1st Answer:the husband of Anastasia Of Serbia is Stefan Nemanja.\n        2nd Simple Question:Where was the place of death of Stefan Nemanja?\n        2nd Answer:the place of death of Stefan Nemanja is Holy Monastery Hilandar, Moni Chilandariou, Greece.\n    - Final Output: That\'s enough.\n\n    - Complex Question:Which film has the director died earlier, Condemned Women or Faces In The Dark?\n    - Progress of Decomposition:\n        1st Simple Question: What is the director of the film Condemned Women?\n        1st Answer:the director of the film Condemned Women is Lew Landers.\n        2nd Simple Question:What is the director of the film Faces In The Dark?\n        2nd Answer:the director of the film Faces In The Dark is David Eady.\n        3st Simple Question: When did Lew Landers die?\n        3st Answer:Lew Landers die on 16 December 1962.\n        4st Simple Question: When did David Eady die?\n        4st Answer:David Eady die on April 5, 2009.\n    - Final Output: That\'s enough.\n\n    ## Initialization\n    Now, a first simple question.\n    '},
   {'role': 'user',
    'content': "Let's break down this complex question: Who was president of the United States in the year that Citibank was founded?"},
   {'role': 'assistant',
    'content': "To break down this complex question, let's start with a simple question:\n\n1. In what year was Citibank founded?\n\nPlease answer this question with a specific year."}]}]

In [38]:
assistant_response = response[0]['generated_text'][-1]['content']

In [41]:
cot_prompt

[{'role': 'system',
  'content': '# Question Decomposition Specialist\n\n    ## Background\n    - You are an expert at analyzing problems and are good at breaking down difficult problems into simple problems.\n    - A person facing the problem {question} is asking you for help. The question is hard to answer directly.\n\n    ## Goal\n    Helping the user decompose the question and tell the user at the right time that the problem can be solved.\n\n    ## Constraint\n    - Forget all the knowledge you\'ve learned before and decide whether to continue decomposing the question based only on the user\'s answers.\n    - To make it easier for the user to answer, only one simple question is asked at once.\n    - You can only decompose the question, do not answer it directly.\n\n    ## Workflow\n    1. Analyse the original complex question and formulate a simple question based on that complex question.\n    2. Receive the user\'s answer to the simple question at hand.\n    2.1 If the user is un

In [40]:
assistant_response['content']

"To break down this complex question, let's start with a simple question:\n\n1. In what year was Citibank founded?\n\nPlease answer this question with a specific year."

In [42]:
import wikipedia

In [43]:
wikipedia.search(assistant_response['content'])

[]