# Falcon Baseline (no RAG)

In [2]:
import requests
import json
from tqdm import tqdm
import csv
import re
url = 'https://a7tncd274b.execute-api.eu-west-3.amazonaws.com/InitialStage/Falcon7B'
# Set the appropriate headers for a JSON payload
headers = {
    "Content-Type": "application/json"
}

In [8]:
def create_prompt(question, options):
    options_text = "\n".join([f"Option {i+1}: {opt[1]}" for i, opt in enumerate(options)])
    # abbreviations is a list of dictionaries of form {"abbreviation": "full form"}
    # abbreviations_text = "\n".join([f"{list(abbrev.keys())[0]}: {list(abbrev.values())[0]}" for abbrev in abbreviations])
    prompt = (
        f"Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.\n"
        f"The output should be in the format: Option <Option id>\n\n"
        f"Provide the answer to the following multiple choice question in the specified format.\n"
        # f"Context:\n{context}\n\n"
        # f"Abbreviations:\n{abbreviations_text}\n\n"
        f"Question: {question}\n"
        f"Options:\n{options_text}\n"
        f"Answer: Option"
    )
    return prompt

In [9]:
def get_api_response(prompt):
    data = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 20,
            "return_full_text": False,
            "do_sample": False,
            "top_k": 1,
            "stop": ["Answer: Option"]
        }
    }
    json_data = json.dumps(data)
    try:
        response = requests.post(url, data=json_data, headers=headers)
        if response.status_code == 200:
            return response.text
        else:
            return f"Error: {response.status_code}"
    except Exception as e:
        return f"An error occurred: {e}"

In [10]:
# Read questions from the JSON file
with open('data/TeleQnA_testing1.txt', 'r') as file1:
  with open('data/questions_new.txt', 'r') as file2:
    questions = json.load(file1)
    # questions.update(json.load(file2))

In [11]:
# questions = dict(list(questions.items())[1810:])

In [12]:
responses = []

# Loop through each question and get the response
for q_id, q_data in tqdm(questions.items(), desc="Processing questions"):
    q_id_number = q_id.split()[1]
    question_text = q_data["question"]
    question_text = re.sub(r'\s*\[.*?\]\s*$', '', question_text) # remove tags
    options = [v for k, v in q_data.items() if k.startswith("option")]
    prompt = create_prompt(question_text, options)
    response = get_api_response(prompt)
    response = json.loads(json.loads(response)["body"])[0]["generated_text"]
    print(f"\n{prompt}\n{response}")

    # Create a mapping of letters to numbers
    # letter_to_number = {chr(65 + i): i + 1 for i in range(26)}

    # get the first number
    match = re.search(r'(\d+)', response)
    if match:
        try:
            answer_id = int(match.group(1))
            responses.append([q_id_number, answer_id, "Falcon 7.5B"])
        except ValueError:
            responses.append([q_id_number, "Error", "Falcon 7.5B"])
            print(f"Error processing question {q_id_number}: {response}")
            break

    else:
        # Check if the response is one of the options
        option_found = False
        for idx, option in enumerate(options):
            if option in response:
                print(f"Matched option in response: {option} to {idx}")
                responses.append([q_id_number, idx + 1, "Falcon 7.5B"])
                option_found = True
                break

        if not option_found:
            responses.append([q_id_number, "Error", "Falcon 7.5B"])
            print(f"Error processing question {q_id_number}: {response}")
            break

# Save responses to a CSV file
with open('output_results.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["Question_ID", "Answer_ID", "Task"])
    csvwriter.writerows(responses)

print("Processing complete. Responses saved to 'output_results.csv'.")

Processing questions:   0%|          | 1/366 [00:01<09:08,  1.50s/it]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: When can a gNB transmit a DL transmission(s) on a channel after initiating a channel occupancy?
Options:
Option 1: e
Option 2: f
Option 3: o
Option 4: o
Answer: Option
 2: f


Processing questions:   1%|          | 2/366 [00:01<05:21,  1.13it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What does OTA REFSENS requirement ensure?
Options:
Option 1: h
Option 2: h
Option 3: h
Option 4: h
Option 5: h
Answer: Option
 1: h


Processing questions:   1%|          | 3/366 [00:02<03:57,  1.53it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What is the role of MDA MnS producer in the management function?
Options:
Option 1: r
Option 2: o
Option 3: r
Option 4: o
Option 5: r
Answer: Option
 1


Processing questions:   1%|          | 4/366 [00:02<03:31,  1.71it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What type of subcarrier spacing requirements does an NB-IoT Base Station support?
Options:
Option 1: 5
Option 2: .
Option 3: o
Option 4: o
Answer: Option
 2:.


Processing questions:   1%|▏         | 5/366 [00:03<03:03,  1.96it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What is the purpose of the IAB-node OAM procedure?
Options:
Option 1: o
Option 2: o
Option 3: o
Option 4: o
Answer: Option
 1


Processing questions:   2%|▏         | 6/366 [00:03<02:46,  2.16it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What is the purpose of the Control Plane Protocol Stack for the service-based interface?
Options:
Option 1: o
Option 2: o
Option 3: o
Option 4: o
Option 5: o
Answer: Option
 1


Processing questions:   2%|▏         | 7/366 [00:03<02:36,  2.30it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What is the purpose of the Diameter Load Control Mechanism?
Options:
Option 1: o
Option 2: o
Option 3: o
Option 4: l
Option 5: o
Answer: Option
 1


Processing questions:   2%|▏         | 8/366 [00:04<03:13,  1.85it/s]


Instruct: You will answer each question correctly by giving only the Option ID, the number that follows each Option.
The output should be in the format: Option <Option id>

Provide the answer to the following multiple choice question in the specified format.
Question: What message does the AMF send to the NG-RAN during mobility registration, AM policy modification, and handover procedures?
Options:
Option 1: G
Option 2: u
Option 3: l
Option 4: E
Option 5: 2
Answer: Option
 2





IndexError: string index out of range