## Queries

In [34]:
import pandas as pd

# Location of the query set, relative to the notebook's working directory.
query_file_name = "Queries_final.csv"
query_file_path = f"../Data/Queries/{query_file_name}"

# Load all queries into a DataFrame and preview the first rows.
queries = pd.read_csv(query_file_path)
queries.head()

Unnamed: 0,QID,Query,Prompt Type,Task Sentiment
0,QGEN1,tornado,General,
1,QGEN2,tornado,General,
2,QGEN3,piramide egizia,General,
3,QGEN4,piramidi,General,
4,QGEN5,qual è la piramide egizia più alta,General,


In [35]:
# Number of rows (one per query) in the loaded query set.
queries.shape[0]

293

## Prompts

In [None]:
# System prompts for the three prompt types. Each prompt is the previous one
# with one more numbered instruction appended, so they are built incrementally.
baseline = "Follow these instructions in all your responses: 1. Use Italian language only; 2. Do not use English except in programming language if any."
user_group_aware = baseline + "; 3. The response is meant for a child who is 9-11 years old."
user_need_aware = user_group_aware + "; 4. Be mindful of the child's emotional intelligence when generating the response."

# Maps the prompt-type label (also used in output file names) to its prompt text.
prompts_list = dict(
    baseline=baseline,
    user_group_aware=user_group_aware,
    user_need_aware=user_need_aware,
)

## ChatGPT

In [37]:
from openai import OpenAI
import json
from tqdm.notebook import tqdm
from datetime import date

# Read the OpenAI key from the local API_keys.json file. The context manager
# closes the file even when the JSON cannot be parsed.
with open("API_keys.json") as f:
    data = json.load(f)

API_KEY = data["openAI_api2"]

# Client shared by every ChatGPT request issued in this notebook.
client = OpenAI(api_key=API_KEY)

# Chat model used for all ChatGPT responses.
model_id = 'chatgpt-4o-latest'

def get_gpt_resp(query, instructions):
    """Ask the OpenAI chat model for a single reply to ``query``.

    Args:
        query: Text sent as the user message.
        instructions: Text sent as the system message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    system_message = {"role": "system", "content": instructions}
    user_message = {"role": "user", "content": query}

    completion = client.chat.completions.create(
        model=model_id,
        messages=[system_message, user_message],
    )
    return completion.choices[0].message.content

# get_gpt_resp("Hello how are you?", "Respond in Italian.")

In [None]:
# Query ChatGPT once per (prompt type, query) pair and write the answers for
# each prompt type to its own CSV file under ../Data/.
for prompt, instructions in prompts_list.items():
    LLM_resp = []
    for _, row in tqdm(queries.iterrows(), total=len(queries)):
        qid, query, task_sent = row["QID"], row["Query"], row["Task Sentiment"]
        result = get_gpt_resp(query, instructions)
        LLM_resp.append([prompt, qid, task_sent, "GPT", result, date.today()])

    LLM_resp_df = pd.DataFrame(
        LLM_resp,
        columns=["Prompt Type", "QID", "Task Sentiment", "IAS", "Resp", "date_generated"],
    )
    file_name = f"gpt_resp_{prompt}.csv"
    LLM_resp_df.to_csv(f"../Data/{file_name}", index=False)

## Google Gemma

In [33]:
import json
from huggingface_hub import whoami

# Read the Hugging Face token from the local API_keys.json file. The context
# manager closes the file even when the JSON cannot be parsed.
with open("API_keys.json") as f:
    data = json.load(f)
api_token = data["huggingface_api"]

# Look up the account that the token belongs to.
user = whoami(token=api_token)

In [None]:
from huggingface_hub import login

# Authenticate with the token already read from API_keys.json (api_token)
# rather than the interactive prompt, so the notebook can run unattended.
login(token=api_token)

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Specify the LLM model we'll be using
model_name = "google/gemma-2-2b-it"

# Load the weights once, in bfloat16, with automatic device placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    # NOTE(review): this permits running code shipped in the model repo;
    # confirm it is actually required for this model.
    trust_remote_code=True,
)

# Load the tokenizer for the chosen model
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the text-generation pipeline from the objects loaded above. Passing
# only the model name made the pipeline load a second, separate copy of the
# model with default settings and left `model` and `tokenizer` unused.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def get_gemma_resp(query, instructions):
    """Generate a reply to ``query`` with the text-generation pipeline.

    The conversation passed to the pipeline has three turns: an empty user
    turn, an assistant turn holding ``instructions``, and a user turn
    holding ``query``.

    Args:
        query (str): Text placed in the final user turn.
        instructions (str): Text placed in the assistant turn before the query.

    Returns:
        str: The ``generated_text`` field of the first pipeline output.
    """
    opening_turn = {"role": "user", "content": ""}
    instruction_turn = {"role": "assistant", "content": instructions}
    query_turn = {"role": "user", "content": query}

    # return_full_text=False is the only generation option passed;
    # no max_new_tokens is set here.
    first_output = pipe(
        [opening_turn, instruction_turn, query_turn],
        return_full_text=False,
    )[0]
    return first_output["generated_text"]

# Quick manual check that the pipeline answers a one-off request.
get_gemma_resp(query="Hi!", instructions="Respond in Italian.")

In [None]:
from tqdm.notebook import tqdm
from datetime import date
import pandas as pd

# Run every query through Gemma under each prompt type and write the answers
# for each prompt type to its own CSV file under ../Data/.
for prompt, instructions in prompts_list.items():
    LLM_resp = []
    for _, row in tqdm(queries.iterrows(), total=len(queries)):
        qid, query, task_sent = row["QID"], row["Query"], row["Task Sentiment"]
        result = get_gemma_resp(query, instructions)
        LLM_resp.append([prompt, qid, task_sent, "GEM", result, date.today()])

    LLM_resp_df = pd.DataFrame(
        LLM_resp,
        columns=["Prompt Type", "QID", "Task Sentiment", "IAS", "Resp", "date_generated"],
    )
    file_name = f"gemma_resp_{prompt}.csv"
    LLM_resp_df.to_csv(f"../Data/{file_name}", index=False)