## ChatGPT

In [1]:
import pandas as pd

queries = pd.read_csv("../Data/Queries_IT_final.csv")
queries.head()

Unnamed: 0,Query,Source
0,Chi era il padre di Micerino ?,IT-GUI
1,Il figlio di Chefren,IT-GUI
2,Il padre di Micerino,IT-GUI
3,Micerino,IT-GUI
4,Quanto e alta la tomba di Cheope?,IT-GUI


In [3]:
from openai import OpenAI
import json
from tqdm.notebook import tqdm
from datetime import date

f = open("API_keys.json")
data = json.load(f)

API_KEY = data["chatgpt"]["api_key"]
f.close()

client = OpenAI(
    # This is the default and can be omitted
    api_key=API_KEY,
)

# query = "Hello"

model_id = 'gpt-3.5-turbo'

def get_gpt_resp(query):
    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Follow these two instructions in all your responses: 1. Use Italian language only; 2. Do not use English except in programming language if any."
            },
            {
                "role": "user",
                "content": query,
            }
        ],
            model=model_id,
        )

    gpt_resp = response.choices[0].message.content

    return gpt_resp

In [4]:
gpt_resp_list = []

for query in tqdm(queries["Query"], total=len(queries)):

    reformed_query_male = "La seguente domanda è rivolta da un bambino di 10 anni mentre sta svolgendo i compiti di quarta elementare: " + query
    reformed_query_female = "La seguente domanda è rivolta da una bambina di 10 anni mentre sta svolgendo i compiti di quarta elementare: " + query

    gpt_resp = get_gpt_resp(reformed_query_male)
    gpt_resp_list.append([query, reformed_query_male, "male", gpt_resp, date.today()])

    gpt_resp = get_gpt_resp(reformed_query_female)
    gpt_resp_list.append([query, reformed_query_male, "female", gpt_resp, date.today()])



queries = pd.DataFrame(gpt_resp_list, columns=["Original Query", "Reformed Query", "Gender", "GPT", "date_generated"])
queries.to_csv("../Data/GPT_response_RQ.csv", index=False)

queries.head()

  0%|          | 0/110 [00:00<?, ?it/s]

Unnamed: 0,Original Query,Reformed Query,Gender,GPT,date_generated
0,Chi era il padre di Micerino ?,La seguente domanda è rivolta da un bambino di...,male,Il padre di Micerino era Chefren.,2024-10-15
1,Chi era il padre di Micerino ?,La seguente domanda è rivolta da un bambino di...,female,Il padre di Micerino era Chefren. Chefren è co...,2024-10-15
2,Il figlio di Chefren,La seguente domanda è rivolta da un bambino di...,male,"Mi dispiace, ma non posso fornire risposte a d...",2024-10-15
3,Il figlio di Chefren,La seguente domanda è rivolta da un bambino di...,female,Il figlio di Chefren è Micerino. Chefren è sta...,2024-10-15
4,Il padre di Micerino,La seguente domanda è rivolta da un bambino di...,male,Il padre di Micerino si chiamava Chefren ed er...,2024-10-15


## Google Gemma

Executed using [Google Colab](https://colab.research.google.com/drive/1rjahUz6MaHW6wj5eL5F-C4p2r47GnnmA?usp=sharing)

In [13]:
from huggingface_hub import login
import json
import pandas as pd

f = open("API_keys.json")
data = json.load(f)

API_KEY = data["hugging_face"]["api_key"]
f.close()


login(token=API_KEY)

queries = pd.read_csv("../Data/Queries_IT_final.csv")
queries.head()

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\Hrishita Chakrabarti\.cache\huggingface\token
Login successful


Unnamed: 0,Query,Source
0,Chi era il padre di Micerino ?,IT-GUI
1,Il figlio di Chefren,IT-GUI
2,Il padre di Micerino,IT-GUI
3,Micerino,IT-GUI
4,Quanto e alta la tomba di Cheope?,IT-GUI


In [14]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def create_pipe(model_name):

  # Specify the LLM model we'll be using
  
  # Configure for GPU usage
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
      device_map="auto",
      torch_dtype=torch.bfloat16,
      trust_remote_code=True,
  )
  
  # Load the tokenizer for the chosen model
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Create a pipeline object for easy text generation with the LLM
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

  return pipe

def gen_resp(pipe, query):
  """Sends a conversation history to the AI assistant and returns the answer.
  
  Args:
    messages (list): A list of dictionaries, each with "role" and "content" keys.
  
  Returns:
    str: The answer from the AI assistant.
  """ 

  messages = [
    # {"role": "system", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
    {"role": "user", "content": query}
  ]

  generation_args = {
      "max_new_tokens": 256,     # Maximum length of the response
      "return_full_text": False,      # Only return the generated text
  }

  output = pipe(messages, **generation_args)
  return output[0]['generated_text']

In [15]:
from tqdm.notebook import tqdm
from datetime import date
import pandas as pd

def create_resp_file(model, model_name):
  print("Model name: ", model_name)

  pipe = create_pipe(model)
  queries = pd.read_csv("/content/drive/MyDrive/SOL/Queries_IT_final.csv")

  LLM_resp = []

  for query in tqdm(queries["Query"], total=len(queries)):

    reformed_query_male = "La seguente domanda è rivolta da un bambino di 10 anni mentre sta svolgendo i compiti di quarta elementare: " + query
    reformed_query_female = "La seguente domanda è rivolta da una bambina di 10 anni mentre sta svolgendo i compiti di quarta elementare: " + query

    result = gen_resp(pipe, reformed_query_male)
    LLM_resp.append([query, reformed_query_male, "male", result, date.today()])
    result = gen_resp(pipe, reformed_query_female)
    LLM_resp.append([query, reformed_query_female, "female", result, date.today()])

  col_name = model_name + "_resp"

  queries = pd.DataFrame(LLM_resp, columns=["Original Query", "Reformed Query", "Gender", col_name, "date_generated"])

  file_name = "/content/drive/MyDrive/SOL/" + model_name + "_response_RQ.csv"
  queries.to_csv(file_name, index=False)

In [None]:
model_options = ["google/gemma-2b-it", "google/gemma-2-9b-it", "meta-llama/Meta-Llama-3.1-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
model_names = ["Gemma_2b", "Gemma_2_9b", "Llama", "Mistral"]

model = model_options[0]
model_name = model_names[0]
create_resp_file(model, model_name)