## Queries

In [None]:
import pandas as pd

# Name of the query CSV inside ../Data/ (blank in this copy of the notebook;
# NOTE(review): must be filled in before the cell can load anything).
query_file_name = ""
query_file_path = f"../Data/{query_file_name}"

# Load the query table and preview its first rows.
# NOTE(review): the preview shows garbled accents in "Query" (e.g. "Ã¨"), so the
# CSV may have been saved with a mismatched encoding — confirm at the source.
queries = pd.read_csv(query_file_path)
queries.head()

Unnamed: 0,QID,Query,Prompt Type,Task Sentiment
0,qGEN1,tornado,General,
1,qGEN2,tornado,General,
2,qGEN3,piramide egizia,General,
3,qGEN4,piramidi,General,
4,qGEN5,qual Ã¨ la piramide egizia piÃ¹ alta,General,


In [2]:
# Number of rows (queries) in the loaded table
len(queries)

293

## Bing

In [None]:
import json
from tqdm.notebook import tqdm
import httpx
from datetime import datetime

# Read the Bing subscription key. The context manager closes the file even if
# the JSON cannot be parsed.
with open("API_keys.json") as f:
    data = json.load(f)

key = data["bing_api"]
SERP_endpoint = "https://api.bing.microsoft.com/v7.0/search"
location = "global"

# Request headers: the subscription key plus browser-like Accept/User-Agent values.
headers = {
    'Ocp-Apim-Subscription-Key': key,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
}

# One row per returned web result: [QID, Query, Prompt Type, IAS, Resp, Rank, date_generated]
SERP_results = []
today = datetime.now().strftime("%Y_%m_%d")

for _, row in tqdm(queries.iterrows(), total=len(queries)):
    qid = row["QID"]
    query = row["Query"]
    prompt_type = row["Prompt Type"]
    params = {
        'q': query,
        'count': 10, # number of results to be displayed
        'setLang': 'it-IT', # setting the language of the results to Italian
        'mkt':'it-IT', # setting the market domain to Italy
        'safeSearch': 'Strict'
    }

    # Default to the query we sent, so the failure row below never hits an
    # undefined name (first iteration) or a stale value from the previous query.
    asked_query = query
    try:
        # The request is inside the try so a network error on one query is
        # recorded as a failed row instead of aborting the whole run before
        # anything is written to disk.
        SERP_response = httpx.get(url=SERP_endpoint, headers=headers, params=params)
        SERP_result_set = SERP_response.json()
        asked_query = SERP_result_set['queryContext']['originalQuery']
        # Ranks are 1-based positions in the order the API returned the pages.
        for rank, result in enumerate(SERP_result_set['webPages']['value'], start=1):
            web_title = result["name"]
            web_snippet = result["snippet"]
            SERP_results.append([qid, asked_query, prompt_type, "Bing", web_title + ". " + web_snippet, rank, today])
    except Exception as error:
        # Best effort: report which query failed and keep a placeholder row so
        # every QID still appears in the output file.
        print(f"[{qid}] Bing request failed: {error!r}")
        SERP_results.append([qid, asked_query, prompt_type, "Bing", None, None, today])

SERP_df = pd.DataFrame(SERP_results, columns=["QID", "Query", "Prompt Type", "IAS", "Resp", "Rank", "date_generated"])
SERP_df.to_csv("../Data/bing_resp.csv", index=False)

  0%|          | 0/293 [00:00<?, ?it/s]

## ChatGPT

In [None]:
from openai import OpenAI
import json
from tqdm.notebook import tqdm
from datetime import date

# Read the OpenAI key from the local key file. The context manager closes the
# handle even if json.load raises.
with open("API_keys.json") as f:
    data = json.load(f)

API_KEY = data["openAI_api"]

# Client used by every chat-completion request below; the key is passed
# explicitly because it comes from the JSON file above.
client = OpenAI(
    api_key=API_KEY,
)

# Model identifier sent with each request.
# model_id = 'gpt-4o-mini'
model_id = 'chatgpt-4o-latest'

def get_gpt_resp(query):
    """Ask the chat model a single question and return its reply.

    The request is made through the notebook-level ``client`` using
    ``model_id``. A system turn instructing the model to answer in Italian
    is placed before the user turn.

    Args:
        query: Text sent as the content of the user message.

    Returns:
        The ``content`` of the message in the first choice of the response.
    """
    system_turn = {
        "role": "system",
        "content": "Follow these two instructions in all your responses: 1. Use Italian language only; 2. Do not use English except in programming language if any.",
    }
    user_turn = {"role": "user", "content": query}

    completion = client.chat.completions.create(
        model=model_id,
        messages=[system_turn, user_turn],
    )
    return completion.choices[0].message.content

# get_gpt_resp("Hello how are you?")

In [None]:
# Output schema: one row per query, tagged with the system name and the run date.
resp_columns = ["QID", "Query", "Prompt Type", "IAS", "Resp", "date_generated"]
LLM_resp = []

for _, row in tqdm(queries.iterrows(), total=len(queries)):
    # Pull the three fields needed from the current query row.
    qid, query, prompt_type = row[["QID", "Query", "Prompt Type"]]

    result = get_gpt_resp(query)
    LLM_resp += [[qid, query, prompt_type, "GPT", result, date.today()]]

# Persist all responses in one CSV, then display the frame as the cell output.
LLM_resp_df = pd.DataFrame(LLM_resp, columns=resp_columns)
LLM_resp_df.to_csv("../Data/gpt_resp.csv", index=False)

LLM_resp_df

  0%|          | 0/293 [00:00<?, ?it/s]

Unnamed: 0,QID,Query,Prompt Type,IAS,Resp,date_generated
0,qGEN1,tornado,General,GPT,Un tornado è un violento vortice d'aria in rot...,2025-02-12
1,qGEN2,tornado,General,GPT,"Il termine ""tornado"" si riferisce a un violent...",2025-02-12
2,qGEN3,piramide egizia,General,GPT,Le **piramidi egizie** sono monumenti funerari...,2025-02-12
3,qGEN4,piramidi,General,GPT,Le piramidi sono strutture architettoniche di ...,2025-02-12
4,qGEN5,qual Ã¨ la piramide egizia piÃ¹ alta,General,GPT,La piramide egizia più alta è la **Grande Pira...,2025-02-12
...,...,...,...,...,...,...
288,qEM78,Quale ghiacciaio si sta scioglendo in modo pre...,Emotionally Charged,GPT,Uno dei ghiacciai che si sta sciogliendo in mo...,2025-02-12
289,qEM79,Cosa e la isola di plastica,Emotionally Charged,GPT,L'isola di plastica è una vasta concentrazione...,2025-02-12
290,qEM80,ocean cleanup,Emotionally Charged,GPT,**The Ocean Cleanup** è un'organizzazione no-p...,2025-02-12
291,qEM81,Dove si trova Ä¾isola di plastica,Emotionally Charged,GPT,"L'isola di plastica, conosciuta anche come **G...",2025-02-12


## Google Gemma

In [None]:
import json
from huggingface_hub import whoami

# Read the Hugging Face token from the local key file. The context manager
# closes the handle even if json.load raises.
with open("API_keys.json") as f:
    data = json.load(f)

api_token = data["huggingface_api"]

# Call the Hub's whoami endpoint with this token; the result is kept in `user`.
user = whoami(token=api_token)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hub identifier of the Gemma checkpoint used for generation
model_name = "google/gemma-2-2b-it"

# Load the causal-LM weights in bfloat16; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True
)

# Tokenizer that belongs to the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text-generation pipeline wrapping the model and tokenizer; used by get_gemma_resp
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def get_gemma_resp(query):
  """Run one query through the notebook-level ``pipe`` and return the generated text.

  The conversation passed to the pipeline has three turns: an empty user
  turn, an assistant turn carrying the Italian-only instructions, and a
  final user turn containing ``query``.

  Args:
    query (str): Text used as the content of the last user message.

  Returns:
    The ``generated_text`` field of the first pipeline output. Because
    ``return_full_text`` is False, the prompt is not included.
  """
  conversation = [
      {"role":"user", "content":""},
      {"role": "assistant", "content": "Follow these two instructions in all your responses: 1. Use Italian language only; 2. Do not use English except in programming language if any."},
      {"role": "user", "content": query},
  ]

  # No explicit max_new_tokens is passed; only the full-text echo is turned off.
  generations = pipe(conversation, return_full_text=False)
  first_generation = generations[0]
  return first_generation['generated_text']

# gen_resp("Hi!")

In [None]:
from tqdm.notebook import tqdm
from datetime import date
import pandas as pd

def create_resp_file():
  """Query Gemma with every row of ``queries`` and return the answers as a DataFrame.

  Reads the notebook-level ``queries`` frame and calls ``get_gemma_resp``
  once per row.

  Returns:
    pandas.DataFrame with columns QID, Query, Prompt Type, IAS (always
    "Gemma"), Resp and date_generated.
  """
  out_columns = ["QID", "Query", "Prompt Type", "IAS", "Resp", "date_generated"]
  rows = []

  for _, entry in tqdm(queries.iterrows(), total=len(queries)):
      # Fields needed from the current query row
      qid, query, prompt_type = entry[["QID", "Query", "Prompt Type"]]

      answer = get_gemma_resp(query)
      rows += [[qid, query, prompt_type, "Gemma", answer, date.today()]]

  return pd.DataFrame(rows, columns=out_columns)

# Resets the notebook-level LLM_resp list; create_resp_file builds its own local list.
LLM_resp = []

In [None]:
# Destination of the Gemma responses
file_name = "../Data/gemma_resp.csv"

# Generate all responses, then write them without the index column.
df = create_resp_file()
df.to_csv(path_or_buf=file_name, index=False)