# Gemini-1.5-Pro Evaluation



Hardware - CPU

# Installations


In [None]:
! pip install -U google-generativeai



# 00 - Google Drive Mount



In [None]:
from google.colab import drive

# Mount Google Drive under ./gdrive, forcing a remount if it is already mounted.
drive.mount(
    mountpoint='./gdrive',
    force_remount=True,
)

Mounted at ./gdrive


# 01 - Imports



In [None]:
import os
import glob
import re
import numpy as np
import pandas as pd
import json
import google.generativeai as genai

from time import sleep
from google.colab import userdata
from pathlib import Path

# 02 - Constants



In [None]:
# Model identifier; it is also embedded in the result file names below.
MODEL_NAME = "gemini-1.5-pro-001"

# Root folder of the preprocessed datasets, read from the Colab secret
# 'IA_DATA_PREPROCESSED'.
PREPROCESSED_DATA_ROOT_PATH = userdata.get('IA_DATA_PREPROCESSED')

# Dataset name -> {"df": loaded CSV, "path": folder containing the CSV},
# for every '<name>_test.csv' found one level below the root folder.
TEST_DATASETS = {
    Path(csv_path).stem.split("_test")[0]: {
        "df": pd.read_csv(csv_path),
        "path": Path(csv_path).parent,
    }
    for csv_path in glob.glob(f'{PREPROCESSED_DATA_ROOT_PATH}/*/*_test.csv')
}

# Same structure for the '<name>_demo.csv' files (few-shot examples).
DEMO_DATASETS = {
    Path(csv_path).stem.split("_demo")[0]: {
        "df": pd.read_csv(csv_path),
        "path": Path(csv_path).parent,
    }
    for csv_path in glob.glob(f'{PREPROCESSED_DATA_ROOT_PATH}/*/*_demo.csv')
}

# Names of the datasets that already have a result file for MODEL_NAME.
RESULT_DATASETS = [
    Path(csv_path).stem.split(f"_{MODEL_NAME}_result_v2")[0]
    for csv_path in glob.glob(
        f'{PREPROCESSED_DATA_ROOT_PATH}/*/*_{MODEL_NAME}_result_v2.csv'
    )
]


# The secret name must be a string literal: the original passed the (still
# undefined) GOOGLE_API_KEY variable itself, which raises NameError.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# System-prompt prefix (in Brazilian Portuguese) describing the binary
# sentiment-classification task and the JSON output schema; the few-shot
# examples are appended to it before it is sent to the model.
# NOTE(review): inside the embedded schema, the value of the first
# 'description' key has no opening quote ("'description': Objeto de saída...").
# The text is left untouched here because editing the prompt changes what the
# model receives -- confirm whether it should be corrected.
BASE_INSTRUCTION = """Você deverá realizar a tarefa de Classificação de Sentimento Binária em relação a polaridade de textos escritos no idioma português brasileiro considerando dois possíveis rótulos de saída: 1 para o sentimentos positivos ou -1 para negativos. A saída produzida deverá ser em formato JSON, seguindo o esquema definido entre os marcadores ```.```
{'type': 'object','description': Objeto de saída fornecido pelo classificador após a classificação de sentimento do texto de entrada.', 'properties': {'polaridade': {'type': 'integer','description': 'Polaridade em relação ao sentimento expressado no texto de entrada. Pode assumir 2 valores: [-1, 1]','enum': [-1,1]}},
  'required': ['polaridade']}```Considere os seguintes exemplos para realizar a predição:"""

# 03 - Functions


In [None]:
def generate_sorted_examples(dataframe: pd.DataFrame, group_size: int = 3) -> str:
    """
    Generate a string of interleaved few-shot examples from a DataFrame.

    Row ``i`` is paired with row ``i + group_size`` for ``i`` in
    ``range(group_size)``, so with the default of 3 the rows are emitted in the
    order 0, 3, 1, 4, 2, 5. Rows are selected by position, so the result does
    not depend on the DataFrame's index labels.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the examples.
                                  It should have columns 'text' and 'label'.
        group_size (int): Number of pairs to emit; the DataFrame must hold at
                          least ``2 * group_size`` rows. Defaults to 3.

    Returns:
        str: A string containing formatted examples of input text and their
             corresponding polarities. Empty when the DataFrame has no rows.

    Raises:
        ValueError: If the DataFrame is non-empty but has fewer than
                    ``2 * group_size`` rows.
    """
    row_count = len(dataframe)
    if row_count == 0:
        return ''
    if row_count < 2 * group_size:
        raise ValueError(
            f"Expected at least {2 * group_size} example rows, got {row_count}."
        )

    texts = dataframe['text']
    labels = dataframe['label']

    def _format_example(position: int) -> str:
        # .iloc keeps the lookup positional even when the index is not 0..n-1.
        return (
            "\n"
            "Exemplo:\n"
            f"'entrada': '{texts.iloc[position]}'\n"
            "'saida':{'polaridade': " + f"{labels.iloc[position]}" + "}"
        )

    return ''.join(
        _format_example(position) + _format_example(position + group_size)
        for position in range(group_size)
    )


def generate_classification_text(text:str)->str:
    """
    Build the classification request for a single input text.

    Args:
        text (str): The input text to be classified.

    Returns:
        str: The header, the quoted input text and the trailing 'saida':
             marker, concatenated without separators after the leading newline.
    """
    pieces = [
        "\n",
        "Classificação de Sentimento:",
        f"'entrada': '{text}'",
        "'saida':",
    ]
    return "".join(pieces)

# 04 - Execution

## 04.01 - Authentication

In [None]:
# Name of the Colab secret holding the Gemini API key. It was referenced by the
# error messages below but never defined, so every handler raised NameError.
GEMINI_API_SECRET_NAME = 'GOOGLE_API_KEY'

try:
    # Read the secret inside the try block: userdata.get() is the call that
    # raises the userdata errors handled below.
    GOOGLE_API_KEY = userdata.get(GEMINI_API_SECRET_NAME)
    genai.configure(api_key=GOOGLE_API_KEY)
except userdata.SecretNotFoundError:
    print(f'Secret not found\n\nThis expects you to create a secret named {GEMINI_API_SECRET_NAME} in Colab\n\nVisit https://makersuite.google.com/app/apikey to create an API key\n\nStore that in the secrets section on the left side of the notebook (key icon)\n\nName the secret {GEMINI_API_SECRET_NAME}')
    raise
except userdata.NotebookAccessError:
    print(f'You need to grant this notebook access to the {GEMINI_API_SECRET_NAME} secret in order for the notebook to access Gemini on your behalf.')
    raise
except Exception:
    # Unknown error: report it and re-raise so the notebook run stops here.
    print(f"There was an unknown error. Ensure you have a secret {GEMINI_API_SECRET_NAME} stored in Colab and it's a valid key from https://makersuite.google.com/app/apikey")
    raise

In [None]:
# Generation settings passed to every generate_content call in this notebook.
generation_config = genai.types.GenerationConfig(
    response_mime_type="application/json",  # request a JSON-formatted reply
    temperature=0,
    max_output_tokens=20,
    candidate_count=1,
)

In [None]:
# Evaluate every test dataset that has no saved result file yet: build the
# few-shot system instruction from the matching demo dataset, classify each
# text with the model, and write the predictions next to the source CSV.
for key, dataset in TEST_DATASETS.items():
    # Datasets listed in RESULT_DATASETS already have a result file; skip them.
    if key in RESULT_DATASETS:
        continue

    print(f'Starting to evaluate dataset: {key}')
    examples = generate_sorted_examples(DEMO_DATASETS[key]['df'])
    instruction = BASE_INSTRUCTION + examples

    # Use MODEL_NAME (not a hard-coded alias) so the model that is queried is
    # the same one named in the result file written below.
    model = genai.GenerativeModel(
        MODEL_NAME,
        system_instruction=instruction,
    )

    results_list = []
    for item in dataset['df']['text']:
        # Each text is sent as an independent single-turn request.
        message = {
            'role': 'user',
            'parts': [generate_classification_text(item)],
        }
        response = model.generate_content([message], generation_config=generation_config)
        try:
            results_list.append(response.text)
        except ValueError as e:
            # Reading response.text failed; keep the raw response object so the
            # predictions stay aligned row-by-row with the input texts.
            print(f'Error: {e}')
            results_list.append(response)
        # Pause between requests (presumably to respect API rate limits -- TODO confirm).
        sleep(3)

    dataset['df']['predictions'] = results_list
    dataset['df'].to_csv(f'{str(dataset["path"])}/{key}_{MODEL_NAME}_result_v2.csv', index=False)
    # Refresh the list of finished datasets from the files now on disk.
    RESULT_DATASETS = [
        Path(result_path).stem.split(f"_{MODEL_NAME}_result_v2")[0]
        for result_path in glob.glob(f'{PREPROCESSED_DATA_ROOT_PATH}/*/*_{MODEL_NAME}_result_v2.csv')
    ]

    print(f'The evaluation of dataset:{key} has ended.')