In [2]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models
import os
import json
import pandas as pd

In [2]:
# Parse the JSON file holding the prompt templates (decoded as UTF-8).
with open('gemini-ml-esg-sentiment/prompts.json', encoding='utf-8') as prompts_file:
    prompts = json.load(prompts_file)

# Pick the template stored under "prompts" -> "prompt_positive_negative_neutral_en".
prompt_string = prompts["prompts"]["prompt_positive_negative_neutral_en"]

In [4]:
# Read the formatted test set and keep its `text` column as a plain Python list.
dataset = pd.read_csv(
    'gemini-ml-esg-sentiment/ML-ESG-2_English_Testset_formatted.csv'
)
dataset_to_classify = dataset.loc[:, 'text'].tolist()

In [5]:
def batches(lista, tamanho_lote):
    """Yield consecutive slices of ``lista`` with at most ``tamanho_lote`` items.

    Args:
        lista: Sliceable sequence (anything supporting ``len`` and slicing).
        tamanho_lote: Step between slice starts, i.e. the batch size.

    Yields:
        Slices ``lista[start:start + tamanho_lote]`` in order. The last slice is
        shorter when ``len(lista)`` is not a multiple of ``tamanho_lote``; an
        empty ``lista`` yields nothing.
    """
    starts = range(0, len(lista), tamanho_lote)
    yield from (lista[start:start + tamanho_lote] for start in starts)

In [6]:
# Every harm category listed here gets the same threshold: BLOCK_ONLY_HIGH.
safety_settings = {
    harm_category: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH
    for harm_category in (
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
}

# Generation parameters forwarded to the model by generate().
# NOTE(review): the replies recorded in this notebook stop mid-sentence, presumably
# because of the 40-token output cap -- confirm before raising or relying on it.
generation_config = dict(
    max_output_tokens=40,
    temperature=0,
    top_p=0.95,
)

def generate(prompt_instance):
    """Send one prompt to Gemini on Vertex AI and return the reply as a string.

    The request is streamed; the text of each streamed chunk is collected and
    joined in arrival order.

    Args:
        prompt_instance: The complete prompt string to send to the model.

    Returns:
        str: Concatenated text of all chunks that carried text. Chunks without
        text are skipped, so the result can be an empty string.
    """
    # NOTE(review): init and model construction run on every call; they look
    # loop-invariant and could be hoisted by the caller -- confirm before moving.
    vertexai.init(project="aida-412720", location="us-central1")
    model = GenerativeModel("gemini-1.0-pro")
    responses = model.generate_content(
        [prompt_instance],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    text_chunks = []
    for response in responses:
        try:
            text_chunks.append(response.text)
        except (ValueError, AttributeError):
            # Accessing `.text` raises when a chunk has no text part (for
            # example when it was blocked by the safety filters); the exact
            # exception type depends on the SDK version. Skip such chunks so a
            # single empty chunk does not abort a long classification run.
            continue

    return "".join(text_chunks)

In [7]:
# Smoke test: classify the dataset entry at index 1 and show the raw reply.
prompt_instance = prompt_string + dataset_to_classify[1]
sample_response = generate(prompt_instance)
print(sample_response)

## Sentiment Analysis: Positive

The text expresses a positive sentiment for several reasons:

* **Focus on positive actions:** The text highlights AT&T's initiative to open three new Connected Learning Centers in Los Angeles, providing internet access and education tools to underserved communities. This action directly addresses the issue of bridging the digital divide and promoting educational opportunities.
* **Use of positive language:** Words like "vital", "success", "responsible", "leadership", and "training" evoke a sense of


In [8]:
# Classify every text with one model call per item. The texts are walked in
# chunks of 100 only so that a progress line can be printed after each chunk.
all_responses = []
for batch_number, batch in enumerate(batches(dataset_to_classify, 100), start=1):
    all_responses.extend(generate(prompt_string + item) for item in batch)
    print("batch number ", batch_number)
    

batch number  1
batch number  2
batch number  3


In [9]:
# Preview only the first few replies; printing the whole list floods the cell
# output and bloats the saved notebook.
print(all_responses[:3])

['## Sentiment Analysis: \n\nThe sentiment expressed in the text is **Positive**. \n\nHere\'s why:\n\n* **Positive words and phrases:** The text uses words and phrases like "empowers," "move the world forward," "expanding digital access," "protecting the climate," "ensuring people have the skills needed for jobs of the future," and "responsible business goals." These words and phrases all convey a sense of optimism, progress, and positive impact.\n* **Focus', 'Positive. The text describes the opening of new AT&T Connected Learning Centers in Los Angeles, which will provide internet access and education tools for those who face connectivity barriers. This is a positive development that will help to improve the lives of many people in the community. \n \n The text uses positive language such as "vital to their long-term success" and "positive impact on the lives of many people". It also mentions that AT&T has already opened one center and that two more', '## Sentiment Analysis: \n\nThe o

In [10]:
# Put each input text, its `label` value and the raw model reply side by side.
# All three sequences must have the same length or the constructor raises.
df_predictions = pd.DataFrame(
    dict(
        text=dataset['text'].tolist(),
        label=dataset['label'].tolist(),
        response=all_responses,
    )
)

# Save the table as CSV without the index column.
df_predictions.to_csv(
    'gemini-ml-esg-sentiment/ML-ESG-2_English_Testset_formatted_responses.csv',
    index=False,
)

In [11]:
def detect_sentiment_word(text):
    """Map free-form text to a sentiment label by keyword search.

    Matching is case-insensitive and substring-based. Keywords are tried in a
    fixed order -- "positive", then "negative", then "neutral" -- so a text
    that mentions several of them gets the first label in that order,
    regardless of where each word appears in the text.

    Args:
        text: String to inspect.

    Returns:
        str: "positive", "negative" or "neutral" for the first keyword found,
        otherwise "undetermined".
    """
    lowered = text.lower()
    for keyword in ("positive", "negative", "neutral"):
        if keyword in lowered:
            return keyword
    return "undetermined"

In [12]:
# Reduce each raw model reply to a single sentiment label, keeping the order
# of `all_responses`.
responses_label = [detect_sentiment_word(response) for response in all_responses]

In [13]:
# Same table as `df_predictions` (text, label, response) with the extracted
# label appended as a fourth column, `responseLabel`.
df_predictions_label = df_predictions.assign(responseLabel=responses_label)

# Save the labelled table as CSV without the index column.
df_predictions_label.to_csv(
    'gemini-ml-esg-sentiment/ML-ESG-2_English_Testset_formatted_responses_with_label.csv',
    index=False,
)

In [3]:

# Reload the labelled predictions from disk, so this cell only needs the saved
# CSV rather than the in-memory results of the generation cells.
responses = pd.read_csv(
    'gemini-ml-esg-sentiment/ML-ESG-2_English_Testset_formatted_responses_with_label.csv'
)

# Count rows for every (label, responseLabel) pair; the single output column
# is named 'count'.
row_keys = [responses[column] for column in ('label', 'responseLabel')]
frequency_matrix = pd.crosstab(index=row_keys, columns='count')

frequency_matrix

Unnamed: 0_level_0,col_0,count
label,responseLabel,Unnamed: 2_level_1
Opportunity,negative,7
Opportunity,neutral,52
Opportunity,positive,132
Risk,negative,15
Risk,neutral,12
