In [1]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models
import os
import json
import pandas as pd

In [2]:
with open('gemini-ml-esg-sentiment/prompts.json', encoding='utf-8') as arq:
        prompts = json.load(arq)

prompt_string = prompts["prompts"]["prompt_positive_negative_neutral_en"]

In [3]:
dataset = pd.read_csv('gemini-ml-esg-sentiment/ML-ESG-2_English_Train_formatted.csv')
dataset_to_classify = dataset['text'].tolist()

In [4]:
def batches(lista, tamanho_lote):
    for i in range(0, len(lista), tamanho_lote):
        yield lista[i:i + tamanho_lote]

In [5]:
safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH,
}

generation_config = {
    "max_output_tokens": 40,
    "temperature": 0,
    "top_p": 0.95,
}

def generate(prompt_instance):
  vertexai.init(project="aida-412720", location="us-central1")
  model = GenerativeModel("gemini-1.0-pro")
  responses = model.generate_content(
      [prompt_instance],
      generation_config=generation_config,
      safety_settings=safety_settings,
      stream=True,
  )
  response_string = ""
  for response in responses:
    response_string = response_string + response.text
    
  return response_string

In [21]:
prompt_instance = prompt_string + dataset_to_classify[1]
respons = generate(prompt_instance)
print(respons)

## Sentiment Analysis: Neutral 

This text expresses a neutral sentiment. While the information provided is generally positive (e.g., appointment of a new chief product officer with relevant experience), it does not explicitly convey any emotional tone or opinion. The language used is objective and factual, reporting on the event without adding any subjective interpretation. 
 
Here's a breakdown of why the sentiment is neutral: 
* **Positive aspects:** 
    * The company is taking action to fill an important position. 
    * The new hire appears to be qualified for the role, based on their experience. 
* **Neutral aspects:** 
    * The text does not express any opinions or judgments about the appointment or the new hire. 
    * It simply reports on the facts of the situation. 
* **Negative aspects:** 
    * There are no negative aspects mentioned in the text. 
 
Therefore, considering the absence of emotional language and the focus on factual information, the overall sentiment of the 

In [None]:
all_responses = []
i=1
for batch in batches(dataset_to_classify, 100):
    for item in batch:
        prompt_instance = prompt_string + item
        resp = generate(prompt_instance)
        all_responses.append(resp)
        
    print("batch number ", i)
    i+=1
    

batch number  1
batch number  2
batch number  3
batch number  4
batch number  5
batch number  6
batch number  7
batch number  8
batch number  9


In [7]:
print(all_responses)



In [8]:
df_predictions = pd.DataFrame({
    "text": dataset['text'].tolist(),
    "label": dataset['label'].tolist(),
    'response': all_responses,

})

df_predictions.to_csv('gemini-ml-esg-sentiment/ML-ESG-2_English_Train_formatted_responses.csv', index=False)

In [9]:
def detect_sentiment_word(text):
    text_lower = text.lower()
    if "positive" in text_lower:
        return "positive"
    elif "negative" in text_lower:
        return "negative"
    elif "neutral" in text_lower:
        return "neutral"
    else:
        return "undetermined" 

In [10]:
responses_label = []

for response in all_responses:
    resp_label = detect_sentiment_word(response)
    responses_label.append(resp_label)

In [14]:
df_predictions_label = pd.DataFrame({
    "text": dataset['text'].tolist(),
    "label": dataset['label'].tolist(),
    'response': all_responses,
    'responseLabel': responses_label,

})

df_predictions_label.to_csv('gemini-ml-esg-sentiment/ML-ESG-2_English_Train_formatted_responses_with_label.csv', index=False)

In [3]:

responses = pd.read_csv('gemini-ml-esg-sentiment/ML-ESG-2_English_Train_formatted_responses_with_label.csv')

frequency_matrix = pd.crosstab(index=[responses['label'], responses['responseLabel']], columns='count')

frequency_matrix

Unnamed: 0_level_0,col_0,count
label,responseLabel,Unnamed: 2_level_1
Opportunity,negative,19
Opportunity,neutral,153
Opportunity,positive,522
Risk,negative,80
Risk,neutral,28
Risk,positive,6
