<a href="https://colab.research.google.com/github/Cleander/analise-de-sentimentos/blob/main/analise_de_sentimentos_bertimbal_pi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install transformers datasets torch pandas

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import pandas as pd

# Load the review dataset from `olist.csv` in the current working directory.
df = pd.read_csv('olist.csv')

# Preview the first rows to inspect the available columns.
df.head()

Unnamed: 0,original_index,review_text,review_text_processed,review_text_tokenized,polarity,rating,kfold_polarity,kfold_rating
0,97262,Perfeito....chegou antes do prazo.....,perfeito....chegou antes do prazo.....,"['perfeito', 'chegou', 'antes', 'do', 'prazo']",1.0,5,1,1
1,72931,Foi uma ótima compra! Chegou antes mesmo do pr...,foi uma otima compra! chegou antes mesmo do pr...,"['foi', 'uma', 'otima', 'compra', 'chegou', 'a...",1.0,5,1,1
2,19659,Recebi muito rapido e um otimo custo beneficio,recebi muito rapido e um otimo custo beneficio,"['recebi', 'muito', 'rapido', 'um', 'otimo', '...",1.0,5,1,1
3,43054,Recomendo,recomendo,['recomendo'],1.0,5,1,1
4,59202,Só veio uma capa comprei 3 aí paguei. Mais de ...,so veio uma capa comprei 3 ai paguei. mais de ...,"['so', 'veio', 'uma', 'capa', 'comprei', 'ai',...",0.0,1,1,1


In [None]:
# Count the missing values in each column.
df.isnull().sum()

Unnamed: 0,0
original_index,0
review_text,0
review_text_processed,1
review_text_tokenized,0
polarity,3665
rating,0
kfold_polarity,0
kfold_rating,0


In [None]:
# Report the row count before and after dropping rows that lack either the
# tokenized review or the polarity label.
print(f"Linhas antes da limpeza: {df.shape[0]}")
df = df.dropna(subset=['review_text_tokenized', 'polarity'])
print(f"Linhas após a limpeza: {df.shape[0]}")

Linhas antes da limpeza: 41744
Linhas após a limpeza: 38079


In [None]:
# Split on the precomputed `kfold_polarity` fold id:
# folds 2-8 -> training, fold 9 -> validation, fold 1 -> test.
# NOTE(review): the three subsets add up to 34272 rows while the cleaned frame
# has 38079, so 3807 rows belong to a fold that is in none of the splits
# (presumably fold 10 — confirm whether leaving it out is intentional).
train_data = df[(df['kfold_polarity'] >= 2) & (df['kfold_polarity'] <= 8)]
val_data = df[df['kfold_polarity'] == 9]
test_data = df[df['kfold_polarity'] == 1]

# Print the size of each split.
print(f"Treinamento: {len(train_data)}")
print(f"Validação: {len(val_data)}")
print(f"Teste: {len(test_data)}")

Treinamento: 26656
Validação: 3808
Teste: 3808


In [None]:
from transformers import BertTokenizer

# Load the tokenizer published with the `neuralmind/bert-base-portuguese-cased` checkpoint.
tokenizer = BertTokenizer.from_pretrained('neuralmind/bert-base-portuguese-cased')

In [None]:
def tokenize_function(texts):
    """Encode a list of review strings with the module-level `tokenizer`.

    Args:
        texts: list of raw review strings.

    Returns:
        The tokenizer output (including `input_ids` and `attention_mask`),
        padded to the longest sequence in `texts` and truncated to at most
        128 tokens.
    """
    return tokenizer(texts, padding=True, truncation=True, max_length=128)

# Encode the raw review text. The `review_text_tokenized` column holds the
# *string representation* of a Python list ("['perfeito', 'chegou', ...]"), so
# feeding it to BERT trains the model on brackets, quotes and commas, and on
# lower-cased/accent-stripped words, while inference receives plain sentences.
# `review_text` has no missing values and matches what the model sees later.
train_encodings = tokenize_function(train_data['review_text'].tolist())
val_encodings = tokenize_function(val_data['review_text'].tolist())
test_encodings = tokenize_function(test_data['review_text'].tolist())

In [None]:
import torch
from datasets import Dataset

def _make_split(frame, encodings):
    """Build the label tensor and the `Dataset` for one data split.

    Args:
        frame: DataFrame slice of the split; its `polarity` column is cast to int64.
        encodings: tokenizer output for the same rows, in the same order.

    Returns:
        A `(labels, dataset)` tuple where `dataset` has the columns
        `input_ids`, `attention_mask` and `labels`.
    """
    labels = torch.tensor(frame['polarity'].values, dtype=torch.long)
    columns = {
        'input_ids': encodings['input_ids'],
        'attention_mask': encodings['attention_mask'],
        'labels': labels
    }
    return labels, Dataset.from_dict(columns)


train_labels, train_dataset = _make_split(train_data, train_encodings)
val_labels, val_dataset = _make_split(val_data, val_encodings)
test_labels, test_dataset = _make_split(test_data, test_encodings)


In [None]:
from transformers import BertForSequenceClassification

# Binary classification (polarity): load the pretrained checkpoint with a 2-label classification head.
model = BertForSequenceClassification.from_pretrained('neuralmind/bert-base-portuguese-cased', num_labels=2)

In [None]:
from transformers import TrainingArguments

# Fine-tuning configuration.
# The hyperparameters below are those of the Optuna trial with the LOWEST
# validation loss in the search logged in this notebook (trial 4, eval_loss ~0.1643).
# The values used before belonged to trial 2, which had the HIGHEST validation
# loss (~0.2002) and was only reported as "best" because the search was run
# with direction="maximize" on a loss objective.
training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",  # evaluate on the validation split after every epoch
    learning_rate=1.3720528430905662e-05,  # previously 1.2721865183729507e-05
    per_device_train_batch_size=8,  # previously 16
    per_device_eval_batch_size=64,
    num_train_epochs=2,  # previously 3
    weight_decay=0.24199327150893538,  # previously 0.11310079021892039
    gradient_accumulation_steps=4,  # previously 1
    fp16=True,
)

In [None]:
from transformers import Trainer

# Trainer for the fine-tuning run: trains on the training split and uses the
# validation split as its evaluation dataset.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

In [None]:
# Run the fine-tuning loop.
trainer.train()

In [None]:
# Evaluate the trained model on the test split (fold 1, which is not part of training or validation).
trainer.evaluate(test_dataset)

In [None]:
from sklearn.metrics import accuracy_score, classification_report
import torch

def compute_metrics(dataset):
    """Print the accuracy and the per-class report of the global `trainer` on `dataset`.

    Args:
        dataset: a dataset with a `labels` column that `trainer.predict` accepts.

    Returns:
        None; the metrics are printed.
    """
    output = trainer.predict(dataset)
    logits = torch.tensor(output.predictions)
    preds = logits.argmax(dim=1)  # index of the highest logit = predicted class
    labels = dataset["labels"]

    accuracy = accuracy_score(labels, preds)
    # Class 0 is reported as "Negativo" and class 1 as "Positivo".
    report = classification_report(labels, preds, target_names=["Negativo", "Positivo"])

    print(f"Acurácia: {accuracy:.4f}")
    print("Relatório de Classificação:\n", report)


compute_metrics(test_dataset)

In [None]:
# Save the fine-tuned model and the tokenizer files into the same local folder.
model.save_pretrained("./sentiment_model")
tokenizer.save_pretrained("./sentiment_model")

In [None]:
from google.colab import drive
# Mount Google Drive in the Colab runtime.
drive.mount('/content/drive')

# Save a second copy of the model and tokenizer to Drive; the reporting section
# of this notebook loads the model from this path.
model.save_pretrained("/content/drive/MyDrive/sentiment_model")
tokenizer.save_pretrained("/content/drive/MyDrive/sentiment_model")

In [None]:
# Reload the saved model and tokenizer from disk to check that the export works.
model = BertForSequenceClassification.from_pretrained("./sentiment_model")
tokenizer = BertTokenizer.from_pretrained("./sentiment_model")
model.eval()  # make inference mode explicit (disables dropout)

def predict_sentiment(texts):
    """Predict the polarity class of each text with the module-level model.

    Args:
        texts: list of raw strings.

    Returns:
        A 1-D tensor of class indices, one per input text
        (1 = positive, 0 = negative, as in the training labels).
    """
    encodings = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors="pt")
    # No gradients are needed for inference; this avoids building the autograd
    # graph and matches how `analisar_sentimentos` runs the model.
    with torch.no_grad():
        outputs = model(**encodings)
    predictions = outputs.logits.argmax(dim=-1)
    return predictions

# Smoke test with one negative and one positive sentence.
textos = ["Este úlitmo lançamento não foi legal", "Não podia ter comprado um produto melhor."]
predictions = predict_sentiment(textos)
print(predictions)

## Fazendo Fine-tuning dos Hiperparâmetros

In [None]:
pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.1-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.1-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.1 colorlog-6.9.0 optuna-4.3.0


In [None]:
from transformers import Trainer, TrainingArguments
import optuna

def model_init():
    """Return a freshly initialised classifier so every trial starts from the same pretrained weights."""
    return BertForSequenceClassification.from_pretrained(
        'neuralmind/bert-base-portuguese-cased',
        num_labels=2
    )

def hp_space(trial):
    """Define the Optuna search space.

    Args:
        trial: the Optuna trial used to sample values.

    Returns:
        A dict mapping `TrainingArguments` field names to the sampled values.
    """
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [8, 16, 32]),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 2, 4),
        "weight_decay": trial.suggest_float("weight_decay", 0.0, 0.3),
        "gradient_accumulation_steps": trial.suggest_categorical("gradient_accumulation_steps", [1, 2, 4]),
    }

# Base arguments shared by all trials; the sampled hyperparameters override the
# corresponding fields for each trial.
optuna_args = TrainingArguments(
    output_dir="./optuna_test",
    eval_strategy="epoch",
    save_strategy="epoch",
    per_device_eval_batch_size=64,
    fp16=True,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

optuna_trainer = Trainer(
    model_init=model_init,
    args=optuna_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

print("Iniciando busca de hiperparâmetros com Optuna...")

# No `compute_metrics` is configured, so the objective of each trial is the
# validation loss. A loss must be MINIMIZED: with direction="maximize" the
# search reports the trial with the worst validation loss as the best one.
best_run = optuna_trainer.hyperparameter_search(
    direction="minimize",
    n_trials=5,  # increase for a broader search
    hp_space=hp_space,
    backend="optuna"
)

print("\nMelhores hiperparâmetros encontrados:")
for param, value in best_run.hyperparameters.items():
    print(f"{param}: {value}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-05-26 02:26:43,939] A new study created in memory with name: no-name-8107899b-f2c6-47ff-bcaa-a8391a7e1ac7


Iniciando busca de hiperparâmetros com Optuna...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcleandersilva[0m ([33mcleandersilva-portal-puc-campinas[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,0.2066,0.158807
2,0.1287,0.164498


[I 2025-05-26 02:34:50,355] Trial 0 finished with value: 0.1644984930753708 and parameters: {'learning_rate': 2.5897590035892957e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 2, 'weight_decay': 0.12786594774902446, 'gradient_accumulation_steps': 4}. Best is trial 0 with value: 0.1644984930753708.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/loss,▁█
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▃▄▇██
train/global_step,▁▃▄▇██
train/grad_norm,█▇▁
train/learning_rate,█▄▁
train/loss,█▄▁

0,1
eval/loss,0.1645
eval/runtime,6.0303
eval/samples_per_second,631.477
eval/steps_per_second,9.95
total_flos,3506744145838080.0
train/epoch,2.0
train/global_step,1666.0
train/grad_norm,0.64303
train/learning_rate,0.0
train/loss,0.1287


Epoch,Training Loss,Validation Loss
1,No log,0.173142


[I 2025-05-26 02:40:02,152] Trial 1 finished with value: 0.17314240336418152 and parameters: {'learning_rate': 1.1572306976108192e-05, 'per_device_train_batch_size': 32, 'num_train_epochs': 2, 'weight_decay': 0.27942418129188995, 'gradient_accumulation_steps': 4}. Best is trial 1 with value: 0.17314240336418152.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/loss,█▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁██
train/global_step,▁██

0,1
eval/loss,0.17314
eval/runtime,5.966
eval/samples_per_second,638.285
eval/steps_per_second,10.057
total_flos,3496219703623680.0
train/epoch,1.994
train/global_step,416.0
train_loss,0.21086
train_runtime,308.5801
train_samples_per_second,172.766


Epoch,Training Loss,Validation Loss
1,0.1908,0.1811
2,0.1498,0.17325
3,0.1224,0.20024


[I 2025-05-26 02:51:35,579] Trial 2 finished with value: 0.20023983716964722 and parameters: {'learning_rate': 1.2721865183729507e-05, 'per_device_train_batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 0.11310079021892039, 'gradient_accumulation_steps': 1}. Best is trial 2 with value: 0.20023983716964722.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/loss,▃▁█
eval/runtime,█▄▁
eval/samples_per_second,▁▅█
eval/steps_per_second,▁▄█
train/epoch,▁▂▃▃▃▄▅▅▆▆▇██
train/global_step,▁▂▃▃▃▄▅▅▆▆▇██
train/grad_norm,▂▆▄▂▁▃▁▁█
train/learning_rate,█▇▆▅▅▄▃▂▁
train/loss,█▅▅▃▂▃▂▁▁

0,1
eval/loss,0.20024
eval/runtime,6.0333
eval/samples_per_second,631.167
eval/steps_per_second,9.945
total_flos,5260116218757120.0
train/epoch,3.0
train/global_step,4998.0
train/grad_norm,14.54717
train/learning_rate,0.0
train/loss,0.1224


Epoch,Training Loss,Validation Loss
1,0.2048,0.164147
2,0.1275,0.154048
3,0.1084,0.188663


[I 2025-05-26 03:00:47,863] Trial 3 finished with value: 0.1886626034975052 and parameters: {'learning_rate': 3.9853543236151584e-05, 'per_device_train_batch_size': 32, 'num_train_epochs': 3, 'weight_decay': 0.0874112051962272, 'gradient_accumulation_steps': 1}. Best is trial 2 with value: 0.20023983716964722.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/loss,▃▁█
eval/runtime,▁▅█
eval/samples_per_second,█▄▁
eval/steps_per_second,█▄▁
train/epoch,▁▂▃▅▅▆██
train/global_step,▁▂▃▅▅▆██
train/grad_norm,█▇▁▆
train/learning_rate,█▆▃▁
train/loss,█▅▂▁

0,1
eval/loss,0.18866
eval/runtime,6.0403
eval/samples_per_second,630.436
eval/steps_per_second,9.933
total_flos,5260116218757120.0
train/epoch,3.0
train/global_step,2499.0
train/grad_norm,1.12531
train/learning_rate,1e-05
train/loss,0.1084


Epoch,Training Loss,Validation Loss
1,0.218,0.162341
2,0.1382,0.164295


[I 2025-05-26 03:08:48,711] Trial 4 finished with value: 0.1642954796552658 and parameters: {'learning_rate': 1.3720528430905662e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 2, 'weight_decay': 0.24199327150893538, 'gradient_accumulation_steps': 4}. Best is trial 2 with value: 0.20023983716964722.



Melhores hiperparâmetros encontrados:
learning_rate: 1.2721865183729507e-05
per_device_train_batch_size: 16
num_train_epochs: 3
weight_decay: 0.11310079021892039
gradient_accumulation_steps: 1


## Utilizando o Modelo e a API

In [None]:
pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=25af2d0882f3a2815509c7ced48b376fba8956a16bd98c7deff7cc5ffe725443
  Stored in directory: /root/.cache/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from fpdf import FPDF
from datetime import datetime
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import os
from google.colab import drive
# Mount Google Drive; the saved model is loaded from it further down.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Instagram Graph API configuration.
# SECURITY: secrets must never be stored in the notebook. The app secret and
# the user access token that used to be hardcoded here have been published
# with the file and must be treated as leaked: revoke/rotate them.
# Credentials are now read from environment variables.
_missing = [name for name in ("FB_APP_SECRET", "FB_USER_ACCESS_TOKEN") if not os.environ.get(name)]
if _missing:
    raise RuntimeError(
        "Defina as variáveis de ambiente antes de executar esta célula: " + ", ".join(_missing)
    )

# The account id and app id are identifiers, not secrets; they can be overridden.
ig_user_id = os.environ.get("IG_USER_ID", "17841449666813574")
app_id = os.environ.get("FB_APP_ID", "1750857045465243")
app_secret = os.environ["FB_APP_SECRET"]
user_access_token = os.environ["FB_USER_ACCESS_TOKEN"]

# Exchange the user token for a long-lived one. Parameters are passed through
# `params` so they are URL-encoded and the secret is not baked into `url`.
url = "https://graph.facebook.com/v17.0/oauth/access_token"
response = requests.get(
    url,
    params={
        "grant_type": "fb_exchange_token",
        "client_id": app_id,
        "client_secret": app_secret,
        "fb_exchange_token": user_access_token,
    },
    timeout=30,
)
# Fail with the HTTP error instead of a KeyError on a missing "access_token".
response.raise_for_status()
long_access_token = response.json()["access_token"]

# Listing of the account's media with id, caption and timestamp.
base_url = f"https://graph.facebook.com/v17.0/{ig_user_id}/media?fields=id,caption,timestamp&access_token={long_access_token}"

# Output folder for the generated charts.
os.makedirs('graficos', exist_ok=True)

In [None]:
def _coletar_paginas(url):
    """Collect the `data` items of a paginated Graph API listing.

    Follows the `paging.next` link of each response until there is none.

    Args:
        url: URL of the first page.

    Returns:
        A `(items, failed_response)` tuple. `failed_response` is None when all
        pages were read, otherwise the first response whose status was not 200
        (items gathered before the failure are still returned).
    """
    itens = []
    while url:
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            return itens, response
        payload = response.json()
        itens.extend(payload.get('data', []))
        url = payload.get('paging', {}).get('next')
    return itens, None


def coletar_comentarios_por_publicacao():
    """Fetch every publication of the configured account together with its comments.

    Uses the module-level `base_url` and `long_access_token`. Both the media
    listing and each comment listing are read page by page, so accounts with
    more than one page of results are fully covered. HTTP errors are reported
    with `print` and the data gathered so far is kept (best effort).

    Returns:
        A list of dicts with the keys `media_id`, `caption`, `comentarios`
        (list of comment texts) and `timestamp` (None when the API omits it).
    """
    publicacoes = []
    midias, erro = _coletar_paginas(base_url)
    if erro is not None:
        print('Erro ao buscar mídias:', erro.text)

    for item in midias:
        media_id = item['id']
        caption = item.get('caption', 'Sem legenda')
        timestamp = item.get('timestamp', None)

        comments_url = f'https://graph.facebook.com/v17.0/{media_id}/comments?fields=id,text,timestamp,username&access_token={long_access_token}'
        comments_data, erro_comentarios = _coletar_paginas(comments_url)
        if erro_comentarios is not None:
            print(f'Erro ao buscar comentários da mídia {media_id}')

        # Skip entries without text so the classifier never receives an empty input.
        comentarios = [comment['text'] for comment in comments_data if comment.get('text')]

        publicacoes.append({
            'media_id': media_id,
            'caption': caption,
            'comentarios': comentarios,
            'timestamp': timestamp
        })

    return publicacoes

In [None]:
# Load the saved model and tokenizer from the mounted Google Drive folder.
model = BertForSequenceClassification.from_pretrained("/content/drive/MyDrive/sentiment_model")
tokenizer = BertTokenizer.from_pretrained("/content/drive/MyDrive/sentiment_model")

In [None]:
def analisar_sentimentos(comentarios, batch_size=32):
    """Classify each comment as 'Positivo' or 'Negativo' with the module-level model.

    The comments are processed in batches so that memory use stays bounded no
    matter how many comments a publication has.

    Args:
        comentarios: list of comment strings (may be empty or None).
        batch_size: number of comments sent through the model at once.

    Returns:
        A list of `(comment, sentiment)` tuples in input order; empty when
        there are no comments.
    """
    resultados = []
    if not comentarios:
        return resultados

    for inicio in range(0, len(comentarios), batch_size):
        lote = comentarios[inicio:inicio + batch_size]
        encodings = tokenizer(lote, padding=True, truncation=True, max_length=128, return_tensors="pt")
        with torch.no_grad():  # inference only
            predictions = model(**encodings).logits.argmax(dim=-1)

        # Class 1 is the positive label; anything else is reported as negative.
        for comentario, pred in zip(lote, predictions.tolist()):
            sentimento = 'Positivo' if pred == 1 else 'Negativo'
            resultados.append((comentario, sentimento))
    return resultados

In [None]:
def calcular_metricas(resultados):
    """Summarise classified comments into counts and percentages.

    Args:
        resultados: iterable-with-length of `(comment, sentiment)` tuples;
            a sentiment equal to 'Positivo' counts as positive, anything
            else as negative.

    Returns:
        `(positivos, negativos, porcentagem_positivos, porcentagem_negativos)`.
        Both percentages are 0 when `resultados` is empty.
    """
    total = len(resultados)
    positivos = len([sentimento for _, sentimento in resultados if sentimento == 'Positivo'])
    negativos = total - positivos

    # Guard clause: avoid dividing by zero when nothing was classified.
    if not total:
        return positivos, negativos, 0, 0

    return positivos, negativos, positivos / total * 100, negativos / total * 100

In [None]:
def gerar_grafico_publicacao(caption, positivos, negativos, media_id):
    """Save a pie chart with the positive/negative split of one publication.

    Args:
        caption: publication caption, used as the chart title (shortened to
            50 characters, with '...' added only when it was actually cut).
        positivos: number of positive comments.
        negativos: number of negative comments.
        media_id: publication id, used as the PNG file name.

    Returns:
        The path of the saved image, `graficos/<media_id>.png`.
    """
    labels = ['Positivos', 'Negativos']
    sizes = [positivos, negativos]
    colors = ['#4CAF50', '#F44336']

    # Only mark the title as truncated when it really is, consistent with the
    # caption handling of the timeline chart.
    titulo = caption[:50] + '...' if len(caption) > 50 else caption

    fig, ax = plt.subplots()
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
    ax.axis('equal')
    ax.set_title(titulo)
    caminho = f'graficos/{media_id}.png'
    fig.savefig(caminho)
    # Close this specific figure; one chart is generated per publication.
    plt.close(fig)
    return caminho

"""
def gerar_grafico_geral(total_positivos, total_negativos):
    labels = ['Positivos', 'Negativos']
    sizes = [total_positivos, total_negativos]
    colors = ['#4CAF50', '#F44336']

    fig, ax = plt.subplots()
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
    ax.axis('equal')
    plt.title('Distribuição Geral dos Sentimentos')
    caminho = 'graficos/geral.png'
    plt.savefig(caminho)
    plt.close()
    return caminho
"""

def gerar_grafico_temporal(resultados_temporais):
    """Save a line chart of the share of positive comments per publication over time.

    Args:
        resultados_temporais: list of dicts with the keys `data` (timestamp
            string in the form '%Y-%m-%dT%H:%M:%S%z'), `pct_positivos`
            (0-100) and `caption`.

    Returns:
        The path of the saved image, or None when no entry has a timestamp.
    """
    # Entries without a timestamp cannot be placed on the time axis.
    validos = [item for item in resultados_temporais if item.get('data')]
    if not validos:
        return None

    def _instante(item):
        """Parse the entry's timestamp into an aware datetime."""
        return datetime.strptime(item['data'], '%Y-%m-%dT%H:%M:%S%z')

    # Sort on the parsed instant rather than on the raw string.
    resultados_ordenados = sorted(validos, key=_instante)

    datas = [_instante(item).strftime('%d/%m/%Y %H:%M') for item in resultados_ordenados]
    porcentagens = [item['pct_positivos'] for item in resultados_ordenados]
    legends = [item['caption'][:30] + '...' if len(item['caption']) > 30 else item['caption'] for item in resultados_ordenados]

    # Plot against integer positions and use the dates only as tick labels.
    # Plotting the date strings directly creates a categorical axis on which
    # equal labels collapse into one position, misaligning the annotations.
    posicoes = list(range(len(resultados_ordenados)))

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(posicoes, porcentagens, marker='o', color='#2196F3', linestyle='-')

    for pos, txt in zip(posicoes, legends):
        ax.annotate(txt, (pos, porcentagens[pos]), textcoords="offset points", xytext=(0,10),
                    ha='center', fontsize=8, rotation=45)

    ax.set_xticks(posicoes)
    ax.set_xticklabels(datas, rotation=45, ha='right', fontsize=8)

    ax.set_title('Evolução da Avaliação das Publicações ao Longo do Tempo')
    ax.set_xlabel('Data da Publicação')
    ax.set_ylabel('% de Comentários Positivos')
    ax.set_ylim(0, 100)
    ax.grid(True)

    caminho = 'graficos/grafico_temporal.png'
    fig.tight_layout()
    fig.savefig(caminho)
    plt.close(fig)
    return caminho


In [None]:
# Helper for the provisional workaround: emojis were raising errors during PDF generation.
def remove_emojis(text):
    """Return `text` without the characters that Latin-1 cannot represent.

    Every character with a code point above 255 is dropped; this removes
    emojis, and also any other non-Latin-1 character.
    """
    return ''.join(ch for ch in text if ord(ch) <= 0xFF)

In [None]:
class PDFRelatorio(FPDF):
    """PDF report of the sentiment analysis: one section per publication plus a summary page."""

    def header(self):
        """FPDF header hook: draw the centered report title."""
        self.set_font('Arial', 'B', 16)
        self.cell(0, 10, 'Relatório de Análise de Sentimentos - Instagram', 0, 1, 'C')
        self.ln(10)

    def footer(self):
        """FPDF footer hook: draw the centered page number 15 units above the bottom."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Página {self.page_no()}', 0, 0, 'C')

    def add_publicacao(self, caption, positivos, negativos, porcentagem_positivos, porcentagem_negativos, grafico_path):
        """Append the section of one publication.

        Args:
            caption: publication caption; characters outside Latin-1 are removed.
            positivos: number of positive comments.
            negativos: number of negative comments.
            porcentagem_positivos: share of positive comments (0-100).
            porcentagem_negativos: share of negative comments (0-100).
            grafico_path: path of the chart image to embed.
        """
        self.set_font('Arial', 'B', 12)
        # Provisional workaround: strip emojis, which fail during PDF generation.
        self.multi_cell(0, 10, remove_emojis(caption))
        self.set_font('Arial', '', 12)
        self.cell(0, 10, f'Positivos: {positivos} ({porcentagem_positivos:.2f}%)', 0, 1)
        self.cell(0, 10, f'Negativos: {negativos} ({porcentagem_negativos:.2f}%)', 0, 1)
        self.ln(3)
        self.image(grafico_path, w=150)
        self.ln(10)

    def add_conclusao_geral(self, total_positivos, total_negativos, pct_positivos, pct_negativos, grafico_path):
        """Append the summary page with the totals, the timeline chart and a verdict.

        Args:
            total_positivos: total number of positive comments.
            total_negativos: total number of negative comments.
            pct_positivos: overall share of positive comments (0-100).
            pct_negativos: overall share of negative comments (0-100).
            grafico_path: path of the timeline chart, or None when no chart
                was generated (the image is then skipped).
        """
        self.add_page()
        self.set_font('Arial', 'B', 14)
        self.cell(0, 10, 'Resumo Geral', 0, 1, 'C')
        self.ln(5)
        self.set_font('Arial', '', 12)
        self.cell(0, 10, f'Total de Comentários Positivos: {total_positivos} ({pct_positivos:.2f}%)', 0, 1)
        self.cell(0, 10, f'Total de Comentários Negativos: {total_negativos} ({pct_negativos:.2f}%)', 0, 1)
        self.ln(5)
        # The timeline chart is None when there was nothing to plot; embedding
        # None would raise, so the image is optional.
        if grafico_path:
            self.image(grafico_path, w=150)
        self.ln(10)

        conclusao = 'Conclusão geral: '
        if total_positivos + total_negativos == 0:
            # Without comments a 0% positive share says nothing about the profile.
            conclusao += 'Nenhum comentário foi analisado.'
        elif pct_positivos > 70:
            conclusao += 'O perfil está muito bem avaliado!'
        elif pct_positivos > 40:
            conclusao += 'O perfil está com avaliação mista.'
        else:
            conclusao += 'O perfil está sendo mal avaliado.'

        self.multi_cell(0, 10, conclusao)

In [None]:
# Build the report: collect the publications, classify their comments, add one
# section per publication and finish with the summary page.
publicacoes = coletar_comentarios_por_publicacao()
pdf = PDFRelatorio()
pdf.add_page()

# Print a summary only. Dumping `publicacoes` would store every collected
# comment (third-party user content) in the saved notebook output.
print(f'Publicações coletadas: {len(publicacoes)}')

total_resultados = []
resultados_temporais = []

total_positivos = 0
total_negativos = 0

for publicacao in publicacoes:
    comentarios = publicacao['comentarios']
    caption = publicacao['caption']
    media_id = publicacao['media_id']

    # Publications without comments are left out of the report.
    if comentarios:
        resultados = analisar_sentimentos(comentarios)
        positivos, negativos, pct_positivos, pct_negativos = calcular_metricas(resultados)
        grafico_path = gerar_grafico_publicacao(caption, positivos, negativos, media_id)

        pdf.add_publicacao(caption, positivos, negativos, pct_positivos, pct_negativos, grafico_path)

        total_positivos += positivos
        total_negativos += negativos
        total_resultados.extend(resultados)

        # One point of the timeline chart per analysed publication.
        resultados_temporais.append({
            'data': publicacao['timestamp'],
            'pct_positivos': pct_positivos,
            'caption': caption
        })

grafico_temporal_path = gerar_grafico_temporal(resultados_temporais)
# Overall shares; both counts are zero when no comment was analysed.
pct_total_positivos = total_positivos / (total_positivos + total_negativos) * 100 if (total_positivos + total_negativos) else 0
pct_total_negativos = 100 - pct_total_positivos

pdf.add_conclusao_geral(total_positivos, total_negativos, pct_total_positivos, pct_total_negativos, grafico_temporal_path)

pdf.output('relatorio_sentimentos_instagram.pdf')

[{'media_id': '17982061616675940', 'caption': 'O que esse negócio de I.A tá ficando bom em foto é brincadeira 😳', 'comentarios': ['Que lindosss❤️❤️', '❤️❤️❤️', 'A IA nao colocou aliança na sua foto 😠', 'Amei!! ❤️❤️', 'ta roubando o emprego do vasco', 'Legal que na terceira foto a Le não tá de olho fechado mas a IA entendeu que tava hahahaha', 'A Porsche virou fusca kkkkkkkk', 'show de bola🙌❤️', 'parece o dj oreia', 'ficou parecido irmão 👏👏'], 'timestamp': '2025-03-31T15:00:00+0000'}, {'media_id': '18487181953049729', 'caption': 'Obrigado por essa vista maravilhosa!! 🥹', 'comentarios': ['Kkkkkkkkkkkkk', '😂😂😂😂😂', '👏👏👏muito  bom', 'O que importa é a companhia!!', '😂😂😂', 'Nuussssss......deu ate medo 😂😂'], 'timestamp': '2025-01-06T16:19:11+0000'}, {'media_id': '18044393930191067', 'caption': 'Eu e você, você e eu ♥️', 'comentarios': ['👏👏👏👏👏👏👏👏👏', '🔥🔥🔥🔥🔥🔥kkkkk', '👏👏👏👏👏🔥🔥🔥🔥', '💘💘💘💘', 'linducos', 'Lindos amooooo ❤️❤️❤️', 'Seus lindos ❤️❤️', 'Lindicos', 'Meu tudinho', 'Te amo muito lindeza❤️❤️❤

  plt.savefig(caminho)
  plt.savefig(caminho)
  plt.savefig(caminho)
  plt.savefig(caminho)
  plt.savefig(caminho)
  plt.savefig(caminho)
  plt.savefig(caminho)


''