In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
import pandas as pd
import os
import numpy as np

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

# Fetch the NLTK resources used further down: the stopword lists
# (stopwords.words) and the WordNet data (WordNetLemmatizer).
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\felip\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\felip\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [16]:
# Sample customer message used as the query in the cells below.
mensagem_cliente = "estou com problemas no aplicativo"

In [17]:
# Read the API key from the environment and hand it to the client when it is
# constructed, instead of patching the attribute afterwards.
api = os.getenv("OPENAI_API_KEY")

client = OpenAI(api_key=api)

## Dados

In [18]:
# Load the example spreadsheet; the rendered output shows the columns
# protocolo, texto_manif and texto_resposta.
dados = pd.read_excel('dados_exemplo.xlsx')
# Wrap the loaded data in a DataFrame under the name the rest of the notebook uses.
df_dados = pd.DataFrame(dados)
df_dados

Unnamed: 0,protocolo,texto_manif,texto_resposta
0,1,"Estou com problemas na fatura, veio com valor ...","Prezado cliente, verificamos sua fatura e real..."
1,2,Minha internet está muito lenta nos últimos dias.,"Entendemos seu problema, reiniciamos o seu ser..."
2,3,Não consigo acessar minha conta no aplicativo.,Verificamos que houve uma atualização recente ...
3,4,Preciso de uma segunda via da fatura do mês pa...,A segunda via da sua fatura foi enviada para o...
4,5,"Fiz uma compra, mas não recebi a confirmação.",Verificamos seu pedido e ele foi confirmado co...
5,6,Meu produto chegou com defeito. Quero trocar.,Sentimos muito pelo ocorrido. Iniciamos o proc...
6,7,Estou tentando cancelar um serviço e não consigo.,Lamentamos pela dificuldade. O cancelamento fo...
7,8,Preciso alterar o endereço de entrega do meu p...,O endereço foi atualizado conforme solicitado....
8,9,"Fiz um pagamento, mas ainda consta como pendente.",Seu pagamento foi identificado e o status será...
9,10,Gostaria de saber sobre os planos de assinatur...,Temos diversos planos disponíveis! Enviamos as...


## Transformando as manifestações em lista

In [19]:
# Plain Python list with the complaint texts, in DataFrame row order.
manifestacoes = df_dados.loc[:, 'texto_manif'].to_list()
manifestacoes

['Estou com problemas na fatura, veio com valor errado.',
 'Minha internet está muito lenta nos últimos dias.',
 'Não consigo acessar minha conta no aplicativo.',
 'Preciso de uma segunda via da fatura do mês passado.',
 'Fiz uma compra, mas não recebi a confirmação.',
 'Meu produto chegou com defeito. Quero trocar.',
 'Estou tentando cancelar um serviço e não consigo.',
 'Preciso alterar o endereço de entrega do meu pedido.',
 'Fiz um pagamento, mas ainda consta como pendente.',
 'Gostaria de saber sobre os planos de assinatura disponíveis.']

## Vetorização com TF-IDF

In [20]:
# Fit a TF-IDF vocabulary on the raw complaint texts (no stopword removal at
# this point) and encode them: one row per complaint, one column per term.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(manifestacoes)
tfidf_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 78 stored elements and shape (10, 62)>

### Visualização da vetorização

In [21]:
# Vocabulary terms, in the same order as the columns of the TF-IDF matrix.
terms = vectorizer.get_feature_names_out()

# Densify with toarray() so the result is a plain ndarray; todense() would
# return the legacy numpy.matrix type instead.
dense_matrix = tfidf_matrix.toarray()

# One row per complaint, one named column per vocabulary term.
df_tfidf = pd.DataFrame(dense_matrix, columns=terms)

df_tfidf

Unnamed: 0,acessar,ainda,alterar,aplicativo,assinatura,cancelar,chegou,com,como,compra,...,serviço,sobre,tentando,trocar,um,uma,valor,veio,via,últimos
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.556438,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.327281,0.327281,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.359846
2,0.408301,0.0,0.0,0.408301,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.292548,0.0,0.0,0.344138,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.418081,...,0.0,0.0,0.0,0.0,0.0,0.355407,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.393893,0.334845,0.0,0.0,...,0.0,0.0,0.0,0.393893,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.418081,0.0,0.0,0.0,0.0,...,0.418081,0.0,0.418081,0.0,0.355407,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.385727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.37351,0.0,0.0,0.0,0.0,0.0,0.0,0.37351,0.0,...,0.0,0.0,0.0,0.0,0.317517,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.329466,0.0,0.0,0.0,0.0,0.0,...,0.0,0.329466,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Função para encontrar a manifestação mais similar

### Com Scikit-learn

In [22]:
# NOTE(review): WordNetLemmatizer relies on the English WordNet, while the
# texts in this notebook are Portuguese — confirm it has the intended effect.
lemmatizer = WordNetLemmatizer()
# NLTK's Portuguese stopword list, stored as a set for fast membership tests.
stop_words = set(stopwords.words('portuguese'))

def preprocess_text(text):
    """Normalize a text for similarity matching.

    Replaces every non-word character with a space, lowercases the result,
    drops tokens found in ``stop_words`` and passes the remaining tokens
    through ``lemmatizer``.

    Args:
        text: The string to normalize.

    Returns:
        The surviving tokens joined by single spaces.
    """
    normalized = re.sub(r'\W', ' ', text).lower()

    kept = []
    for word in normalized.split():
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))

    return ' '.join(kept)

# Run the same preprocessing over the stored complaints and over the new customer message.
manifestacoes_processadas = list(map(preprocess_text, manifestacoes))
mensagem_cliente_processada = preprocess_text(mensagem_cliente)


In [23]:
mensagem_cliente_processada

'problemas aplicativo'

In [24]:
# Similarity index built from the preprocessed complaints, so that stored texts
# and incoming queries share the same stopword removal / lemmatization.
vectorizer_processado = TfidfVectorizer()
tfidf_matrix_processada = vectorizer_processado.fit_transform(manifestacoes_processadas)


def recuperar_resposta(mensagem_cliente, threshold=0.1):
    """Return the stored reply whose complaint is most similar to the message.

    Args:
        mensagem_cliente: Customer message, expected to have been run through
            ``preprocess_text`` already (the index is built from preprocessed
            complaint texts).
        threshold: Minimum cosine similarity required to accept the best match.

    Returns:
        A ``(resposta_recuperada, manifestacao_similar)`` tuple with the stored
        reply and the original complaint text it answers, or ``(None, None)``
        when no stored complaint reaches ``threshold``.
    """
    # Encode the query with the vocabulary learned from the stored complaints.
    mensagem_cliente_tfidf = vectorizer_processado.transform([mensagem_cliente])

    # cosine_similarity yields one row for the single query; flatten it so each
    # position lines up with one stored complaint.
    similaridades = cosine_similarity(mensagem_cliente_tfidf, tfidf_matrix_processada).ravel()

    indice_mais_similar = int(similaridades.argmax())
    similaridade_maxima = similaridades[indice_mais_similar]

    if similaridade_maxima < threshold:
        # Nothing is close enough: report "no match" explicitly instead of
        # falling through to a return of unassigned variables.
        return None, None

    manifestacao_similar = manifestacoes[indice_mais_similar]

    # Look the reply up by row position rather than by text equality, so
    # repeated complaint texts cannot pull the reply from a different row.
    resposta_recuperada = df_dados['texto_resposta'].iloc[indice_mais_similar]

    return resposta_recuperada, manifestacao_similar

# Smoke test: fetch the most similar stored complaint and its reply for the sample message.
resposta_recuperada, manifestacao_similar = recuperar_resposta(mensagem_cliente_processada)

# Single print call; sep="\n" reproduces the newline that each separate print
# call would have emitted between the lines.
print(
    f"\nMensagem cliente: {mensagem_cliente}",
    f"\nMensagem recuperada: {manifestacao_similar}",
    f"\nResposta recuperada: {resposta_recuperada}",
    sep="\n",
)



Mensagem cliente: estou com problemas no aplicativo

Mensagem recuperada: Não consigo acessar minha conta no aplicativo.

Resposta recuperada: Verificamos que houve uma atualização recente no app. Por favor, tente reinstalar. Se o problema continuar, nos avise.


### Com embeddings da OpenAI

In [25]:
# # Função para gerar embeddings usando a API da OpenAI
# def gerar_embeddings_textos(textos):
#     response = client.embeddings.create(
#         model="text-embedding-ada-002",
#         input=textos
#     )
    
#     # Acessando corretamente os dados da resposta
#     embeddings = [r.embedding for r in response.data]
#     return np.array(embeddings)

# # Geração dos embeddings para as manifestações
# embeddings_manifestacoes = gerar_embeddings_textos(manifestacoes)
# embeddings_manifestacoes

In [26]:
# def recuperar_resposta_com_embeddings(mensagem_cliente):
#     # Gerar embedding para a nova manifestação do cliente
#     embedding_cliente = gerar_embeddings_textos([mensagem_cliente])[0]  # Pega o primeiro (e único) vetor
    
#     # Calcular a similaridade de cosseno entre a manifestação do cliente e todas as manifestações da base
#     similaridades = cosine_similarity([embedding_cliente], embeddings_manifestacoes)
    
#     # Encontrar o índice da manifestação mais similar
#     indice_mais_similar = np.argmax(similaridades)
    
#     # Retornar a resposta correspondente
#     return dados[indice_mais_similar]['resposta']
# recuperar_resposta_com_embeddings(mensagem_cliente)

### Agora que conseguimos recuperar a resposta mais similar da base de dados, vamos usar essa resposta como contexto para gerar uma nova resposta usando o GPT-3.5

## Função para gerar resposta final usando GPT-3.5 com a resposta recuperada

In [27]:
def gerar_resposta_automatica(mensagem_cliente, manifestacao_anterior=None, resposta_anterior=None):
    """Generate the final customer reply with the chat completions API.

    Args:
        mensagem_cliente: The customer's message, as written.
        manifestacao_anterior: Similar past complaint to use as context. When
            omitted, the notebook-level ``manifestacao_similar`` is used.
        resposta_anterior: Reply given to that past complaint. When omitted,
            the notebook-level ``resposta_recuperada`` is used.

    Returns:
        The generated reply with surrounding whitespace removed, or an empty
        string when the API returns no text content.
    """
    # Keep the original one-argument call working by falling back to the
    # values produced by the retrieval cell.
    if manifestacao_anterior is None:
        manifestacao_anterior = manifestacao_similar
    if resposta_anterior is None:
        resposta_anterior = resposta_recuperada

    if manifestacao_anterior is None or resposta_anterior is None:
        # No retrieved context available: ask without it rather than sending
        # the literal text "None" to the model.
        conteudo_usuario = f"O cliente disse: {mensagem_cliente}. Por favor, responda ao cliente de forma clara e objetiva."
    else:
        conteudo_usuario = f"O cliente disse: {mensagem_cliente}. Manifestação anterior: {manifestacao_anterior}. Resposta anterior: {resposta_anterior}. Por favor, responda ao cliente de forma clara e objetiva."

    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "Você é um atendente que responde clientes de forma educada, objetiva e clara."},
            {"role": "user", "content": conteudo_usuario},
        ],
        model="gpt-3.5-turbo",
        temperature=0.1,        # low temperature keeps the wording close to the context
        max_tokens=150,         # upper bound on the length of the generated reply
        n=1                     # a single completion is requested
    )

    # message.content may be None; guard before stripping.
    conteudo = response.choices[0].message.content
    return conteudo.strip() if conteudo else ""

In [28]:
# Generate the final reply for the sample customer message and show it.
resposta_automatica = gerar_resposta_automatica(mensagem_cliente)
print(resposta_automatica)

Olá, lamentamos pelo transtorno. Recomendamos que você tente reinstalar o aplicativo. Se o problema persistir, por favor, entre em contato conosco para que possamos ajudá-lo a resolver a questão.
