In [1]:
from motor.motor_asyncio import AsyncIOMotorClient
import asyncio
from pymongo import MongoClient
import pandas as pd


# Development MongoDB settings: host name, password, port and database name.
# NOTE(review): the password is hard-coded in the notebook; consider reading it
# from an environment variable before sharing or committing this file.
DB_HOST_DEV = "mongo"
DB_PASSWORD_DEV = "example"
DB_PORT_DEV = "27017"
DB_DATABASE_DEV = "teste_data"

MONGODB_HOST = "127.0.0.1"  # Or the IP address / host name of the MongoDB server
MONGODB_PORT = 27017  # Or the port MongoDB is actually configured to listen on

In [19]:
# Connection string for the dev MongoDB instance. It is built on a single line:
# the previous triple-quoted form embedded a line break inside the URI, which
# makes the connection string malformed.
# NOTE(review): MONGODB_HOST sits in the user-name position of this URI —
# confirm which user name is intended before actually connecting with DB_URL.
DB_URL = f"mongodb://{MONGODB_HOST}:{DB_PASSWORD_DEV}@{DB_HOST_DEV}:{DB_PORT_DEV}/"

# Async alternative (Motor), currently disabled:
# client = AsyncIOMotorClient(DB_URL)

# Synchronous client, built from the configured host/port constants instead of
# repeating the literal values here.
client = MongoClient(MONGODB_HOST, MONGODB_PORT)

database = client[DB_DATABASE_DEV]
conversations_collection = database["bot"]

# The spelling "colletion" is kept on purpose: other cells reference this name.
description_colletion = database["description"]


In [20]:
# Fetch a single document from the "description" collection (empty filter = match anything).
# find_one returns None when the collection has no documents.
result = description_colletion.find_one({})

In [21]:
# Build a DataFrame from the document's "descriptions" field and renumber the rows from 0.
data = pd.DataFrame(result["descriptions"]).reset_index(drop=True)

In [22]:
# Show the loaded descriptions table.
data

Unnamed: 0,id,description
0,1,Problema com Reembolso: O cliente está com dif...
1,2,Sinistro de Veículo: O cliente sofreu um acide...
2,3,Alteração de Dados Pessoais: O cliente mudou d...
3,4,Renovação de Apólice: O cliente tem uma apólic...
4,5,Dúvidas sobre Cobertura: O cliente tem dúvidas...
5,6,Cancelamento de Apólice: O cliente deseja canc...


In [23]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load spaCy's small Portuguese pipeline (the "pt_core_news_sm" model must be installed).
# It is used by remove_stopwords to tokenize text and flag stop words.
nlp = spacy.load("pt_core_news_sm")


# Custom exception classes for better error handling
class DataLoadingError(Exception):
    """Raised when the descriptions cannot be loaded or pre-processed."""


class DataProcessingError(Exception):
    """Raised when building or fitting the TF-IDF vectorizer fails."""


class SimilarityModelError(Exception):
    """Raised when the similarity lookup for a user query fails."""


def load_data():
    """Load the descriptions from MongoDB and normalise their text.

    Reads one document from the module-level ``description_colletion``,
    builds a DataFrame from its ``"descriptions"`` field, lower-cases the
    ``description`` column and strips stop words with ``remove_stopwords``.

    Returns:
        pandas.DataFrame: the descriptions, with a cleaned ``description``
        column and a fresh 0-based index.

    Raises:
        DataLoadingError: if no document with a ``"descriptions"`` field is
            found, or if any loading/cleaning step fails (the original
            exception is chained as ``__cause__``).
    """
    try:
        document = description_colletion.find_one({})

        # find_one returns None when nothing matches; report that clearly
        # instead of an opaque "'NoneType' object is not subscriptable".
        if document is None or "descriptions" not in document:
            raise DataLoadingError(
                "Error loading or processing data: "
                "no document with a 'descriptions' field was found"
            )

        data = pd.DataFrame(document["descriptions"]).reset_index(drop=True)

        # Lower-case first, then drop stop words, in a single pass per row.
        data["description"] = data["description"].apply(
            lambda text: remove_stopwords(text.lower())
        )
        return data

    except DataLoadingError:
        # Already carries a specific message; do not wrap it a second time.
        raise
    except Exception as e:
        raise DataLoadingError(f"Error loading or processing data: {e}") from e


def instantiate_vectorizer(data):
    """Fit a TF-IDF vectorizer on ``data["description"]``.

    Args:
        data: object indexable by ``"description"`` that yields the texts to
            fit on (the DataFrame returned by ``load_data`` in this notebook).

    Returns:
        tuple: ``(vectorizer, tfidf_matrix)`` — the fitted ``TfidfVectorizer``
        and the matrix returned by ``fit_transform`` for those texts.

    Raises:
        DataProcessingError: if creating or fitting the vectorizer fails
            (the original exception is chained).
    """
    try:
        tfidf = TfidfVectorizer(strip_accents="unicode", lowercase=True)
        corpus = data["description"]
        matrix = tfidf.fit_transform(corpus)
    except Exception as exc:
        raise DataProcessingError(f"Error in vectorizer instantiation: {exc}") from exc
    else:
        return tfidf, matrix


def remove_stopwords(text):
    """Return ``text`` without the tokens spaCy flags as stop words.

    The text is tokenized with the module-level ``nlp`` pipeline and the
    remaining token texts are joined with single spaces.
    """
    kept_tokens = []
    for token in nlp(text):
        if token.is_stop:
            continue
        kept_tokens.append(token.text)
    return " ".join(kept_tokens)


def similarity_model(user_input):
    """Find the stored description most similar to ``user_input``.

    The descriptions are reloaded and the TF-IDF vectorizer is refitted on
    every call; the query is then compared against each description by
    cosine similarity.

    Args:
        user_input: free-text query.

    Returns:
        int: 0-based position of the highest-scoring description among the
        rows returned by ``load_data``.

    Raises:
        SimilarityModelError: if any step fails (the original exception is
            chained).
    """
    try:
        corpus = load_data()
        vectorizer, tfidf_matrix = instantiate_vectorizer(corpus)

        # Strip stop words from the query, then vectorize it with the fitted vocabulary.
        cleaned_input = remove_stopwords(user_input)
        query_vector = vectorizer.transform([cleaned_input])

        # Cosine similarity between the query and every description.
        scores = cosine_similarity(query_vector, tfidf_matrix)

        return int(scores.argmax())

    except Exception as exc:
        raise SimilarityModelError(f"Error in similarity model: {exc}") from exc




In [24]:
# Example query; the value displayed is the 0-based position of the best-matching description.
similarity_model("Estou com alguns problemas com meu seguro.")

4