In [1]:
import os
import pandas as pd
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
#import dotenv
from tqdm import tqdm
from langchain_community.vectorstores import Chroma
from langchain.schema import Document

## To run Hugging Face OpenSource models
# Needs to manually install Visual C++ Tools from: https://visualstudio.microsoft.com/visual-cpp-build-tools/
from InstructorEmbedding import INSTRUCTOR
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
import warnings
from rich import print

# Suppress all warnings: installs a blanket "ignore" filter for every category
# and module, so deprecation notices raised by the libraries used in this
# notebook are hidden as well.
warnings.filterwarnings("ignore")

  from tqdm.autonotebook import trange


In [2]:
import torch
import torch.nn as nn

# Report whether PyTorch can see a CUDA device
cuda_ready = torch.cuda.is_available()
print(f"CUDA Available: {cuda_ready}")

# When it can, also show the name of device 0
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"Device Name: {gpu_name}")

CUDA Available: True
Device Name: NVIDIA GeForce RTX 3050


In [3]:
# Select the compute device: CUDA when PyTorch reports one, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
# Define directories
# Persistent directory containing the VectorDB, resolved relative to the
# current working directory: <cwd>/../index/WebScrap_CSVs
script_dir = os.getcwd()
persistent_dir = os.path.abspath(os.path.join(script_dir, '..', 'index', 'WebScrap_CSVs'))

# THE SAME SENTENCE-TRANSFORMER MODEL THAT WAS USED TO CREATE THE INDEX MUST BE USED HERE.
# Otherwise you will run into the following problems:
#
# Different vector spaces:
#   Each sentence-transformer model generates embeddings in its own vector space, defined by
#   the model's specific weights and architecture. Embeddings produced by model A are not
#   directly comparable with those of model B because the spaces are different and not aligned.
#
# Incompatible embeddings:
#   The vector representations of the same sentence can differ widely between models. For
#   example, a sentence could map to [0.1, 0.2, 0.3] in model A and [0.5, -0.1, 0.2] in model B.
#   Similarity measures such as cosine similarity are therefore meaningless across models.
#
# Incorrect or nonsensical results:
#   Comparing index embeddings (model A) against query embeddings (model B) produces wrong
#   results: high similarity between unrelated sentences, or low similarity between very
#   similar ones.
#
# Loss of precision and performance:
#   Retrieval quality degrades significantly and users receive irrelevant results, which hurts
#   the usability and reliability of the system.
#
# Consistency of natural-language processing:
#   Models may handle aspects of language such as negation, sarcasm or colloquial speech
#   differently, adding another layer of inconsistency between their embeddings.

embed_model = "sentence-transformers/all-mpnet-base-v2"


# Run the predictions on the first GPU when one is available; fall back to the
# CPU otherwise so the notebook still runs on machines without CUDA.
model_kwargs = {'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'}
# Ask the embedding model to L2-normalise its output vectors.
encode_kwargs = {'normalize_embeddings': True}

### Q&A Chain

This will run persistently, consulting the already stored vectors

### Using an LLM chain to provide context and a system prompt on every query

In [26]:
#from langchain_chroma import Chroma
from langchain_community.vectorstores import Chroma

# RetrievalQAWithSourcesChain's "stuff" chain fills the retrieved documents into a prompt variable named "summaries" (not "context"), so the template must use {summaries}
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate

##### Embedding model (sentence-transformer)
# Must be the same model that was used to build the persisted index.
# NOTE(review): `embed_model` names a plain sentence-transformers checkpoint but is
# loaded through the Instructor wrapper; confirm the index was built the same way.
model_name = embed_model
# Use the first GPU when available, otherwise fall back to CPU so the cell
# does not fail on machines without CUDA.
model_kwargs = {'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf_embed_model = HuggingFaceInstructEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

##### LLM model
# Load the Llama 3.1 model served locally through Ollama
llm_model = OllamaLLM(
    model="llama3.1",
    temperature=0.7,
    num_thread=8,
)

# Open the persisted vector store with the embedding function defined above
vectorstore = Chroma(persist_directory=persistent_dir, embedding_function=hf_embed_model)
# Expose the vector store as a retriever returning the 5 most similar documents
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Task prompt. The chain fills {summaries} with the retrieved documents and
# {question} with the user's query.
template = """Always answer in English. 
If you can infer an specific code (based on a very good match) you must respond only with this kind fo format "XXXX.XX" and answer it at the beggining with: 
"The suggested HS code is: " 

In the case you can't find a matching code for the description, respond with:
'Your inquiry is very subjective, to provide an accurate HS Code I need you to be more specific, please include size, weight, origin, meaning of use, etc. 
So far, I can narrow it to this chapters/parts : 'provide 3 possible codes".

context:
{summaries}

Question:
{question}
"""

# Role / system instructions that must accompany every query.
context = """As a logistics shipping arrival inspector, your primary responsibility is to inspect incoming shipments and accurately classify goods 
using the Harmonized System (HS) code based on the descriptions provided in the shipping manifests. You will thoroughly review the manifest details, 
including product type, material composition, function, and intended use, to determine the correct HS code. 

Your task is to:
Carefully read and analyze the product descriptions from the manifest.
Identify key characteristics of the goods, such as 
type (e.g., electronics, textiles, machinery), 
material (e.g., plastic, metal, organic), 
and usage (e.g., household, industrial, medical).
Use your knowledge of the HS code classification system to assign the most appropriate HS code for each product based on its description.
Ensure compliance with international trade regulations by selecting precise codes to avoid delays or penalties.
Remember to be thorough and accurate in your classification, as this impacts customs processing, tariffs, and legal requirements."""

# The "stuff" chain always overwrites the "summaries" variable with the
# retrieved documents, so passing `context` as "summaries" at call time silently
# discarded the role text. Bind it to the prompt as a partial variable instead,
# so it is sent to the LLM on every query. `template` itself is left unchanged.
qa_prompt = PromptTemplate(
    template="{role}\n\n" + template,
    # "summaries" is the document variable name this chain type expects
    input_variables=["question", "summaries"],
    partial_variables={"role": context},
)

# Define the LLM chain (using the Llama3.1 model)
llm_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm_model,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,  # To get both the answer and source docs
    chain_type_kwargs={"prompt": qa_prompt},
)


#-----Benchmark query------
#Response should be: "0106.19" 
#query = "Live Dog"
query = "A Live Dog of breed Schnauzer from Germany"
#query = "Live Pig"
#query = "Live Buffalo"
#query = "What HS Code belongs to a wrench?"
#query = "What HS Code belongs to a live dog?"
#query = "What HS Code belongs to:'CERAMIC TABLEWARE  2688 BOXES PO 5 39548'?"
#query = "Covers for cellphone screen"

# Execute the chain with the query. Only the question is supplied here: the
# retriever provides the documents and the prompt already carries the role text.
result = llm_chain.invoke({"question": query})

# Process the results
#print(result.keys())
print(f"[bold yellow]The requested item's description to search HTS code is:[/bold yellow]\n{query}")
print(f"[bold green]The response of the LLM is:[/bold green]\n{result['answer']}")

#print("The documents content used for this response are:")
#for i in range(len(result["source_documents"])):
#    print(result["source_documents"][i].page_content)
#    print(result["source_documents"][i].metadata)
     
#results = vectorstore.similarity_search_with_score(query=query, k=3)
## Print similarity results
#for doc, score in results:
#    print(f"Document content: {doc.page_content}, Code: {doc.metadata},Similarity Score: {score}")



load INSTRUCTOR_Transformer
max_seq_length  512


### Print it in a nicer format