# Similarity search

# Setup

In [1]:
# %pip install -r requirements.txt

In [2]:
# Environment Variables
import os
from dotenv import load_dotenv

# Load env
load_dotenv()

True

# Reference

https://www.hiberus.com/crecemos-contigo/ask-your-web-pages-otro-enfoque-rag-utilizando-modelos-de-codigo-abierto/

https://colab.research.google.com/drive/1rt318Ew-5dDw21YZx2zK2vnxbsuDAchH?usp=sharing#scrollTo=YFw8HWIyTCnJ

https://www.reddit.com/r/LocalLLaMA/comments/16j624z/some_questions_of_implementing_llm_to_generate_qa/

https://docs.rungalileo.io/galileo/llm-studio/prompt-inspector/using-prompt-with-rag-or-vector-databases

https://towardsdatascience.com/rag-how-to-talk-to-your-data-eaf5469b83b0

https://github.com/edumunozsala/question-answering-pinecone-sts

# Directory

In [3]:
# Set directory to file location
from pathlib import Path
import sys
notebook_location = Path(os.path.abspath(''))
os.chdir(notebook_location)
# Get the current working directory
current_directory = os.getcwd()
current_directory

'/notebooks/LawGPT'

# Libraries

In [4]:
# General
from IPython.display import Markdown, display
import gradio as gr
import pinecone
import time
import yaml
import json

import gc
import os

# HuggingFace
from huggingface_hub import notebook_login

# Transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import transformers

# Langchain
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.chains import SimpleSequentialChain, RetrievalQA, LLMChain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import FAISS
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate
)
from langchain import HuggingFacePipeline
from langchain import PromptTemplate

# Torch
from torch import cuda, bfloat16, float16
import torch

# Other
from tqdm.notebook import tqdm

# Local
from functions import *

# Warnings
import warnings
warnings.filterwarnings("ignore")

# Platform login

Use credentials from HuggingFace

In [5]:
# HF Key
hf_key = os.environ.get('HF_KEY')
print(hf_key)

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


In [6]:
# Jupyter / Colab
# notebook_login()

# VS Code
# Run huggingface-cli login in console

In [7]:
# Setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# CUDA information
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

Quadro P5000
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [8]:
# Clean memory
torch.cuda.empty_cache()
gc.collect()

248

# Pinecone

In [9]:
# Init pinecone
pinecone.init(
    api_key = os.environ.get('PINECONE_API_KEY'),
    environment = os.environ.get('PINECONE_ENVIRONMENT')
)

# Connect
index_name = 'lawgpt-unstructured-db'
index = pinecone.Index(index_name)

# Index stats
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.03524,
 'namespaces': {'': {'vector_count': 3524}},
 'total_vector_count': 3524}

# Parameters

In [10]:
# Load parameters from YAML file
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Embedding model

In [11]:
# Model ID
embed_model_id = config["embedding_model"]

# Embed model
embed_model = HuggingFaceEmbeddings(
    model_name = embed_model_id,
    model_kwargs = {'device': device},
    encode_kwargs = {'device': device, 'batch_size': 32}
) 

.gitattributes:   0%|          | 0.00/968 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.09k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/471M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

unigram.json:   0%|          | 0.00/14.8M [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

In [12]:
# CUDA information
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Quadro P5000
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


# Vector store

In [20]:
# Field in metadata with text
text_field = 'text'

# Initiate langchain vectorstore
vectorstore = Pinecone(
    index, embed_model.embed_query, text_field
)

# Test models

In [21]:
query = 'Explícame el Artículo 245 del Código Penal de España, que hace referencia a la usurpación de inmuebles.'

# Find closer docs

In [22]:
# Look for closer doc
similarity_output = vectorstore.similarity_search_with_score(query, k = config['top_k_docs'])

In [23]:
# Context preprocessed
context_processed = [
    {"context": doc[0].page_content, "score": doc[1]} for doc in similarity_output
]

In [24]:
context_processed

[{'context': 'Ley Orgánica 8/1983, de 25 de junio, de Reforma Urgente y Parcial del Código Penal.: E1 párrafo 1. queda así redactado: <Los delitos prescriben a los veinte años cuando la Ley señalare al delito la pena de reclusión mayor>. Artículo 115. El apartado 1. queda así redactado: <Las de reclusión mayor a los treinta y cinco años>. Artículo 120. Queda así redactado: <El español que indujere a una potencia extranjera a declarar la guerra a España o se concertase con ella para el mismo fin, será castigado con la pena de reclusión mayor>. Artículo 137 bis. Queda redactado así: <Los que, con propósito de destruir, total o parcialmente, a un grupo nacional étnico, racial o religioso perpetraren alguno de los actos siguientes, serán castigados: 1. Con la pena de reclusión mayor si causaren la muerte castración, esterilización, mutilación o lesión grave a alguno de sus miembros. 2. Con la reclusión menor, si sometieren al grupo o a cualquiera de sus individuos a condiciones de existenc