In [1]:
import torch
import os
import getpass

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain_huggingface import HuggingFacePipeline

from langchain.prompts import PromptTemplate
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

2025-01-20 20:11:46.136766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-20 20:11:46.139184: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 20:11:46.197117: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
####remember to use your huggingface token here



In [3]:
os.environ["HF_TOKEN"] = getpass.getpass() ###this request your huggingface token

In [4]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
#model_id = "microsoft/Phi-3-mini-4k-instruct"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)

pipe = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.1,
    max_new_tokens=500,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=pipe)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:
# template for LLAMA 3
#template = """
#<|begin_of_text|>
#<|start_header_id|>system<|end_header_id|>
#Você é um assistente virtual prestativo e está respondendo perguntas gerais.
#<|eot_id|>
#<|start_header_id|>user<|end_header_id|>
#{pergunta}
#<|eot_id|>
#<|start_header_id|>assistant<|end_header_id|>
#"""

#template

In [20]:
template_rag = """
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
You are a professional astronomer searching for the main topics in the new articles available on
archive with the goal to check the main results from those articles.  When providing results, ensure they are formatted as:
- Bullet points for key topics.
- Paragraphs for explanations.
- Use Markdown if helpful.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Pergunta: {question}
Contexto: {context}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""

In [21]:
prompt_rag = PromptTemplate.from_template(template_rag)
print(prompt_rag)

input_variables=['context', 'question'] input_types={} partial_variables={} template='\n<|begin_of_text|>\n<|start_header_id|>system<|end_header_id|>\nYou are a professional astronomer searching for the main topics in the new articles available on\narchive with the goal to check the main results from those articles.  When providing results, ensure they are formatted as:\n- Bullet points for key topics.\n- Paragraphs for explanations.\n- Use Markdown if helpful.\n<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\nPergunta: {question}\nContexto: {context}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n'


In [7]:
loader = WebBaseLoader(web_paths = ("https://arxiv.org/list/astro-ph.GA/new",),)
docs = loader.load()

In [10]:
###splittting text for the text model
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200, add_start_index = True)
splits = text_splitter.split_documents(docs)

In [11]:
splits

[Document(metadata={'source': 'https://arxiv.org/list/astro-ph.GA/new', 'title': 'Astrophysics of Galaxies  ', 'language': 'en', 'start_index': 3}, page_content='Astrophysics of Galaxies  \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSkip to main content\n\n\n\n\n\nIn just 3 minutes help us improve arXiv:\nAnnual Global Survey\n\nWe gratefully acknowledge support from the Simons Foundation, member institutions, and all contributors.\nDonate\n\n\n\n\n\n >\nastro-ph.GA\n\n\n\n\n\n\nHelp | Advanced Search\n\n\n\n\nAll fields\nTitle\nAuthor\nAbstract\nComments\nJournal reference\nACM classification\nMSC classification\nReport number\narXiv identifier\nDOI\nORCID\narXiv author ID\nHelp pages\nFull text\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nopen search\n\n\n\n\n\n\nGO\n\n\n\nopen navigation menu\n\n\nquick links\n\nLogin\nHelp Pages\nAbout\n\n\n\n\n\n\n\n\n\n\n\nAstrophysics of Galaxies\n\nNew submissions\nCross-lists\nReplacements\n\nSee recent articles\nShowing new listings for Monday, 20

In [12]:
###turn the text splited into embeddings
hf_embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-mpnet-base-v2")

In [13]:
####aplicando o embbedding na base de dados vetorial
vectorstore = Chroma.from_documents(documents=splits, embedding=hf_embeddings)

In [14]:
retriever = vectorstore.as_retriever(search_type = "similarity", search_kwargs = {"k" : 6})

In [15]:
template_rag

'\n<|begin_of_text|>\n<|start_header_id|>system<|end_header_id|>\nYou are a professional astronomer searching for the main topics in the new articles available on\narchive with the goal to check the main results from those articles.\n<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\nPergunta: {question}\nContexto: {context}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n'

In [22]:
prompt_rag = PromptTemplate(
    input_variables=["context", "question"],
    template=template_rag,
)
prompt_rag

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n<|begin_of_text|>\n<|start_header_id|>system<|end_header_id|>\nYou are a professional astronomer searching for the main topics in the new articles available on\narchive with the goal to check the main results from those articles.  When providing results, ensure they are formatted as:\n- Bullet points for key topics.\n- Paragraphs for explanations.\n- Use Markdown if helpful.\n<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\nPergunta: {question}\nContexto: {context}\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n')

In [17]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [23]:
chain_rag = ({"context": retriever | format_docs, "question" : RunnablePassthrough()}
            | prompt_rag
            | llm
            | StrOutputParser())

In [24]:
chain_rag.invoke("are there some article using JWST data? give me only the title of the articles")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'Here are the titles of the articles that use JWST data:\n\n* "JWST Imaging of the Closest Globular Clusters -- V. The White Dwarfs Cooling Sequence of M4"\n* "Chasing the Light: Shadowing, Collimation, and the Super-Eddington Growth of Infant Black Holes in JWST-Discovered AGNs"\n* "Photometric detection at $7.7\\ \\mu\\mathrm{m}$ of a galaxy beyond redshift $14$ with JWST/MIRI"\n\nThese articles appear to be related to astronomy and astrophysics, specifically focusing on the James Webb Space Telescope (JWST) and its capabilities.'

In [25]:
import gradio as gr

ModuleNotFoundError: No module named 'gradio'