In [1]:
!pip install accelerate==0.33.0 transformers==4.31.0 tokenizers==0.13.3
!pip install bitsandbytes==0.40.0 einops==0.6.1
!pip install xformers==0.0.22.post7
!pip install langchain==0.1.4
!pip install faiss-gpu==1.7.1.post3
!pip install sentence_transformers
!pip install -q streamlit
!npm install localtunnel

Collecting accelerate==0.33.0
  Downloading accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting transformers==4.31.0
  Downloading transformers-4.31.0-py3-none-any.whl.metadata (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.9/116.9 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting tokenizers==0.13.3
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading accelerate-0.33.0-py3-none-any.whl (315 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.1/315.1 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m76.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hDownloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [10]:
import shutil

# Stage the pickled vector store: the Streamlit app reads and writes
# /kaggle/working/Faiss.pkl, so the copy from the Kaggle input path is
# placed in the working directory.
source_path = '/kaggle/input/faiss/transformers/default/1/Faiss.pkl'
destination_path = '/kaggle/working/'
shutil.copy(src=source_path, dst=destination_path)

'/kaggle/working/Faiss.pkl'

In [28]:
# Stage the logo that the Streamlit app displays from
# /kaggle/working/llama-2.png. Relies on `shutil` having been imported by an
# earlier cell.
source_path = '/kaggle/input/llama2-img/llama-2.png'
destination_path = '/kaggle/working/'
shutil.copy(src=source_path, dst=destination_path)

'/kaggle/working/llama-2.png'

In [29]:
%%writefile QFin-App.py
import os
from torch import cuda, bfloat16
import transformers
import streamlit as st
import pickle
import torch
import time
from transformers import StoppingCriteria, StoppingCriteriaList
from langchain.llms import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Run on the current CUDA device when one is available, otherwise on the CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# 4-bit NF4 quantisation with double quantisation and bfloat16 compute.
# The former `load_in_8bit_fp32_cpu_offload=True` argument was removed: it is
# not a BitsAndBytesConfig parameter (the documented option is
# `llm_int8_enable_fp32_cpu_offload`), and the whole model is mapped onto a
# single device below, so nothing is offloaded anyway.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

# The Hugging Face access token is read from the environment instead of being
# hard-coded. The Streamlit process inherits the environment of whatever
# launches it, so export HF_TOKEN (or HUGGING_FACE_HUB_TOKEN) before running.
# SECURITY: the token that used to be embedded here was stored in plain text
# and must be treated as compromised -- revoke it in the Hugging Face settings.
hf_auth = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
if not hf_auth:
    st.error(
        "No Hugging Face token found. Set the HF_TOKEN environment variable "
        "before starting the app."
    )
    st.stop()


@st.cache_resource
def _load_llm():
    """Load the model config, the quantised model and its tokenizer once.

    Streamlit re-executes this script on every widget interaction; caching the
    result keeps the model from being reloaded on each rerun.

    Returns:
        A ``(config, model, tokenizer)`` tuple for ``model_id``; the model is
        already switched to evaluation mode.
    """
    config = transformers.AutoConfig.from_pretrained(
        model_id,
        use_auth_token=hf_auth
    )
    # NOTE(review): trust_remote_code=True allows code shipped with the model
    # repository to run locally -- confirm it is actually required here.
    llm = transformers.AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        config=config,
        quantization_config=bnb_config,
        device_map={"": device},  # place every module on the single chosen device
        use_auth_token=hf_auth
    )
    llm.eval()
    tok = transformers.AutoTokenizer.from_pretrained(
        model_id,
        use_auth_token=hf_auth
    )
    return config, llm, tok


model_config, model, tokenizer = _load_llm()
# Text sequences that should end a generation.
stop_list = ['\nHuman:', '\n```\n']

# Token ids of each stop sequence, on the same device as the generated ids.
# add_special_tokens=False keeps the tokenizer from prepending special tokens
# (such as a beginning-of-sequence id); with them included, a stop sequence
# could never equal the tail of a longer generation.
stop_token_ids = [
    torch.LongTensor(tokenizer(x, add_special_tokens=False)['input_ids']).to(device)
    for x in stop_list
]

# Upper bound on how many trailing tokens can spell a stop string: every token
# contributes at least one byte of text.
_STOP_TAIL_TOKENS = max((len(s.encode('utf-8')) for s in stop_list), default=1)


class StopOnTokens(StoppingCriteria):
    """Stop generation once the output ends with one of the stop sequences."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the first sequence in ``input_ids`` ends with a stop sequence.

        Args:
            input_ids: ids of the sequences generated so far; only row 0 is inspected.
            scores: unused; accepted to match the StoppingCriteria call signature.
        """
        sequence = input_ids[0]

        # Exact token-id suffix match. The length guard avoids comparing
        # tensors of different sizes when the sequence is shorter than a stop
        # sequence.
        for stop_ids in stop_token_ids:
            n = len(stop_ids)
            if 0 < n <= sequence.shape[0] and torch.eq(sequence[-n:], stop_ids).all():
                return True

        # Text-level fallback: the same characters can be tokenised differently
        # in context than in isolation, so also compare the decoded tail.
        tail_text = tokenizer.decode(sequence[-_STOP_TAIL_TOKENS:])
        return any(tail_text.endswith(s) for s in stop_list)


stopping_criteria = StoppingCriteriaList([StopOnTokens()])

# Arguments for the Hugging Face text-generation pipeline, grouped by role.
_pipeline_kwargs = {
    # What to run.
    'task': 'text-generation',
    'model': model,
    'tokenizer': tokenizer,
    # Output shape: return the prompt together with the generated text.
    'return_full_text': True,
    # Generation controls.
    'stopping_criteria': stopping_criteria,
    'max_new_tokens': 512,
    'temperature': 0.1,
    'repetition_penalty': 1.1,
}

generate_text = transformers.pipeline(**_pipeline_kwargs)

# --- Page header -----------------------------------------------------------
st.title("Welcome To QFin - A FinNews Q&A System 📈")
st.image("/kaggle/working/llama-2.png", width=100)
st.markdown("***Powered By LLaMA 2***")
st.sidebar.title("News Article URLs")

# Five sidebar inputs for article URLs; unused inputs stay as empty strings.
# Surrounding whitespace is stripped so a stray space is not treated as a URL.
urls = [st.sidebar.text_input(f"URL {i+1}").strip() for i in range(5)]

process_url_clicked = st.sidebar.button("Process URLs")
# Location of the pickled vector store shared by the indexing and Q&A steps.
file_path = "/kaggle/working/Faiss.pkl"
# Single slot in the main area, reused for status messages and the question box.
main_placeholder = st.empty()
llm = HuggingFacePipeline(pipeline=generate_text)
vectorstore = None

# Streamlit re-runs this script on every interaction, so a plain list would be
# reset to empty each time. Keeping the list in session_state lets the
# (question, answer) pairs appended later survive across reruns.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
chat_history = st.session_state["chat_history"]
if process_url_clicked:
    # Fetch every non-empty URL; a failure on one URL does not stop the others.
    documents = []
    for url in urls:
        url = url.strip()
        if not url:
            continue
        try:
            main_placeholder.text(f"Loading URL: {url}")
            documents.extend(WebBaseLoader(url).load())
        except Exception as e:
            # Reported with st.error rather than through main_placeholder:
            # the placeholder is overwritten by the next status message and by
            # the question box, which would hide the failure immediately.
            st.error(f"Failed to load {url}: {e}")

    # Split the articles into overlapping chunks for embedding.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, separators=['\n\n', '\n', '.', ','],
                                                    chunk_overlap=20)
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(documents)

    if not docs:
        # Nothing to index (no URL given, or every load failed). Building a
        # FAISS index from an empty list would fail, so stop here and leave any
        # existing index file untouched.
        st.error("No article text could be loaded. Please provide at least one valid URL.")
    else:
        # Announce the embedding step before it starts rather than after.
        main_placeholder.text("Embedding Vector Started Building...✅✅✅")
        model_name = "ProsusAI/finbert"
        embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": device})
        vectorstore_mpnet = FAISS.from_documents(docs, embeddings)
        time.sleep(2)  # keep the status message visible briefly
        # NOTE(review): the index is persisted with pickle because the Q&A step
        # reads this file back with pickle.load; LangChain's FAISS save_local /
        # load_local may be a safer format -- confirm before switching both sides.
        with open(file_path, "wb") as f:
            pickle.dump(vectorstore_mpnet, f)
    
    

# The question box takes over the placeholder that showed status messages.
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(file_path):
        # SECURITY: pickle.load can execute arbitrary code embedded in the file.
        # Only load an index file that this app (or a trusted source) produced.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        # The file is closed before the (slow) retrieval + generation starts.

        chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), return_source_documents=True)
        result = chain({"question": query, "chat_history": chat_history}, return_only_outputs=True)

        answer = result.get('answer', 'No answer found')
        st.header("Answer")
        st.write(answer)
        chat_history.append((query, answer))

        # The chain is asked for its source documents, so list where the answer
        # came from (one entry per distinct source).
        sources = []
        for doc in result.get("source_documents", []):
            source = doc.metadata.get("source")
            if source and source not in sources:
                sources.append(source)
        if sources:
            st.subheader("Sources")
            for source in sources:
                st.write(source)

    else:
        st.error("Vectorstore file does not exist. Please process URLs first.")

Overwriting QFin-App.py


In [4]:
# Print the public IPv4 address of this machine as reported by icanhazip.com.
# NOTE(review): presumably needed as the access password that the localtunnel
# landing page asks for -- confirm.
!curl ipv4.icanhazip.com


34.75.197.27


In [None]:
!streamlit run QFin-App.py &>./logs.txt & npx localtunnel --port 8501


your url is: https://yellow-grapes-serve.loca.lt
