In [65]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [67]:
# Chat model used to answer questions over the retrieved context.
# Requests are sent to the OpenRouter endpoint; the API key is read from
# Streamlit's secrets store rather than being hard-coded in the notebook.
llm = ChatOpenAI(
    model_name="openai/gpt-3.5-turbo",
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=st.secrets["OPENAI_API_KEY"],
    temperature=0.0,
)



1. **LOAD DATA**

In [68]:
# News articles to ingest into the pipeline.
news_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]
loaders = UnstructuredURLLoader(urls=news_urls)

# Load the pages into Document objects; the bare name renders them as the
# cell output for inspection.
data = loaders.load()
data

[Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nSubscription Products\n\nprofile\n\nAmbareesh Baliga\n\nprofile\n\nCK Narayan\n\nprofil

2. **SPLIT DATA**

In [69]:
# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

In [70]:
# Number of chunks produced by splitting the loaded documents.
len(docs)

17

3. **CREATE EMBEDDINGS**

In [71]:
from sentence_transformers import SentenceTransformer

# Embed the text of every chunk with the all-mpnet-base-v2 model.
encoder = SentenceTransformer('all-mpnet-base-v2')
chunk_texts = [chunk.page_content for chunk in docs]
vectors = encoder.encode(chunk_texts)

In [72]:
# Embedding matrix dimensions: (number of chunks, embedding width).
vectors.shape

(17, 768)

In [73]:
import pickle

# Persist the chunk embeddings to vectors.pkl so they can be reloaded later.
with open('vectors.pkl', 'wb') as vector_file:
    pickle.dump(vectors, vector_file)


In [74]:
import faiss

# Reload the saved embeddings. The context manager guarantees the file handle
# is closed as soon as the data has been read.
# NOTE: pickle.load can execute arbitrary code; only load a vectors.pkl that
# this notebook wrote itself, never one from an untrusted source.
with open('vectors.pkl', 'rb') as vector_file:
    loaded_vectors = pickle.load(vector_file)

# Flat L2 index whose dimensionality matches the embedding width (shape[1]);
# every stored row becomes one searchable vector.
index = faiss.IndexFlatL2(loaded_vectors.shape[1])
index.add(loaded_vectors)

In [75]:
query = "What is the latest news on Tesla?"
query_vector = encoder.encode([query])

# D holds the distances and I the row ids of the nearest stored vectors
# for each query row.
D, I = index.search(query_vector, k=2)

# Map hit ids back to their chunks. FAISS pads the result with -1 when the
# index holds fewer than k vectors; skip those ids, otherwise docs[-1] would
# silently return the last chunk as if it were a match.
retrieved_docs = [docs[i] for i in I[0] if i >= 0]


4. **QUERY THE LLM**

In [76]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt asking the model to answer the question based on the supplied context.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="Answer the question based on the context below.\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:"
)

chain = LLMChain(
    llm=llm,
    prompt=prompt_template
)

# Concatenate the retrieved chunks into a single context string.
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

# Chain.run() is deprecated in favour of invoke(). invoke() returns a dict of
# the inputs plus outputs, so the generated answer is read from the chain's
# output key to keep `response` a plain string.
result = chain.invoke({"context": context_text, "question": query})
response = result[chain.output_key]

In [77]:
response

'The latest news on Tesla is that it rallied 10% after Morgan Stanley upgraded the electric car maker to "overweight" from "equal-weight," saying its Dojo supercomputer could boost the company\'s market value by nearly $600 billion.'