In [3]:
!pip install wikipedia langchain-openai langchain_community streamlit sentence_transformers chromadb



In [2]:
!pip install -qU langchain-openai

In [30]:
%%writefile app.py
import os

import streamlit as st
from langchain_community.document_loaders import WikipediaLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema import StrOutputParser
from langchain_openai import AzureChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain import PromptTemplate

# Configure the Azure OpenAI chat model.
# The API key is read from the AZURE_OPENAI_API_KEY environment variable so no
# secret is written into app.py or the notebook. The variable must be set in
# the environment that launches Streamlit. Any key that was previously
# committed in this file should be treated as compromised and rotated.
azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
if not azure_api_key:
    # Fail with a readable message in the UI instead of an authentication
    # traceback on the first query; st.stop() halts this script run.
    st.error("AZURE_OPENAI_API_KEY is not set. Set it in the environment before launching the app.")
    st.stop()

model = AzureChatOpenAI(
    temperature=0,  # deterministic output for factual answers
    api_key=azure_api_key,
    api_version="2024-02-01",
    azure_endpoint="https://dono-rag-demo-resource-instance.openai.azure.com",
    model="GPT_35_TURBO_DEMO_RAG_DEPLOYMENT_DONO"
)

# Fetch Wikipedia pages and cut them into token-bounded chunks
@st.cache_data
def load_and_process_wikipedia_data(link, max_docs=5):
    """Load Wikipedia documents for a query and split them into chunks.

    Args:
        link: Value passed to ``WikipediaLoader`` as its ``query``.
        max_docs: Upper bound on the number of documents the loader fetches.

    Returns:
        The list produced by splitting the loaded documents with a
        tiktoken-based ``RecursiveCharacterTextSplitter`` (chunk size 512,
        no overlap). Results are cached by ``st.cache_data``.
    """
    wiki_pages = WikipediaLoader(query=link, load_max_docs=max_docs).load()
    return RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=512,
        chunk_overlap=0,
    ).split_documents(wiki_pages)

# Streamlit re-executes this whole script on every user interaction. The
# helpers below are wrapped in st.cache_resource so the embedding model is
# loaded once and the persisted Chroma collection is populated at most once,
# instead of re-embedding and appending duplicate chunks on each rerun.

@st.cache_resource
def get_embedding_function():
    """Return the shared sentence-transformer embedding function."""
    return SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")


@st.cache_resource
def get_vectorstore(source, persist_directory="./chroma_db"):
    """Open the persisted Chroma store, filling it only when it is empty.

    Args:
        source: Query handed to ``load_and_process_wikipedia_data``.
        persist_directory: Directory where Chroma keeps its data on disk.

    Returns:
        A ``Chroma`` instance backed by ``persist_directory``.
    """
    store = Chroma(
        persist_directory=persist_directory,
        embedding_function=get_embedding_function(),
    )
    # Only embed and insert when nothing has been stored yet; otherwise every
    # start-up would add the same chunks again and top-k retrieval would
    # return repeated passages. Delete the directory to force a rebuild.
    if not store.get(limit=1)["ids"]:
        chunks = load_and_process_wikipedia_data(source)
        if chunks:  # nothing to add when the loader found no pages
            store.add_documents(chunks)
    return store


# Create the embedding function
embedding_function = get_embedding_function()

# Load documents and prepare vector store
# NOTE(review): this URL is passed to WikipediaLoader as its `query`; a plain
# title such as "FIFA World Cup" may be what was intended — confirm.
link = "https://en.wikipedia.org/wiki/FIFA_World_Cup#Results"

# Load from disk (populating it on first use)
vectorstore_disk = get_vectorstore(link)

retriever = vectorstore_disk.as_retriever(search_kwargs={"k": 5})

# Define the prompt template.
# The template text asks for an answer in a fixed Markdown layout (Winner,
# Score, Date, Location, Summary) and exposes two placeholders, {question}
# and {context}, that are filled in when the prompt is formatted.
llm_prompt_template ="""You are an expert in providing structured and concise answers to football-related queries.
Use the following context to answer the question.
Your response should be in the following format:
- **Winner**: [Winner's Name]
- **Score**: [Score]
- **Date**: [Date]
- **Location**: [Location]
- **Summary**: [A brief summary of the match]

Question: {question}
Context: {context}
Answer:"""

# Build the PromptTemplate object from the raw template string above.
llm_prompt = PromptTemplate.from_template(llm_prompt_template)

# Collapse a sequence of documents into a single text block
def format_docs(docs):
    """Join each document's ``page_content`` with a blank line in between.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with the contents separated by two newlines; an empty
        string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# Assemble the RAG pipeline.
# Step 1 maps the incoming query to the two prompt variables: "context" is the
# retriever output passed through format_docs, "question" is the query itself.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Step 2 pipes those variables through the prompt, the chat model, and a
# parser that turns the model's reply into a plain string.
rag_chain = chain_inputs | llm_prompt | model | StrOutputParser()

# Streamlit application
st.title("FIFA World Cup Query Assistant")

# Input box for user query
user_query = st.text_input("Enter your question about the FIFA World Cup:")

# Button to run the query
if st.button("Get Answer"):
    # Strip surrounding whitespace so an input of only spaces is treated as
    # empty instead of being sent to the retriever and the model.
    question = user_query.strip()
    if question:
        # Retrieval plus the model call can take a few seconds; show progress.
        with st.spinner("Searching for an answer..."):
            response = rag_chain.invoke(question)
        st.write("Answer:", response)
    else:
        st.write("Please enter a question.")


Overwriting app.py


In [31]:
# Install the localtunnel CLI globally with npm (it provides the `lt` command).
!npm install -g localtunnel


[K[?25h/tools/node/bin/lt -> /tools/node/lib/node_modules/localtunnel/bin/lt.js
+ localtunnel@2.0.2
updated 1 package in 0.959s


In [32]:
# Start the Streamlit app in the background (trailing `&`) so the cell returns
# immediately. `&>` is bash syntax that sends both stdout and stderr to
# /content/logs.txt; check that file if the app does not come up.
!streamlit run app.py &>/content/logs.txt &

In [33]:
# `import urllib` alone does not load the `urllib.request` submodule; import it
# explicitly so this cell also works on a fresh kernel.
import urllib.request

# Look up this machine's public IPv4 address. Judging by the variable name it
# is the value to enter as the password on the localtunnel landing page.
# The context manager closes the HTTP response, and the timeout keeps the cell
# from hanging if the service is unreachable.
with urllib.request.urlopen('http://ipv4.icanhazip.com', timeout=10) as response:
    localtunnel_password = response.read().decode('utf8').strip()
localtunnel_password

'34.138.111.61'

In [34]:
# Open a public localtunnel URL that forwards to local port 8501
# (presumably the port the Streamlit app listens on — confirm).
# The command keeps running until it is interrupted.
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 1.553s
your url is: https://floppy-meals-occur.loca.lt
^C
