In [12]:
# Install the third-party packages used by the app cell below: the Wikipedia
# client, the LangChain OpenAI/community integrations, Streamlit and
# sentence-transformers. NOTE(review): versions are unpinned — consider pinning.
!pip install wikipedia langchain-openai langchain_community streamlit sentence_transformers



In [14]:
# Quietly (-q) upgrade (-U) langchain-openai to the newest available release.
!pip install -qU langchain-openai

In [15]:
# The Chroma vector store used through LangChain is distributed on PyPI as
# `chromadb`; the distribution named `chroma` is an unrelated package.
!pip install chromadb



In [28]:
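# %%writefile app.py
# ^ Uncomment the magic above (it must remain the first line of the cell) to save
#   this cell as app.py; the later `streamlit run app.py` cell needs that file.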
import os
import uuid

import streamlit as st
from langchain import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
from langchain_openai import AzureChatOpenAI

# Configuration.
# The API key is a secret and is read from the environment instead of being
# committed to the notebook. The non-secret settings keep their previous values
# as defaults and can be overridden through environment variables.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and rotated in the Azure portal.
api_key = os.environ.get("AZURE_OPENAI_API_KEY", "")
api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-02-01")
azure_endpoint = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://dono-rag-demo-resource-instance.openai.azure.com",
)
# Name of the Azure OpenAI *deployment* (not of an embedding model).
model_name = os.environ.get(
    "AZURE_OPENAI_DEPLOYMENT", "GPT_35_TURBO_DEMO_RAG_DEPLOYMENT_DONO"
)

# Fail early with a readable message rather than an authentication error later.
if not api_key:
    st.error(
        "Error loading OpenAI model: the AZURE_OPENAI_API_KEY environment "
        "variable is not set."
    )
    st.stop()

# Load the Azure OpenAI chat model used as the final step of the RAG chain.
try:
    model = AzureChatOpenAI(
        temperature=0,  # deterministic answers for question answering
        api_key=api_key,
        api_version=api_version,
        azure_endpoint=azure_endpoint,
        # Azure routes requests by deployment name; pass it explicitly and
        # reuse the single configured value instead of repeating the literal.
        azure_deployment=model_name,
    )
except Exception as e:
    st.error(f"Error loading OpenAI model: {e}")
    st.stop()

# Function to extract text from Wikipedia documents
def extract_text(wikipedia_link):
    """Fetch Wikipedia content for a link (or plain search query) as one string.

    ``WikipediaLoader`` is given a ``query``, so when the input is a Wikipedia
    article URL the article title is taken from the ``/wiki/<Title>`` path
    (the ``#fragment`` is dropped, percent-escapes are decoded and underscores
    become spaces). Any other input is passed through unchanged, so plain
    queries keep working.

    Args:
        wikipedia_link: Wikipedia article URL or a free-text query.

    Returns:
        The ``page_content`` of up to 5 loaded documents, separated by blank
        lines so page boundaries survive paragraph-based splitting, with
        leading/trailing whitespace stripped. Empty string if nothing loaded.
    """
    from urllib.parse import unquote, urlparse

    query = wikipedia_link.strip()
    parsed = urlparse(query)
    if parsed.scheme in ("http", "https") and "/wiki/" in parsed.path:
        title = unquote(parsed.path.split("/wiki/", 1)[1]).replace("_", " ").strip()
        # Fall back to the raw input if the URL carries no title after /wiki/.
        query = title or query

    docs = WikipediaLoader(query=query, load_max_docs=5).load()
    return "\n\n".join(page.page_content for page in docs).strip()

# Streamlit app
st.title("FIFA World Cup Q&A with Streamlit")
st.subheader("Using Large Language Models (LLMs) and Chroma Embeddings")

# User input for Wikipedia link
wikipedia_link = st.text_input("Enter a Wikipedia link (e.g., https://en.wikipedia.org/wiki/FIFA_World_Cup#Results):")

# User input for question
question = st.text_input("Ask a question about the FIFA World Cup:")

if wikipedia_link and question:
    # Extract text from Wikipedia documents
    try:
        text = extract_text(wikipedia_link)
    except Exception as e:
        st.error(f"Error extracting text from Wikipedia: {e}")
        st.stop()

    # Split into overlapping chunks sized for embedding and retrieval; this
    # also drops empty pieces that a plain split on blank lines would keep.
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100
    ).split_text(text)
    if not chunks:
        st.error("Error extracting text from Wikipedia: no content was found for that link.")
        st.stop()

    # Build the in-memory Chroma index. The embedding model must be a
    # sentence-transformers model name, not the Azure chat deployment name.
    try:
        embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_texts(
            chunks,
            embedding_function,
            # Fresh collection per run so chunks from earlier questions/links
            # are not mixed in. NOTE(review): assumes the default collection
            # can be shared across Streamlit reruns in one process — confirm.
            collection_name=f"wiki-{uuid.uuid4().hex}",
        )
    except Exception as e:
        st.error(f"Error building the vector index: {e}")
        st.stop()

    try:
        # Retriever for efficient searching (top 5 most similar chunks)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

        # Prompt template for LLMs (question-answering format)
        llm_prompt_template = """You are an assistant for question-answering tasks.
    Use the following context to answer the question.
    I have given you data on 5 years of football matches. You have to answer in a few words based on the query.
    Question: {question} \nContext: {context} \nAnswer:"""

        llm_prompt = PromptTemplate.from_template(llm_prompt_template)

        def format_docs(docs):
            """Join the text of the retrieved documents into one context string."""
            # The retriever yields Document objects; the text is in page_content.
            return "\n\n".join(doc.page_content for doc in docs)

        # RAG chain: the question is sent both to the retriever (to build the
        # context) and, unchanged, to the prompt's {question} slot.
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | llm_prompt
            | model
            | StrOutputParser()
        )

        # Call the RAG chain to get the answer. `invoke` takes the chain input
        # as its first positional argument, not as a `question=` keyword.
        try:
            answer = rag_chain.invoke(question)
            st.success(f"Answer: {answer}")
        except Exception as e:
            st.error(f"Error during question answering: {e}")
    finally:
        # Release the per-run collection so reruns do not accumulate indexes.
        vectorstore.delete_collection()

else:
    st.info("Please enter both the Wikipedia link and your question.")


In [29]:
# Install the localtunnel CLI globally (-g) with npm; a later cell runs it to
# expose the locally running app.
!npm install -g localtunnel


[K[?25h/tools/node/bin/lt -> /tools/node/lib/node_modules/localtunnel/bin/lt.js
+ localtunnel@2.0.2
updated 1 package in 1.092s


In [30]:
# Start the Streamlit app from app.py as a background job, sending its output
# to /content/logs.txt (`&>` presumably covers stdout and stderr — bash syntax).
# NOTE(review): app.py must already exist; the `%%writefile app.py` line in the
# app cell is commented out, so this may run a stale or missing file.
!streamlit run app.py &>/content/logs.txt &

In [31]:
# `import urllib` alone does not guarantee the `request` submodule is loaded,
# so import it explicitly.
import urllib.request

# Look up this machine's public IPv4 address; the variable name indicates it is
# used as the localtunnel password. A timeout keeps the cell from hanging, and
# the context manager closes the HTTP response.
with urllib.request.urlopen('http://ipv4.icanhazip.com', timeout=10) as response:
    localtunnel_password = response.read().decode('utf8').strip("\n")
localtunnel_password

'34.138.111.61'

In [None]:
# Open a public localtunnel URL that forwards to local port 8501 (presumably the
# port the Streamlit app started above is listening on — confirm in logs.txt).
# This command keeps running until the cell is interrupted.
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 1.518s
your url is: https://sour-actors-turn.loca.lt
