# AI Assignment from Vijayi WFH Technologies Pvt Ltd 

## Task 2 - Streamlit app

In [1]:
import streamlit as st
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import json




In [16]:
# Loading components
@st.cache_resource
def load_model(model_path: str = './fine_tuned_model'):
    """Load the sentence-embedding model used to encode queries.

    Wrapped in ``st.cache_resource`` so Streamlit reuses the loaded model
    across script reruns instead of reloading it each time.

    Args:
        model_path: Directory or model identifier handed to
            ``SentenceTransformer``. Defaults to the local fine-tuned
            checkpoint directory.

    Returns:
        The ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_path)

@st.cache_resource
def load_index(index_path: str = "quote_index.faiss"):
    """Read the FAISS vector index from disk.

    Wrapped in ``st.cache_resource`` so Streamlit reuses the loaded index
    across script reruns instead of re-reading the file each time.

    Args:
        index_path: Path of the serialized FAISS index file.

    Returns:
        The index object returned by ``faiss.read_index``.
    """
    return faiss.read_index(index_path)

@st.cache_resource
def load_data(data_path: str = "quotes_df.pkl") -> pd.DataFrame:
    """Load the pickled quotes table.

    Wrapped in ``st.cache_resource`` so Streamlit reuses the loaded object
    across script reruns instead of unpickling the file each time.

    Args:
        data_path: Path of the pickle file to read.

    Returns:
        The object stored in the pickle. The rest of this file reads the
        ``quote``, ``author`` and ``tags`` columns from it.
    """
    # SECURITY: unpickling can execute arbitrary code. Only point this at a
    # file produced by a trusted pipeline, never at user-supplied data.
    return pd.read_pickle(data_path)

@st.cache_resource
def load_generator(model_name: str = 'gpt2'):
    """Build the text-generation pipeline used to draft answers.

    Wrapped in ``st.cache_resource`` so Streamlit reuses the pipeline
    across script reruns instead of rebuilding it each time.

    Args:
        model_name: Model identifier passed to ``transformers.pipeline``.

    Returns:
        The ``text-generation`` pipeline object.
    """
    return pipeline('text-generation', model=model_name)

In [17]:
# Instantiate the shared components at module level. Each loader is decorated
# with st.cache_resource, and the retrieval/generation functions below read
# these names as module globals.
model = load_model()          # sentence-embedding model used to encode queries
index = load_index()          # FAISS index searched with the query embedding
df = load_data()              # quotes table; rows are looked up by FAISS result position
generator = load_generator()  # text-generation pipeline used to draft the answer

# Retrieval function
def retrieve_quotes(query, k=5):
    """Return up to ``k`` quotes that are nearest to ``query`` in the index.

    The query is encoded with the module-level ``model``, searched against
    the module-level FAISS ``index``, and every hit is resolved to a row of
    the module-level ``df`` by position.

    Args:
        query: Free-text search string.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of dicts with the keys ``quote``, ``author``, ``tags`` and
        ``score``, in the order FAISS returned them. The list is shorter
        than ``k`` when the index yields fewer valid neighbours.
        ``score`` is ``1 / (1 + distance)``.
        NOTE(review): this mapping presumes the index reports non-negative
        distances (e.g. L2) -- confirm against how the index was built.
    """
    query_embed = model.encode([query])
    distances, indices = index.search(query_embed, k)

    results = []
    for distance, idx in zip(distances[0], indices[0]):
        # FAISS fills unused result slots with label -1 when it finds fewer
        # than k neighbours. df.iloc[-1] would silently return the *last*
        # row as a bogus hit, so such slots are skipped.
        if idx < 0:
            continue

        row = df.iloc[idx]  # single positional lookup instead of one per field

        tags = row['tags']
        # Array-like tags (e.g. numpy arrays) are turned into plain lists so
        # the result can be passed to json.dumps by the caller.
        if hasattr(tags, 'tolist'):
            tags = tags.tolist()

        results.append({
            'quote': row['quote'],
            'author': row['author'],
            'tags': tags,
            'score': float(1 / (1 + distance)),
        })
    return results

# Response generation
def generate_response(query, context_quotes, max_new_tokens=128):
    """Generate a free-text answer to ``query`` grounded in retrieved quotes.

    The quotes are rendered as a numbered "Context" list, followed by the
    question and an "Answer:" cue, and the prompt is completed by the
    module-level ``generator`` pipeline.

    Args:
        query: The user's question.
        context_quotes: Iterable of dicts that each provide ``quote`` and
            ``author``.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt.

    Returns:
        The generated continuation with surrounding whitespace removed.
    """
    context = "\n".join(
        f"{position}. {item['quote']} - {item['author']}"
        for position, item in enumerate(context_quotes, start=1)
    )
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

    outputs = generator(
        prompt,
        # max_length would count the prompt tokens too, so a long context
        # could leave little or no room for the answer; bound only the
        # newly generated part instead.
        max_new_tokens=max_new_tokens,
        # temperature only applies when sampling; request it explicitly
        # rather than relying on the model's configuration defaults.
        do_sample=True,
        temperature=0.7,
        # Ask for the continuation only. Splitting the full text on
        # "Answer:" picks the wrong segment whenever that marker also occurs
        # in a quote, in the query, or in the generated text.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()

Device set to use cpu


In [18]:
# Streamlit UI
# NOTE(review): Streamlit documents set_page_config as needing to be the first
# Streamlit command on a page, yet the cached loaders earlier in this file run
# before it -- confirm this does not raise on the Streamlit version in use.
st.set_page_config(page_title="Quote Retrieval System", layout="wide")
st.title("📚 Semantic Quote Retrieval System")

# Query input
query = st.text_input("Enter your query:", placeholder="e.g., 'Quotes about hope by Oscar Wilde'")

if st.button("Search"):
    if query:
        # Keep only the slow work inside the spinner; rendering happens below.
        with st.spinner("Retrieving relevant quotes..."):
            retrieved_quotes = retrieve_quotes(query)
            answer = generate_response(query, retrieved_quotes)

        # st.button is True only for the run triggered by the click. Storing
        # the results in session state keeps them on screen across later
        # reruns (e.g. the one caused by pressing the download button).
        st.session_state["quote_results"] = {
            "query": query,
            "answer": answer,
            "sources": retrieved_quotes,
        }
    else:
        st.warning("Please enter a query before searching.")

output = st.session_state.get("quote_results")
if output:
    # Display results
    st.subheader("Generated Answer")
    st.info(output["answer"])

    st.subheader("Source Quotes")
    for i, quote in enumerate(output["sources"]):
        with st.expander(f"Quote {i+1} (Score: {quote['score']:.2f}):"):
            st.write(f"**{quote['quote']}**")
            st.caption(f"Author: {quote['author']}")
            st.caption(f"Tags: {', '.join(quote['tags'])}")

    # JSON output
    st.subheader("Structured Output")
    st.json(output)

    # Download button
    st.download_button(
        label="Download Results as JSON",
        data=json.dumps(output, indent=2),
        file_name="quote_results.json",
        mime="application/json"
    )



In [5]:
# Sidebar info
# Text shown in the sidebar's info box (rendered as Markdown by Streamlit).
_about_text = (
    "\n"
    "This RAG-powered system retrieves relevant quotes based on semantic similarity.\n"
    "- **Fine-tuned model**: sentence-transformers/all-MiniLM-L6-v2\n"
    "- **Vector database**: FAISS\n"
    "- **LLM**: GPT-2 \n"
)

_sidebar = st.sidebar
_sidebar.header("About")
_sidebar.info(_about_text)



DeltaGenerator(_root_container=1, _parent=DeltaGenerator())