# Install Required Packages

In [44]:
!pip install faiss-cpu
!pip install sentence-transformers
!pip install streamlit
!pip install PyMuPDF
!pip install requests
!pip install beautifulsoup4
!pip install youtube-transcript-api
!pip install google-auth
!pip install google-auth-oauthlib
!pip install google-auth-httplib2
!pip install google-api-python-client
!pip install pyngrok



# Import Necessary Libraries

In [None]:
import os
import fitz  # PyMuPDF
import requests
from bs4 import BeautifulSoup
from youtube_transcript_api import YouTubeTranscriptApi
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import streamlit as st
from pyngrok import ngrok
from dotenv import load_dotenv

# Set Up Environment Variables

In [None]:
# Load environment variables
load_dotenv()

# Set API key
API_KEY = os.getenv("AIzaSyCoeubPB9VPTJJgMC6hPhkwA6hbHGLX0ZY")

# Define Functions for Text Processing and Question Answering

In [None]:
# Function to query Google Generative AI using API key
def query_google_genai(prompt):
    url = f"https://generativelanguage.googleapis.com/v1beta2/models/gemini-pro:generateText?key={API_KEY}"
    headers = {
        "Content-Type": "application/json",
    }
    data = {
        "prompt": {
            "text": prompt
        },
        "maxOutputTokens": 1000
    }
    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        result = response.json()
        return result.get('generations', [{}])[0].get('text', '')
    else:
        return "Error: Could not retrieve response from Google Generative AI"

# Extract text from PDF

In [None]:
def extract_text_from_pdf(pdf_path):
    document = fitz.open(pdf_path)
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text

pdf_text = extract_text_from_pdf("/content/Apple_Vision_Pro_Privacy_Overview.pdf")

# Scrape text from website

In [None]:
def scrape_website(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    text = ' '.join([p.text for p in soup.find_all('p')])
    return text

website_text = scrape_website("https://www.apple.com/apple-vision-pro/")

# Get YouTube transcript


In [None]:
def get_youtube_transcript(video_id):
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
    transcript = ' '.join([t['text'] for t in transcript_list])
    return transcript

youtube_transcript = get_youtube_transcript("TX9qSaGXFyg")

# Load pre-trained model and create FAISS index

In [None]:
model = SentenceTransformer('all-MiniLM-L6-v2')
texts = [pdf_text, website_text, youtube_transcript]
embeddings = model.encode(texts)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# Function to create the conversational prompt

In [45]:
def create_prompt(user_query, retrieved_text):
    prompt = f"""Answer the question as detailed as possible from the provided context, make sure to provide all the details.
If the answer is not in the provided context, just say, "answer is not available in the context", don't provide a wrong answer.\n\n
Context:\n {retrieved_text}\n
Question:\n {user_query}\n
Answer:"""
    return prompt

# Function to retrieve the most relevant text from FAISS index
def retrieve_relevant_text(user_query, index, model, texts):
    query_embedding = model.encode([user_query])
    D, I = index.search(query_embedding, k=1)
    relevant_text = texts[I[0][0]]
    return relevant_text

# Streamlit interface

In [None]:
st.title("Apple Vision Pro Chatbot")
st.write("Ask me anything about the Apple Vision Pro!")

# User input
user_query = st.text_input("Your question:")

if user_query:
    # Retrieve the most relevant text from FAISS index
    relevant_text = retrieve_relevant_text(user_query, index, model, texts)

    # Create prompt for Google Generative AI
    prompt = create_prompt(user_query, relevant_text)

    # Get response from Google Generative AI
    response = query_google_genai(prompt)

    # Display the response
    st.write("Response:")
    st.write(response)


# Setup ngrok and Authenticate

In [46]:
!ngrok authtoken 2iSlOTsSlMwZIPvezCeuE37buT6_6dxESw2qSiwMTvUxGDiEn

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [47]:
public_url = ngrok.connect(8501)
print(f"Public URL: {public_url}")

Public URL: NgrokTunnel: "https://6f8f-34-16-221-53.ngrok-free.app" -> "http://localhost:8501"


# Write the Streamlit App Code to a File

In [64]:
code = """import os
import fitz  # PyMuPDF
import requests
from bs4 import BeautifulSoup
from youtube_transcript_api import YouTubeTranscriptApi
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import streamlit as st
from pyngrok import ngrok
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Set API key
API_KEY = os.getenv("AIzaSyCoeubPB9VPTJJgMC6hPhkwA6hbHGLX0ZY")

# Function to query Google Generative AI using API key
def query_google_genai(prompt):
    url = f"https://generativelanguage.googleapis.com/v1beta2/models/gemini-pro:generateText?key={API_KEY}"
    headers = {
        "Content-Type": "application/json",
    }
    data = {
        "prompt": {
            "text": prompt
        },
        "maxOutputTokens": 1000
    }
    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        result = response.json()
        return result.get('generations', [{}])[0].get('text', '')
    else:
        return "Error: Could not retrieve response from Google Generative AI"

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    document = fitz.open(pdf_path)
    text = ""
    for page_num in range(len(document)):
        page = document.load_page(page_num)
        text += page.get_text()
    return text

pdf_text = extract_text_from_pdf("/content/Apple_Vision_Pro_Privacy_Overview.pdf")

# Scrape text from website
def scrape_website(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    text = ' '.join([p.text for p in soup.find_all('p')])
    return text

website_text = scrape_website("https://www.apple.com/apple-vision-pro/")

# Get YouTube transcript
def get_youtube_transcript(video_id):
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
    transcript = ' '.join([t['text'] for t in transcript_list])
    return transcript

youtube_transcript = get_youtube_transcript("TX9qSaGXFyg")

# Load pre-trained model and create FAISS index
model = SentenceTransformer('all-MiniLM-L6-v2')
texts = [pdf_text, website_text, youtube_transcript]
embeddings = model.encode(texts)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# Function to create the conversational prompt
# Function to create the conversational prompt
def get_conversational_chain():
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer.
    Context:\n {context}\n
    Question:\n {question}\n
    Answer:
    """ # Indent this line
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3) # Indent this line
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"]) # Indent this line
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt) # Indent this line
    return chain # Indent this line

# Function to retrieve the most relevant text from FAISS index
def retrieve_relevant_text(user_query, index, model, texts):
    query_embedding = model.encode([user_query])
    D, I = index.search(query_embedding, k=1)
    relevant_text = texts[I[0][0]]
    return relevant_text

# Streamlit interface
st.title("Apple Vision Pro Chatbot")
st.write("Ask me anything about the Apple Vision Pro!")

# User input
user_query = st.text_input("Your question:")

if user_query:
    # Retrieve the most relevant text from FAISS index
    relevant_text = retrieve_relevant_text(user_query, index, model, texts)

    # Create prompt for Google Generative AI
    prompt = create_prompt(user_query, relevant_text)

    # Get response from Google Generative AI
    response = query_google_genai(prompt)

    # Display the response
    st.write("Response:")
    st.write(response)

"""

# Save the code to a file named app.py
with open("app.py", "w") as f:
    f.write(code)

print("app.py has been created.")


IndentationError: unexpected indent (<ipython-input-64-aee5ddcc379e>, line 78)

In [None]:
import subprocess
subprocess.run(["streamlit", "run", "app.py"])