In [None]:
import json
import os
from urllib.parse import urljoin

import gradio as gr
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI

from scraper import fetch_website_links, fetch_website_contents

In [None]:
load_dotenv()

# ===== API Setup =====
# Groq exposes an OpenAI-compatible endpoint, so the stock OpenAI client is
# pointed at it through `base_url`.
api_key1 = os.getenv("GROQ_API_KEY")
if not api_key1:
    # Fail fast: with api_key=None the OpenAI client falls back to the
    # OPENAI_API_KEY environment variable, which would either send the wrong
    # credential to Groq or raise an error that names the wrong variable.
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your environment or .env file."
    )
Groq_MODEL = "llama-3.1-8b-instant"

ollama_groq = OpenAI(
    api_key=api_key1,
    base_url="https://api.groq.com/openai/v1"
)


In [None]:
# ===== Link Selection Prompt =====
# System message for the link-selection call in select_relevant_links.
# It asks the model to reply with a JSON object holding a "links" list of
# {"type", "url"} entries, which is the shape
# fetch_page_and_all_relevant_links iterates over.
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [None]:
def get_links_user_prompt(url):
    """Build the user message that asks the model to pick relevant links.

    Args:
        url: Address of the page whose links are listed in the prompt.

    Returns:
        str: The instruction text followed by the links returned by
        fetch_website_links(url), one per line.
    """
    page_links = fetch_website_links(url)
    instructions = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a chatbot so that it can answer any question. 
Respond in JSON format only.
Do not include Terms of Service, Privacy, or email links.

Links (some might be relative):
"""
    link_listing = "\n".join(page_links)
    return instructions + link_listing

In [None]:
# Preview the link-selection user prompt built for the Biman Airlines site;
# the returned string is shown as the cell output.
get_links_user_prompt("https://www.biman-airlines.com/")

In [None]:
def select_relevant_links(url):
    """Ask the model which links found on `url` are worth reading.

    Sends the link-selection system prompt and the link list for `url` to the
    chat-completions endpoint with a JSON-object response format, then parses
    the reply.

    Args:
        url: Address of the page whose links should be filtered.

    Returns:
        dict: The parsed reply, always carrying a "links" list. If the model
        answers with valid JSON that is not an object, or whose "links" entry
        is missing or not a list, "links" is an empty list so callers can
        iterate over it without further checks.

    Raises:
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    response = ollama_groq.chat.completions.create(
        model=Groq_MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    reply_text = response.choices[0].message.content
    parsed = json.loads(reply_text)

    # JSON mode yields valid JSON but does not enforce the requested schema,
    # so normalise the shape instead of letting callers hit a KeyError.
    if not isinstance(parsed, dict):
        return {"links": []}
    if not isinstance(parsed.get("links"), list):
        parsed["links"] = []
    return parsed

In [None]:
# Try link selection on the Biman Airlines site; this issues a chat-completion
# request and shows the parsed result as the cell output.
select_relevant_links("https://www.biman-airlines.com/")

In [None]:
# ===== Fetch Page Contents + Relevant Links =====
def fetch_page_and_all_relevant_links(url):
    """Collect the landing page text plus the text of each selected link.

    Args:
        url: Address of the landing page.

    Returns:
        str: A markdown-style document with a "## Landing Page:" section
        followed by one "### Link: <type>" section per selected link.

    Notes:
        Entries in the model's reply that are not dicts, or that lack a
        non-empty string "url", are skipped. Relative URLs are resolved
        against `url`, because the link list sent to the model may contain
        relative paths. A missing "type" is labelled "page".
    """
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)

    selected = relevant_links.get("links") if isinstance(relevant_links, dict) else None
    sections = [f"## Landing Page:\n\n{contents}\n\n## Relevant Links:\n"]
    for link in selected or []:
        if not isinstance(link, dict):
            continue
        target = link.get("url")
        if not isinstance(target, str) or not target:
            continue
        # urljoin leaves absolute URLs alone and resolves relative ones.
        page_url = urljoin(url, target)
        sections.append(f"\n\n### Link: {link.get('type', 'page')}\n")
        sections.append(fetch_website_contents(page_url))
    return "".join(sections)

In [None]:
# Run the full fetch for the Biman Airlines site; the combined landing-page and
# linked-page text is shown as the cell output.
fetch_page_and_all_relevant_links("https://www.biman-airlines.com/")

In [None]:
def get_brochure_user_prompt(company_name, url, max_chars=5000):
    """Build the user message containing the scraped site content.

    Args:
        company_name: Name inserted into the opening instruction.
        url: Landing page whose content (and selected linked pages) is
            appended to the instruction.
        max_chars: Maximum length of the returned prompt in characters. The
            limit covers the instruction text as well as the page content.
            Pass None to disable truncation. Defaults to 5000.

    Returns:
        str: The instruction followed by the fetched content, cut to
        `max_chars` characters when a limit is given.
    """
    user_prompt = f"""
You are looking at a company called: {company_name}.
Here are the contents of its landing page and other relevant pages;
use this information to build a structured database.
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    if max_chars is not None:
        user_prompt = user_prompt[:max_chars]  # truncate if too long
    return user_prompt

In [None]:
# ===== System Prompt for Brochure =====
# System message used by create_brochure. Despite the "brochure" naming, the
# text instructs the model to extract structured Biman Airlines information
# and to reply with JSON only.
system_prompt = """
You are a specialized Biman Airlines information assistant. 
Your task is to analyze any public webpage related to Biman Airlines and extract only valuable structured information:

- Flight schedules and general arrival/departure times  
- Flight delays or late times (without using specific passenger data)  
- Flight status updates (on-time, delayed)  
- Biman Airlines services, FAQ info, baggage policies, routes, gates, terminals  

Important rules:
1. Never include any personal passenger information or specific bookings.  
2. Only use publicly available data.  
3. Output JSON only.
4. If info is missing, use null.
"""

In [None]:
# ===== Create Brochure =====
def create_brochure(company_name, url):
    """Run the extraction prompt over the site and return the model's reply.

    Args:
        company_name: Name passed through to the user prompt.
        url: Landing page whose content is sent to the model.

    Returns:
        The content of the first choice in the chat-completion response.
    """
    site_prompt = get_brochure_user_prompt(company_name, url)
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": site_prompt},
    ]
    completion = ollama_groq.chat.completions.create(
        model=Groq_MODEL,
        messages=conversation,
    )
    return completion.choices[0].message.content

In [None]:
# ===== Chat Function =====
# Brochure text keyed by (company_name, url), so the scrape and the extraction
# call happen on first use instead of on every chat message.
_BROCHURE_CACHE = {}


def _get_cached_brochure(company_name, url):
    """Return the brochure for (company_name, url), building it on first use."""
    key = (company_name, url)
    if key not in _BROCHURE_CACHE:
        # Stored only after create_brochure returns; if it raises, nothing is
        # cached and the next call tries again.
        _BROCHURE_CACHE[key] = create_brochure(company_name, url)
    return _BROCHURE_CACHE[key]


def chat(message, history):
    """Stream a reply to `message`, using the site brochure as system context.

    Args:
        message: The user's latest message text.
        history: Earlier turns; each entry is read as a mapping with "role"
            and "content" keys, and only those two keys are forwarded.

    Yields:
        str: The reply accumulated so far, once per non-empty streamed chunk.
    """
    # Fetch brochure once for context (cached after the first message)
    brochure_data = _get_cached_brochure("Biman Bangladesh", "https://www.biman-airlines.com")

    messages = [{"role": "system", "content": brochure_data}]
    messages += [{"role": h["role"], "content": h["content"]} for h in history]
    messages.append({"role": "user", "content": message})

    stream = ollama_groq.chat.completions.create(model=Groq_MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        # Skip chunks that carry no choices rather than indexing into an
        # empty list.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
            yield response

In [None]:
# Start the Gradio chat UI backed by chat(). chat() reads each history entry
# as a dict with "role" and "content" keys, hence type="messages".
gr.ChatInterface(fn=chat, type="messages").launch()