In [None]:
import os 
import json 
import ollama
import requests
from google import genai
from google.genai import types
from openai import OpenAI
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import display, Markdown, update_display

In [None]:
# Model identifiers used for each provider throughout the notebook.
OPENAI_MODEL = 'gpt-4o-mini'
GEMINI_MODEL = 'gemini-2.0-flash'
OLLAMA_MODEL = 'llama3.2'

# Load variables from a local .env file; override=True lets .env values replace
# variables that are already set in the process environment.
load_dotenv(override=True)
openai_api_key = os.environ.get('OPENAI_API_KEY')
gemini_api_key = os.environ.get('GEMINI_API_KEY')

# Provider clients. The OpenAI client is built with no explicit arguments;
# the Gemini client receives its key explicitly.
openai = OpenAI()
gemini = genai.Client(api_key=gemini_api_key)

In [None]:
# Browser-like User-Agent sent with every scraping request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}


class Website:
    """
    A scraped web page: its title, visible body text and outgoing links.

    Attributes:
        url: The URL that was requested.
        body: Raw response bytes.
        title: Text of the <title> tag, or "No title found".
        text: Body text with script/style/img/input elements removed
            ("" when the page has no <body>).
        links: Non-empty href values of every <a> tag, exactly as written
            in the page (they may be relative).
    """

    def __init__(self, url, timeout=30):
        """
        Fetch and parse `url`.

        Args:
            url: Page to download.
            timeout: Seconds to wait for the server before requests gives up,
                so a stalled host cannot hang the notebook indefinitely.

        Raises:
            requests.RequestException: if the request itself fails.
        """
        self.url = url
        response = requests.get(url=self.url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or has nested tags.
        has_title_text = bool(soup.title and soup.title.string)
        self.title = soup.title.string if has_title_text else "No title found"

        if soup.body:
            for irrelevant in soup.body(['script', 'style', 'img', 'input']):
                # decompose must be *called*; a bare attribute access removes nothing.
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_content(self):
        """Return the title and text formatted as a prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
        


In [None]:
# Smoke test: scrape one page and print its formatted title and text.
hk = Website("https://harshkakran.netlify.app/")
print(hk.get_content())

# Last expression of the cell: shows the raw hrefs collected from the page.
hk.links 

### Call an LLM to pick out the relevant links from the list of scraped links

In [None]:
# System prompt for the link-selection call. The JSON example must itself be
# valid JSON, since the model is asked to imitate it.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
)

In [None]:
def get_links_user_prompt(website):
    """
    Build the user message for the link-selection call.

    Args:
        website: Object exposing `url` and `links` (an iterable of strings).

    Returns:
        The instructions followed by one link per line.
    """
    sections = [
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(sections)

In [None]:
# Inspect the exact user prompt that would be sent for the page scraped above.
print(get_links_user_prompt(hk))

In [None]:
def get_links_open_ai(website:Website):
    """
    Ask the OpenAI model which of the page's links belong in a brochure.

    Args:
        website: Scraped page whose links are to be classified.

    Returns:
        The model's JSON reply parsed into Python objects, or None if the
        request or the JSON parsing raised (the error is printed).
    """
    try:
        completion = openai.chat.completions.create(
            model=OPENAI_MODEL,
            messages=[
                {"role":"system", "content": link_system_prompt},
                {"role":"user", "content": get_links_user_prompt(website)},
            ],
            # Request a JSON object so the reply can be passed to json.loads.
            response_format={"type": "json_object"},
        )
        raw_json = completion.choices[0].message.content
        parsed = json.loads(raw_json)
    except Exception as e:
        print(f"An error occurred during the API call: {e}")
        parsed = None
    return parsed

In [None]:
def get_links_gemini(website:Website):
    """
    Ask the Gemini model which of the page's links belong in a brochure.

    Args:
        website: Scraped page whose links are to be classified.

    Returns:
        The model's JSON reply parsed into Python objects, or None if the
        request or the JSON parsing raised (the error is printed).
    """
    try:
        generation_config = types.GenerateContentConfig(
            system_instruction=link_system_prompt,
            response_mime_type="application/json",
        )
        reply = gemini.models.generate_content(
            model=GEMINI_MODEL,
            config=generation_config,
            contents=get_links_user_prompt(website),
        )
        parsed = json.loads(reply.text)
    except Exception as e:
        print(f"An error occurred during the API call: {e}")
        parsed = None
    return parsed

In [None]:
def get_links_ollama(website: Website, model:str):
    """
    Ask a local Ollama model which of the page's links belong in a brochure.

    Args:
        website: Scraped page whose links are to be classified.
        model: Ollama model name; a falsy value selects OLLAMA_MODEL.

    Returns:
        The model's JSON reply parsed into Python objects, or None if the
        request or the JSON parsing raised (the error is printed).
    """
    chosen_model = model or OLLAMA_MODEL
    print(f"Using model: {chosen_model}\n\n")

    try:
        conversation = [
            {"role":"system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)},
        ]
        reply = ollama.chat(model=chosen_model, messages=conversation, format='json')
        parsed = json.loads(reply['message']['content'])
    except Exception as e:
        print(f"An error occurred during the API call: {e}")
        parsed = None
    return parsed

In [None]:
# Scrape the page used as the running example in the remaining cells.
huggingface = Website("https://huggingface.co")
huggingface.links

In [None]:
# Link selection via Gemini; the cell output is the parsed JSON (or None on error).
get_links_gemini(huggingface)

In [None]:
# Link selection via Ollama; the empty model name falls back to OLLAMA_MODEL.
get_links_ollama(huggingface, "")

In [None]:
# Link selection via OpenAI; the cell output is the parsed JSON (or None on error).
get_links_open_ai(huggingface)

## Make the brochure

In [None]:
def get_all_details(website: "Website", source: str, model: str):
    """
    Collect the landing page plus every brochure-relevant linked page as text.

    The chosen provider selects the relevant links; each one is then scraped
    and appended under a heading taken from the link's "type".

    Args:
        website: The already-scraped landing page.
        source: Which provider selects the links: "ollama", "gemini" or "openai".
        model: Model name forwarded to the Ollama selector (unused by the others).

    Returns:
        One string starting with "Landing page:" followed by each linked page.
        If link selection fails, the source is unsupported, or the reply has no
        usable "links" list, only the landing page is returned. Linked pages
        that cannot be fetched are skipped with a printed message.
    """
    from urllib.parse import urljoin

    result = "Landing page:\n"
    result += website.get_content()

    if source == "ollama":
        selection = get_links_ollama(website=website, model=model)
    elif source == "gemini":
        selection = get_links_gemini(website=website)
    elif source == "openai":
        selection = get_links_open_ai(website=website)
    else:
        print("Source not supported")
        selection = None

    # The selectors return None on failure, and the model is free to return
    # JSON of any shape, so validate before iterating.
    entries = selection.get("links") if isinstance(selection, dict) else None
    if not isinstance(entries, list):
        entries = []

    for link in entries:
        if not isinstance(link, dict) or not link.get("url"):
            continue
        # The model may echo a relative href; resolve it against the landing page.
        page_url = urljoin(website.url, link["url"])
        try:
            page = Website(page_url)
        except requests.RequestException as e:
            # One unreachable page should not discard everything gathered so far.
            print(f"Skipping {page_url}: {e}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_content()

    return result

In [None]:
# Show the combined landing-page and linked-page text, using OpenAI for link selection.
print(get_all_details(huggingface, "openai", ""))

In [None]:
# System prompt for brochure generation. Each fragment ends with a space so the
# sentences do not run together when concatenated.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [None]:
def get_brochure_user_prompt(company_name: str, website: Website, source: str, model: str):
    """
    Build the user message for brochure generation.

    Args:
        company_name: Name to present the company under.
        website: Scraped landing page.
        source: Provider used for link selection (passed to get_all_details).
        model: Model name passed to get_all_details.

    Returns:
        The intro lines followed by the scraped page contents, cut to the
        first 5,000 characters.
    """
    intro = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    )
    scraped_pages = get_all_details(website, source=source, model=model)
    # Truncate if more than 5,000 characters
    return (intro + scraped_pages)[:5_000]

In [None]:
# Inspect the final (truncated) brochure prompt, using Gemini for link selection.
print(get_brochure_user_prompt("Hugging Face", huggingface,"gemini",""))

In [None]:
def create_brochure(company_name: str, source: str, model: str, website: Website):
    """
    Generate a brochure in one (non-streaming) call.

    Args:
        company_name: Name to present the company under.
        source: Provider to use: 'ollama', 'openai' or 'gemini'.
        model: Ollama model name (falsy selects OLLAMA_MODEL). The OpenAI and
            Gemini branches always use OPENAI_MODEL / GEMINI_MODEL.
        website: Scraped landing page.

    Returns:
        The brochure text, or None when the source is unsupported or the
        call raised (a message is printed in both cases).
    """
    if source not in ('ollama', 'openai', 'gemini'):
        print("Source not supported")
        return None

    if source == 'ollama':
        ollama_model = model or OLLAMA_MODEL
        print(f"Using model: {ollama_model}\n\n")

    try:
        # Built inside the try so scraping/link-selection errors are reported too.
        brochure_prompt = get_brochure_user_prompt(company_name, website, source, model)

        if source == 'ollama':
            reply = ollama.chat(
                model=ollama_model,
                messages=[
                    {"role":"system", "content": system_prompt},
                    {"role": "user", "content": brochure_prompt},
                ],
            )
            return reply['message']['content']

        if source == 'openai':
            completion = openai.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[
                    {"role":"system", "content": system_prompt},
                    {"role":"user", "content": brochure_prompt},
                ],
            )
            return completion.choices[0].message.content

        # Remaining case: gemini. The system prompt is prepended to the contents.
        generated = gemini.models.generate_content(
            model=GEMINI_MODEL,
            contents=f"{system_prompt}\n\n{brochure_prompt}",
        )
        return generated.text
    except Exception as e:
        print(f"An error occurred during the API call: {e}")
        return None

In [None]:
# Render the OpenAI-generated brochure as Markdown.
# NOTE(review): create_brochure returns None on failure; confirm Markdown(None) is acceptable here.
display(Markdown(create_brochure("Hugging Face", "openai", "", huggingface)))

In [None]:
# Render the Gemini-generated brochure as Markdown.
# NOTE(review): create_brochure returns None on failure; confirm Markdown(None) is acceptable here.
display(Markdown(create_brochure("Hugging Face", "gemini", "", huggingface)))

In [None]:
# Legacy Gemini SDK, used only by create_brochure_stream. It is imported under
# its own alias so it does not rebind the `genai` name that the setup cell
# imports from `google` (re-running that cell would otherwise break).
import google.generativeai as legacy_genai
from google.generativeai import GenerativeModel


def _render_markdown_stream(text_pieces):
    """Accumulate text pieces and live-update a single Markdown display; return the full text."""
    full_response = ""
    display_handle = display(Markdown(""), display_id=True)
    for piece in text_pieces:
        if piece:  # skip None / empty deltas
            full_response += piece
            update_display(Markdown(full_response), display_id=display_handle.display_id)
    return full_response


def _gemini_chunk_text(chunk):
    """Return the text of one Gemini stream chunk, or "" when it carries none."""
    try:
        return chunk.text or ""
    except ValueError:
        # NOTE(review): the legacy SDK's `.text` accessor is understood to raise
        # ValueError for chunks without a text part - confirm against its docs.
        return ""


def create_brochure_stream(company_name: str, source: str, model: str, website, stream: bool = True):
    """
    Generate a brochure, optionally streaming it into a live Markdown display.

    Args:
        company_name: Name to present the company under.
        source: Provider to use: 'ollama', 'openai' or 'gemini'.
        model: Model name for the chosen provider; a falsy value selects that
            provider's default constant.
        website: Scraped landing page.
        stream: When True, render incrementally and return None. When False,
            return the complete brochure text.

    Returns:
        The brochure text when `stream` is False; otherwise None. None is also
        returned (after printing a message) on an unsupported source or when
        the provider call raises.
    """
    if source == 'ollama':
        model_to_use = model if model else OLLAMA_MODEL
        try:
            response_stream = ollama.chat(
                model=model_to_use,
                messages=[
                    {"role":"system", "content": system_prompt},
                    {"role": "user", "content": get_brochure_user_prompt(company_name, website, source, model)}
                ],
                stream=stream
            )
            if not stream:
                return response_stream['message']['content']

            _render_markdown_stream(
                chunk['message']['content']
                for chunk in response_stream
                if 'message' in chunk and 'content' in chunk['message']
            )
            return None

        except Exception as e:
            print(f"An error occurred during the Ollama API call: {e}")
            return None

    elif source == 'openai':
        model_to_use = model if model else OPENAI_MODEL
        print(f"Using OpenAI model: {model_to_use}\n")
        try:
            response_stream = openai.chat.completions.create(
                model=model_to_use,
                messages=[
                    {"role":"system", "content": system_prompt},
                    {"role": "user", "content": get_brochure_user_prompt(company_name, website, source, model)}
                ],
                stream=stream
            )
            if not stream:
                return response_stream.choices[0].message.content

            _render_markdown_stream(
                chunk.choices[0].delta.content
                for chunk in response_stream
                if chunk.choices  # guard: a chunk with no choices would raise IndexError
            )
            return None

        except Exception as e:
            print(f"An error occurred during the OpenAI API call: {e}")
            return None

    elif source == 'gemini':
        model_to_use = model if model else GEMINI_MODEL
        print(f"Using Gemini model: {model_to_use}\n")
        try:
            legacy_genai.configure(api_key=gemini_api_key)
            model_instance = GenerativeModel(
                model_to_use,
                system_instruction=system_prompt
            )

            response_stream = model_instance.generate_content(
                contents=get_brochure_user_prompt(company_name, website, source, model),
                stream=stream
            )
            if not stream:
                return response_stream.text

            _render_markdown_stream(_gemini_chunk_text(chunk) for chunk in response_stream)
            return None

        except Exception as e:
            print(f"An error occurred during the Gemini API call: {e}")
            return None

    else:
        print("Source not supported")
        return None

In [None]:
# Stream a Gemini-generated brochure into the cell output.
create_brochure_stream("Hugging Face", "gemini", "", huggingface, True)

In [None]:
# Stream an OpenAI-generated brochure into the cell output.
create_brochure_stream("Hugging Face", "openai", "", huggingface, True)

In [None]:
def _strip_wrapping_fence(text):
    """Remove a ``` / ```markdown fence that wraps the whole reply; leave all other text intact."""
    stripped = text.lstrip()
    if not stripped.startswith("```"):
        return text
    first_newline = stripped.find("\n")
    if first_newline == -1:
        # Only (part of) the opening fence line has arrived so far.
        return ""
    inner = stripped[first_newline + 1:]
    if inner.rstrip().endswith("```"):
        inner = inner.rstrip()[:-3]
    return inner


def translate_brochure(brochure_content: str, language: str):
    """
    Translate a brochure with the OpenAI model, streaming the result as Markdown.

    Args:
        brochure_content: Brochure text to translate. Empty or None content is
            rejected with a printed message (create_brochure returns None on failure).
        language: Target language name, e.g. "Hindi".

    Returns:
        None. The translation is rendered in the cell output.
    """
    if not brochure_content:
        print("No brochure content to translate")
        return None

    translation_system_prompt = f"""You are a {language} language translation expert. Given the content of a brochure, your task is to provide the translated version of that brochure in the {language} language."""
    user_prompt = f"""Translate this text into {language}: \n {brochure_content}"""

    res = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {"role":"system","content": translation_system_prompt},
            {"role":"user", "content": user_prompt}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in res:
        if not chunk.choices:
            # guard: a chunk with no choices would raise IndexError
            continue
        response += chunk.choices[0].delta.content or ''
        # Strip only a fence wrapping the whole reply, so the word "markdown"
        # and inner code blocks in the translated text survive.
        update_display(Markdown(_strip_wrapping_fence(response)), display_id=display_handle.display_id)
    return None

In [33]:
# Generate the brochure in one non-streaming call so the text can be translated below.
brochure_content = create_brochure("Hugging Face", "openai", "", huggingface)

In [36]:
# Stream the Hindi translation of the brochure generated above.
translate_brochure(brochure_content, "Hindi")