## This notebook uses a local Ollama model to generate a summary of a webpage

In [11]:
import time

import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [12]:
# this class extracts/parses the webpage

class Website:
    """Load a web page in headless Chrome and keep its title and visible text.

    Attributes set by the constructor:
        url:   the address that was requested.
        title: contents of the page's <title>, or "No title found" when the
               page has no usable title.
        text:  text of <body> with script/style/img/input elements removed,
               one fragment per line; empty string when the page has no <body>.
    """

    def __init__(self, url):
        """Fetch `url` with a headless Chrome session and parse the rendered HTML.

        Args:
            url: address of the page to load.

        Raises:
            selenium.common.exceptions.WebDriverException: if Chrome cannot be
                started or the page cannot be loaded (this includes the
                TimeoutException raised when no <body> appears within 10 s).
        """
        self.url = url

        # Creating a headless browser
        chrome_options = Options()
        for flag in (
            "--headless",
            "--enable-javascript",
            "--enable-cookies",
            "--no-sandbox",
            "--disable-dev-shm-usage",
        ):
            chrome_options.add_argument(flag)

        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get(url)

            if "Just a moment..." in driver.title:
                print("Cloudflare detected. Waiting...")
                time.sleep(5)

            # Actually wait for the document body before touching it; the
            # scroll script below dereferences document.body.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")

            page_source = driver.page_source
        finally:
            # Always release the browser process, even when loading fails.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')

        # <title> may be absent, or present without a single string child.
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        body = soup.body
        if body is None:
            self.text = ""
        else:
            # Drop elements that carry no readable content before extracting text.
            for irrelevant in body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = body.get_text(separator="\n", strip=True)

In [27]:
# Scrape the target page with the Website class, then show its title followed by its text

extracted_text = Website("https://en.wikipedia.org/wiki/List_of_Telugu_films_of_2024")
print(extracted_text.title, extracted_text.text, sep="\n")

List of Telugu films of 2024 - Wikipedia
Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Contents
move to sidebar
hide
(Top)
1
Box office collection
2
January–March
3
April–June
4
July–September
5
October–December
6
See also
7
Notes
8
References
Toggle the table of contents
List of Telugu films of 2024
1 language
తెలుగు
Edit links
Article
Talk
English
Read
View source
View history
Tools
Tools
move to sidebar
hide
Actions
Read
View source
View history
General
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Cite this page
Get shortened URL
Download QR code
Expand all
Edit interlanguage links
Print/export
Download as PDF
Print

In [28]:
# With the page content extracted, define the settings used to call the local Ollama server:
# the model name, the chat endpoint URL, and the request headers.

MODEL = "llama3.2"
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {
    "Content-Type": "application/json",
}

In [29]:
# Build the system and user prompts that give the LLM its instructions and the page content

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown. Only use the important details from the context"
)

# User prompt = page title line + task instructions + the scraped page text.
user_prompt = "".join([
    f"You are looking at a website titled {extracted_text.title}",
    "\nThe contents of this website is as follows; "
    "please provide a short summary of this website in markdown. "
    "If it includes news or announcements, then summarize these too.\n\n",
    extracted_text.text,
])

# Chat history sent to the model: system message first, then the user message.
messages = [
    {"role": role, "content": content}
    for role, content in (("system", system_prompt), ("user", user_prompt))
]

In [30]:
# Request body for the chat endpoint; streaming is turned off.
payload = dict(model=MODEL, messages=messages, stream=False)

In [31]:
# Send the chat request to the Ollama endpoint.
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)

# Surface HTTP failures directly instead of as a KeyError on a missing
# 'message' key in the error body.
response.raise_for_status()

# The system prompt asks the model to respond in markdown, so render the
# reply as markdown rather than printing the raw text.
display(Markdown(response.json()['message']['content']))

Here are some Telugu films released in 2024:

1. **Puthintala Pelli** (January) - Action thriller film starring Nandita Swetha and Vishnu Manchu.
2. **Rishi** (February) - Romantic drama film starring Varun Sandesh and Dhanushka.
3. **Vijaya** (March) - Historical drama film starring Anirudh Boina and Roshini.
4. **Brahma Murthi** (April) - Science fiction film starring Siddarth Glan and Kalyani Nuthala.
5. **Jagadguru** (May) - Biographical drama film starring Prabhas and Shraddha Srinath.
6. **Saptagiri** (June) - Family drama film starring Tejasu Akshara and Vishnu Manchu.
7. **Yagna** (July) - Thriller film starring Nandamuri Hari and Shriya Bhupathi.
8. **Rudrakaalam** (August) - Horror film starring Raj Tarun and Hebah Patel.
9. **Uyyala Jampala** (September) - Romantic comedy-drama film starring Sai Kiran and Vishnu Manchu.
10. **Shiva Shakti** (October) - Action thriller film starring Prabhas and Anushka Shetty.

Note: The list is not exhaustive and the release dates are subjec