In [1]:
# imports required packages

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI


In [2]:
# Load environment variables from a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# The whitespace test runs before the prefix test: a key with a leading space
# or tab also fails startswith("sk-proj-"), and would otherwise be reported as
# having the wrong prefix instead of pointing at the stray whitespace.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [3]:
# Client object used by the later cells to call the chat completions API.
# No key is passed explicitly here; presumably the client picks up
# OPENAI_API_KEY from the environment loaded above - confirm against the SDK docs.
openai = OpenAI()

# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.
# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions

In [4]:
# Request headers shared by every page fetch: some websites only respond
# properly when the request carries a browser-like User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}
class Website:
    """
    A fetched web page reduced to its title and visible text.

    Attributes set by the constructor:
        url:   the address that was requested
        title: the text of the page's <title>, or "No title found"
        text:  newline-separated page text with script, style, img and
               input elements removed
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds to wait on the server before requests gives up.
                Without a timeout, requests.get can block indefinitely.

        Raises:
            requests.exceptions.RequestException: propagated unchanged from
                requests.get (connection failure, timeout, ...).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be missing, and .string is None when the tag is empty or
        # wraps nested markup; use the placeholder in both cases so the title
        # is always a usable string.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # html.parser does not invent a <body> for documents that lack one, so
        # soup.body can be None; fall back to the whole document instead of
        # failing on it.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [6]:
# Try the class on a sample page. Swap in another URL, or add more prints,
# to follow along.

gs = Website("https://gayatri-sivani-susarla.github.io/GSS.portfolio/")
# Title first, then the extracted text, each starting on its own line.
print(gs.title, gs.text, sep="\n")

Gayatri Sivani Susarla Portfolio Website
Home
About
Certifications
Projects
Contact
Data Scientist
Hey!, I am
Gayatri Sivani Susarla
.
About Me
I am aspiring Data Scientist pursuing Master's degree in Data Science major at Stony Brook University, New York. I developed passion for Large Language Models and Big Data Analytics in my MS journey, and move forward to collaborate with professionals in these fields. I want to focus on solving real-world challenges, driving impactful results and grow professionally. I am eager to contribute to the evolution of the Data Science field and discover unrevealed data. Lets connect to explore data in all possible ways and invent new scientific skills to study the
"word".
Skills
Experience
Education
Programming Skills
Python | R | Java | SQL | OCAML | SAPBW
AI/ML Libraries
PyTorch | TensorFlow | SciKit-Learn
Cloud and Data visualization
Snowflake | CloudLab | Azure | PowerBI | SAP Analytics Cloud | Matplotlib | Seaborn | Plotly
Systems Engineer, Infosy

In [9]:
# System prompt sent with every request. To experiment later, change the
# last sentence to "Respond in markdown in Spanish."

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [16]:
# Build the user prompt that asks for a summary of a website
def user_prompt_for(website):
    """
    Return the user prompt for the given website.

    The prompt is an opening line naming website.title, then the fixed
    summarization instructions, then website.text appended verbatim.
    """
    opening = f"You are looking at a website titled {website.title}"
    instructions = (
        "\n The contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return opening + instructions + website.text

In [15]:
# Print the prompt built for the sample site so its layout can be checked by eye.
print(user_prompt_for(gs))

You are looking at a website titled Gayatri Sivani Susarla Portfolio Website
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Home
About
Certifications
Projects
Contact
Data Scientist
Hey!, I am
Gayatri Sivani Susarla
.
About Me
I am aspiring Data Scientist pursuing Master's degree in Data Science major at Stony Brook University, New York. I developed passion for Large Language Models and Big Data Analytics in my MS journey, and move forward to collaborate with professionals in these fields. I want to focus on solving real-world challenges, driving impactful results and grow professionally. I am eager to contribute to the evolution of the Data Science field and discover unrevealed data. Lets connect to explore data in all possible ways and invent new scientific skills to study the
"word".
Skills
Experience
Education
Programming Skills
Python | R | Java | SQL | OCAML |

In [18]:
## Build the chat message list for a website
def messages_for(website):
    """
    Return a two-item list of role/content dicts: the system message carrying
    system_prompt, followed by the user message built by user_prompt_for.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [19]:
# Bare expression: the notebook shows the repr of the message list for the sample site.
messages_for(gs)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': 'You are looking at a website titled Gayatri Sivani Susarla Portfolio Website\n The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nHome\nAbout\nCertifications\nProjects\nContact\nData Scientist\nHey!, I am\nGayatri Sivani Susarla\n.\nAbout Me\nI am aspiring Data Scientist pursuing Master\'s degree in Data Science major at Stony Brook University, New York. I developed passion for Large Language Models and Big Data Analytics in my MS journey, and move forward to collaborate with professionals in these fields. I want to focus on solving real-world challenges, driving impactful results and grow professionally. I am eager to contribute to the evolut

In [24]:
## now use OpenAI API, to bring this together
def summarize(url, model="gpt-4o-mini"):
    """
    Fetch a web page and return the model's summary of it.

    Args:
        url: address of the page to summarize.
        model: name of the chat model to request. Defaults to the model this
            notebook previously hard-coded, so existing calls are unaffected.

    Returns:
        The message content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website)
    )
    return response.choices[0].message.content


In [25]:
# Bare expression: the notebook echoes the repr of the returned string, so the
# markdown appears with literal \n escapes rather than rendered.
summarize("https://gayatri-sivani-susarla.github.io/GSS.portfolio/")

"# Gayatri Sivani Susarla Portfolio Website\n\n## Overview\nThe portfolio website of Gayatri Sivani Susarla showcases her skills and experience as an aspiring Data Scientist currently pursuing a Master's degree in Data Science at Stony Brook University, New York. She has a keen interest in Large Language Models and Big Data Analytics, and aims to contribute innovative solutions to real-world data challenges.\n\n## About Me\n- Master's student in Data Science.\n- Passionate about data science, particularly in analyzing large datasets and collaborating with professionals in the field.\n- Committed to professional growth and impacting the field positively through data analysis.\n\n## Skills\n- **Programming Languages:** Python, R, Java, SQL, OCAML, SAPBW\n- **AI/ML Libraries:** PyTorch, TensorFlow, SciKit-Learn\n- **Cloud & Data Visualization:** Snowflake, CloudLab, Azure, PowerBI, SAP Analytics Cloud, Matplotlib, Seaborn, Plotly\n\n## Experience\n- **Systems Engineer at Infosys (Jul 2022

In [27]:
## Show a summary nicely in the Jupyter output, using markdown
def display_summary(url):
    """
    Summarize the page at url and render the result as Markdown in the cell output.
    """
    display(Markdown(summarize(url)))

In [28]:
# Render the summary of the sample portfolio site as formatted markdown.
display_summary("https://gayatri-sivani-susarla.github.io/GSS.portfolio/")

# Gayatri Sivani Susarla Portfolio Summary

**About Me**  
Gayatri Sivani Susarla is an aspiring Data Scientist currently pursuing a Master's degree at Stony Brook University, New York. Her interests lie in Large Language Models and Big Data Analytics, with a goal to solve real-world challenges and contribute to the evolution of Data Science.

---

**Skills and Experience**  
- **Programming Languages:** Python, R, Java, SQL, OCAML, SAPBW  
- **AI/ML Libraries:** PyTorch, TensorFlow, SciKit-Learn  
- **Cloud and Data Visualization Tools:** Snowflake, Azure, PowerBI, Matplotlib, Seaborn, Plotly  
- **Professional Experience:** Systems Engineer at Infosys (2022-2023), focused on SAPBI, data pipeline optimization, and ETL processes.

---

**Education**  
- **Master's in Data Science** (Jan 2024 - Present) - Stony Brook University, New York  
- **Bachelor in Technology** (June 2018 - May 2022) - Electronics and Instrumentation Engineering, Adikavi Nannaya University, Andhra Pradesh, India

---

**Certifications**  
- Mastering Generative AI: Gained skills in Generative AI and fine-tuning LLMs.  
- Snowflake: Expertise in scalable data pipelines and data warehouse optimization.  
- SAP ABAP: Certified developer showcasing enterprise application skills.  

---

**Projects**  
1. **NCAA-March-Madness-Basketball-Tournament-Outcome-Prediction-Model**: Using machine learning to predict outcomes of the 2024 collegiate basketball tournaments.  
2. **Exploratory Data Analytics-Airbnb Listings**: Analyzed New York Airbnb data for trends and patterns, providing insights for guests and hosts.  
3. **Quantum Search Algorithm on Weighted Databases**: Implemented an Adaptive-Grover-Algorithm using Qiskit and enhanced the original Grover’s search strategy.

---

**Contact Information**  
- Email: [gayatrisivani3010@gmail.com](mailto:gayatrisivani3010@gmail.com)  
- Phone: +1-9342464724

For more details, visit her [resume](#).

---

In [30]:
# Same call on a different, text-heavy page.
display_summary("https://www.bible.com/bible/111/MAT.8.NIV")

# Summary of Matthew 8 | NIV Bible | YouVersion

This webpage provides the full text of Matthew Chapter 8 from the New International Version (NIV) of the Bible. The chapter details several key events in the ministry of Jesus, including:

- **Healing a Man with Leprosy**: A leper approaches Jesus, expressing faith in His ability to heal. Jesus heals him and instructs him to show himself to the priest as a testimony.
- **The Faith of the Centurion**: A centurion seeks healing for his paralyzed servant. Jesus commends the centurion's faith and heals the servant from a distance.
- **Healing at Peter's House**: Jesus heals Peter's mother-in-law and many others who were sick or possessed by demons.
- **The Cost of Following Jesus**: Jesus explains the sacrifices required to follow Him, emphasizing that He has no permanent home.
- **Calming the Storm**: Jesus calms a storm on the lake, demonstrating His authority over nature, which amazes His disciples.
- **Restoration of Two Demon-Possessed Men**: Upon arriving in the Gadarenes, Jesus encounters two demons and frees them, sending the demons into a herd of pigs.

The chapter exemplifies Jesus' healing powers, authority, and the importance of faith in Him, alongside the challenges faced by His followers.

Additionally, the site offers features such as video resources related to Matthew 8, highlighting its applications in digital ministry and encouraging users to engage with the Bible daily.

In [31]:
# Show the kernel's current working directory.
# NOTE(review): the saved output reveals a local absolute path; consider
# clearing this cell's output (or removing the cell) before sharing the notebook.
os.getcwd()

'C:\\Users\\dell\\projects\\llm_engineering\\week1'