In [24]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI
print("Imports complete.")

# If you get an error running this cell, then please head over to the troubleshooting notebook!

Imports complete.


# Connecting to OpenAI (or Ollama)

The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI.  


In [43]:
# Load environment variables in a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# The whitespace test runs BEFORE the prefix test: a key with a leading space or tab
# also fails startswith("sk-proj-"), and would otherwise be reported with the less
# helpful "doesn't start sk-proj-" message instead of the whitespace one.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [44]:
# Create the API client used by every later cell in this notebook. No arguments are passed here.
# NOTE(review): presumably the client reads OPENAI_API_KEY from the environment loaded earlier - confirm against the OpenAI SDK docs.
openai = OpenAI()

# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.
# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions

# Let's make a quick call to a Frontier model to get started, as a preview!

In [45]:
# A first taste of the API: send one user message and print the reply.
# Any problems, head over to the Troubleshooting notebook.

message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "user", "content": message},
    ],
)
print(response.choices[0].message.content)

Hello! It's great to hear from you! Welcome! How can I assist you today?


## OK onwards with our first project

In [46]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites only respond properly when the request carries browser-like headers,
# so every fetch sends this User-Agent:
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

class Website:
    """
    A fetched web page reduced to its URL, title and visible body text.

    Attributes set by __init__:
        url:   the address that was requested
        title: the string of the <title> tag, or "No title found" when the tag is missing or has no string
        text:  newline-separated text of <body> after script/style/img/input elements are removed;
               "" when the document has no <body>
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to fetch
            timeout: seconds to wait on the server before giving up; requests applies
                no timeout unless one is passed, so a stalled server would otherwise
                hang the cell forever
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None for an empty <title> or one that contains nested tags
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # No <body> (e.g. a non-HTML response): nothing to strip or extract
            self.text = ""
            return
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [51]:
# Let's try one out: fetch a page and look at what was scraped.
# Change the website (e.g. to "https://edwarddonner.com") and add print statements to follow along.

ed = Website("https://ai.xprize.org/prizes/artificial-intelligence/teams/deepdrug")
print(ed.title, ed.text, sep="\n")

DeepDrug | AI XPRIZE
About XPRIZE
Mission
Impact
Leadership + Staff
Boards
Careers
Our Focus Areas
Biodiversity + Conservation
Climate + Energy
Deep Tech
Food + Water + Waste
Health
Learning + Society
Space + Exploration
Our Work
Competitions
Challenges
Youth Programs
Our Community
Benefactors
Sponsors
Partners
Alumni Network
News + Content
News
Press
Podcast
Events
Get Involved
Back arrow
DeepDrug
Baton Rouge, LA, United States
Team Website
About Us
The DeepDrug team is led by the PIs Dr. Supratik Mukhopadhyay, a Computer Scientist, and Dr. Michal Brylinski, a Computational Biologist.
Dr. Supratik Mukhopadhyay is a faculty member in Computer Science at Louisiana State University.  His research interests lie in the areas of Artificial Intelligence/Machine Learning with applications to Automated Drug Discovery, Satellite Imagery Recognition,  Transportation Systems, Sustainable Buildings, Cyber-Physical Human Systems, etc. In these areas, Dr. Mukhopadhyay's research  has been supported 

## Types of prompts

You may know this already - but if not, you will get very familiar with it!

Models like GPT-4o have been trained to receive instructions in a particular way.

They expect to receive:

**A system prompt** that tells them what task they are performing and what tone they should use

**A user prompt** -- the conversation starter that they should reply to

In [52]:
# Define our system prompt - you can experiment with this later, changing the last sentence to "Respond in markdown in Spanish."

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [53]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user prompt that asks for a markdown summary of a page.

    Args:
        website: any object exposing `title` and `text` attributes

    Returns:
        A single string: a line naming the page title, the summarization
        request, a blank line, then the page text.
    """
    intro = f"You are looking at a website titled {website.title}"
    request = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return intro + request + website.text

In [54]:
# Print the complete user prompt generated for the page held in `ed`
print(user_prompt_for(ed))

You are looking at a website titled DeepDrug | AI XPRIZE
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

About XPRIZE
Mission
Impact
Leadership + Staff
Boards
Careers
Our Focus Areas
Biodiversity + Conservation
Climate + Energy
Deep Tech
Food + Water + Waste
Health
Learning + Society
Space + Exploration
Our Work
Competitions
Challenges
Youth Programs
Our Community
Benefactors
Sponsors
Partners
Alumni Network
News + Content
News
Press
Podcast
Events
Get Involved
Back arrow
DeepDrug
Baton Rouge, LA, United States
Team Website
About Us
The DeepDrug team is led by the PIs Dr. Supratik Mukhopadhyay, a Computer Scientist, and Dr. Michal Brylinski, a Computational Biologist.
Dr. Supratik Mukhopadhyay is a faculty member in Computer Science at Louisiana State University.  His research interests lie in the areas of Artificial Intelligence/Machine Learning with applications to

## Messages

The API from OpenAI expects to receive messages in a particular structure.
Many of the other APIs share this structure:

```python
[
    {"role": "system", "content": "system message goes here"},
    {"role": "user", "content": "user message goes here"}
]
```
To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)

In [57]:
# A minimal conversation: the system message sets the persona, the user message asks the question
messages = [
    dict(role="system", content="You are a snarky assistant"),
    dict(role="user", content="What is 2 + 2?"),
]

In [58]:
# To give you a preview -- send the system + user messages defined above and print the reply:

response = openai.chat.completions.create(
    messages=messages,
    model="gpt-4o-mini",
)
print(response.choices[0].message.content)

Oh, you're really hitting me with the tough ones, huh? The answer is 4. Gold star for you!


## And now let's build useful messages for GPT-4o-mini, using a function

In [59]:
# See how this function creates exactly the format above

def messages_for(website):
    """
    Assemble the two-message list for summarizing `website`.

    Returns:
        [system message, user message], where the system content is the
        module-level `system_prompt` and the user content comes from
        `user_prompt_for(website)`.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [60]:
# Try this out, and then try for a few more websites
# The list is the cell's last expression, so it is echoed as the cell output.

messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled DeepDrug | AI XPRIZE\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nAbout XPRIZE\nMission\nImpact\nLeadership + Staff\nBoards\nCareers\nOur Focus Areas\nBiodiversity + Conservation\nClimate + Energy\nDeep Tech\nFood + Water + Waste\nHealth\nLearning + Society\nSpace + Exploration\nOur Work\nCompetitions\nChallenges\nYouth Programs\nOur Community\nBenefactors\nSponsors\nPartners\nAlumni Network\nNews + Content\nNews\nPress\nPodcast\nEvents\nGet Involved\nBack arrow\nDeepDrug\nBaton Rouge, LA, United States\nTeam Website\nAbout Us\nThe DeepDrug team is led by the PIs Dr. Supratik Mukhopadhyay, a Computer 

## Time to bring it together - the API for OpenAI is very simple!

In [61]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url, model="gpt-4o-mini"):
    """
    Fetch the page at `url` and ask an OpenAI chat model to summarize it.

    Args:
        url: address of the page to scrape; passed straight to Website
        model: name of the chat model to call. Defaults to "gpt-4o-mini",
            the model this function used before the parameter existed, so
            existing one-argument calls behave as before.

    Returns:
        The message content of the first choice in the completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [62]:
# summarize("https://edwarddonner.com")
# The returned string is the cell's last expression, so it is echoed as a raw Python string (with \n escapes) rather than rendered markdown.
summarize("https://ai.xprize.org/prizes/artificial-intelligence/teams/deepdrug")


"# DeepDrug | AI XPRIZE Summary\n\nThe DeepDrug initiative, part of the XPRIZE foundation, is focused on leveraging artificial intelligence (AI) to enhance global health through improved drug discovery. The team, led by Dr. Supratik Mukhopadhyay and Dr. Michal Brylinski from Louisiana State University, aims to create a rapid AI-based platform that can:\n\n- Innovate treatments for illnesses with no known cures.\n- Replace outdated drugs for pathogens that are now resistant.\n- Quickly respond to new disease outbreaks.\n- Enhance overall global health.\n\n## Key Features of DeepDrug\n\nDeepDrug is developing a comprehensive computer-aided drug design software consisting of four main components:\n\n1. **eSynth**: Uses AI to automatically synthesize new drug compounds.\n2. **eMolFrag**: Breaks down existing drug compounds into building blocks for new drug synthesis.\n3. **eToxPred**: Employs machine learning to predict drug toxicity and synthetic access based on molecular structures.\n4. 

In [63]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """Summarize the page at `url` and render the resulting markdown in the cell output."""
    display(Markdown(summarize(url)))

In [64]:
# Render the summary of the DeepDrug team page as formatted markdown
display_summary("https://ai.xprize.org/prizes/artificial-intelligence/teams/deepdrug")

# Summary of DeepDrug | AI XPRIZE

The **DeepDrug** initiative is focused on utilizing artificial intelligence to enhance drug discovery processes, aiming to improve global health outcomes. Led by Dr. Supratik Mukhopadhyay and Dr. Michal Brylinski from Louisiana State University, the team seeks to develop an AI-based platform that significantly shortens the timeline for discovering new drug compounds.

## Key Objectives:
- **Address Illness**: Focus on diseases with no known cures and combat drug-resistant pathogens.
- **Rapid Drug Discovery**: Employ advanced computational techniques to reduce drug discovery time from years to months or even weeks.

## Core Components of DeepDrug:
1. **eSynth**: Synthesizes new drug compounds using AI-driven techniques.
2. **eMolFrag**: Breaks down existing compounds for the synthesis of new drugs.
3. **eToxPred**: Predicts toxicity and synthetic accessibility of synthesized compounds through machine learning.
4. **eDrugRes**: Identifies bacterial resistance to drugs by employing graph convolutional networks, facilitating drug repurposing.

The overarching mission of DeepDrug is to leverage AI capabilities for creating innovative solutions in drug discovery, ultimately fostering better health for the global population.

# Let's try more websites

Note that this will only work on websites that can be scraped using this simplistic approach.

Websites that are rendered with JavaScript, like React apps, won't show up. See the community-contributions folder for a Selenium implementation that gets around this. You'll need to read up on installing Selenium (ask ChatGPT!)

Also, websites protected with CloudFront (and similar) may give 403 errors - many thanks Andy J for pointing this out.

But many websites will work just fine!

In [19]:
# A news front page: the summary reflects whatever the page contained when the cell was run
display_summary("https://cnn.com")

# Summary of CNN Website

The CNN website provides a comprehensive platform for breaking news, updates, and multimedia content across various categories including US news, world news, politics, business, health, entertainment, and weather.

## Key News Highlights:

- **Ukraine-Russia War**: Ongoing coverage and developments related to the conflict.
- **Israel-Hamas War**: Report on escalating tensions and impacts on geopolitics.
- **Memorial Day Sales**: Coverage of major sales events, highlighting significant deals from retailers.

## Notable Articles:

1. **Trump Administration's Education Policy**: The administration is accused of barring Harvard from enrolling international students, which has sparked a federal judge's intervention to block the policy affecting students' legal status.
   
2. **Tragic Incidents**: Reports include a plane crash in San Diego that resulted in fatalities following a collision with power lines and details concerning a hate crime investigation linked to the murders of Israeli Embassy staffers.

3. **Political Analysis**: Analysis of ongoing issues within the GOP regarding legislative strategies and the implications of major bills.

4. **Cultural Features**: Articles covering entertainment news, such as Jeremy Renner’s experiences post-recovery from a near-death situation and insights into celebrity legal battles.

5. **Scientific Findings**: Recent studies shedding light on climate news, discoveries in paleontology, and human health advisory.

The site is structured for easy navigation of various topics and also features podcasts and live TV options for further engagement with news content.

In [65]:
# Same DeepDrug page as summarized earlier in the notebook; the wording of the summary differs from run to run
display_summary("https://ai.xprize.org/prizes/artificial-intelligence/teams/deepdrug")

# DeepDrug | AI XPRIZE Summary

DeepDrug is an initiative under the XPRIZE umbrella, focused on harnessing artificial intelligence to enhance global health through innovative drug discovery methods. Led by Dr. Supratik Mukhopadhyay and Dr. Michal Brylinski, the team aims to develop an AI-driven platform that accelerates the identification of new drugs, significantly reducing the time required for early-stage discovery from years to mere weeks or months.

## Key Objectives of DeepDrug:
- Discover new drugs for illnesses with no current cures.
- Replace ineffective treatments for drug-resistant pathogens.
- Rapidly respond to outbreaks of unknown diseases.
- Enhance overall health across global populations.

## Components of DeepDrug:
1. **eSynth**: AI-based synthesis of new drug compounds from basic building blocks.
2. **eMolFrag**: Decomposes existing drug compounds for recombinant synthesis.
3. **eToxPred**: Predicts toxicity and synthetic accessibility using machine learning.
4. **eDrugRes**: Utilizes graph convolutional networks to assess bacterial resistance to drugs, promoting drug repurposing.

The DeepDrug team is committed to leveraging AI technologies for transformative changes in drug discovery, potentially impacting health outcomes worldwide.

In [66]:
# Step 1: Create your prompts
# These use email_-prefixed names on purpose: messages_for() reads the module-level
# `system_prompt`, so reassigning that name here would silently change what every
# later summarize()/display_summary() call sends to the model.

email_system_prompt = "You are an assistant that writes concise, relevant subject lines for emails."
email_user_prompt = """
Please suggest a short, professional subject line for the following email:

Dear team,

I wanted to remind everyone that our quarterly planning meeting is scheduled for next Friday at 10am in the main conference room. Please come prepared with your department updates and any discussion points.

Best,
Alex
"""

# Step 2: Make the messages list

email_messages = [
    {"role": "system", "content": email_system_prompt},
    {"role": "user", "content": email_user_prompt}
]

# Step 3: Call OpenAI

response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=email_messages
)

# Step 4: print the result

print(response.choices[0].message.content)

Reminder: Quarterly Planning Meeting Next Friday at 10 AM
