In [25]:
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


In [2]:
# Browser-like request headers sent with every page download.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
    )
}

# Name of the chat model requested from the endpoint configured below.
MODEL = "llama3.2"

# OpenAI-compatible client pointed at a server on localhost:11434.
ollama_via_openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")


In [3]:

class Website:
    """Snapshot of one web page: its title, visible body text and outgoing links.

    The constructor performs the HTTP request and fills in ``url``, ``body``,
    ``title``, ``text`` and ``links``.
    """

    def __init__(self, url, timeout=30):
        """Download ``url`` and extract its title, text and links.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before ``requests`` gives up,
                so an unresponsive host cannot block the notebook indefinitely.

        Raises:
            requests.exceptions.RequestException: If the request fails or times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")

        # <title> may be missing, or present without a single string child.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "Title not found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            # Pages without a <body> still need a text attribute for get_content().
            self.text = ""

        # Anchors without an href yield None; drop them so links holds only strings.
        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        self.links = [href for href in hrefs if href]

    def get_content(self):
        """Return the page title and body text formatted as one prompt fragment."""
        return f"Looking at the websiter {self.title} with contents: \n {self.text}"
    

In [4]:
# System prompt for the link-selection call: describes the task and shows the
# JSON shape the model is expected to answer with.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
# The example must itself be valid JSON; otherwise the model is shown a malformed template.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(website):
    """Build the user message asking the model to pick relevant links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of hrefs).

    Returns:
        The prompt text, ending with one link per line.
    """
    user_prompt = f"Here is the list of links on the website of an esteemed professor {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a motivational letter writting for a PhD position, respond with the full https URL in JSON format. \
                    Focus on links that might contain information about their scientific work. Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    # Anchors without an href show up as None; str.join would raise TypeError on them.
    user_prompt += "\n".join(link for link in website.links if link)
    return user_prompt

In [6]:

def get_links(url):
    """Ask the model which links on ``url`` are worth reading.

    Args:
        url: Address of the page whose links are to be classified.

    Returns:
        The model's reply parsed from JSON. When the reply is empty, is not
        valid JSON, or is not a JSON object, ``{"links": []}`` is returned so
        callers can always use ``.get("links", [])``.

    Raises:
        requests.exceptions.RequestException: If the page cannot be downloaded.
    """
    website = Website(url)

    response = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)},
        ],
        response_format={"type": "json_object"},
    )
    raw_reply = response.choices[0].message.content

    try:
        # ``content`` can be None; treat that like any other unparsable reply.
        parsed = json.loads(raw_reply or "")
    except json.JSONDecodeError:
        return {"links": []}

    # A bare list or string is valid JSON but not the object shape callers expect.
    if not isinstance(parsed, dict):
        return {"links": []}
    return parsed



{'links': [{'type': 'Research Page', 'url': 'https://www.dimphnaIJerLab.org/research/'}, {'type': 'Publications Page', 'url': 'https://www.dimphnaIJerLab.org/publications/'}, {'type': 'MembersPage', 'url': 'https://www dimphnaIjerLab.org/members/', "[removed, likely relative link, couldn't find relevant content on this page]” }and the following links for publication 1 and 2 which contain DOIs that contain scientific information:”], and [1, and then further added from “members” section with specific research area and name of person. and the first 25 characters of title “Research Interests > Home > Research Areas > RNA Binding Molecules and Cellular Regulation > Dr R A Brandes Lab > Publications >”]from published links we see full papers here but also a number list that seems relevant and from that link, there is some scientific info about research on RNA binding molecules and their relation to cellular regulation.]”,””,”,}”,}“),”],and more importantly from “members” and “publications” s

In [12]:
def get_details(url):
    """Collect the text of the landing page and of every link the model selected.

    Args:
        url: Address of the landing page.

    Returns:
        One string starting with ``"Landing Page\\n"`` followed by the content of
        each page; pages that fail to load are replaced by a bracketed error note.
    """
    result = "Landing Page\n"

    try:
        result += Website(url).get_content()
    except requests.exceptions.RequestException as e:
        result += f"\n[Error loading landing page: {e}]\n"

    # get_links downloads the page again, so it can fail the same way as above.
    try:
        links = get_links(url)
    except requests.exceptions.RequestException as e:
        result += f"\n[Error collecting links: {e}]\n"
        return result

    for link in links.get("links", []):
        # The model's JSON is not guaranteed to follow the requested schema.
        if not isinstance(link, dict) or not link.get("url"):
            continue
        # Resolve relative hrefs against the landing page; absolute URLs pass through.
        link_url = urljoin(url, link["url"])
        try:
            result += f"\n\n{link.get('type', 'Linked page')}\n"
            result += Website(link_url).get_content()
        except requests.exceptions.RequestException as e:
            result += f"\n[Error loading {link_url}: {e}]\n"

    return result

In [13]:
# Smoke check: print everything get_details gathers for the lab's landing page.
print(get_details("https://www.dimphnameijerlab.org/"))

Landing Page
Looking at the websiter Dimphna Meijer lab - Meijer Lab with contents: 
 Skip to content
Toggle Navigation
Home
Research
Members
Publications
Come join us
Molecular Neurobiology
Dimphna Meijer lab
admin
2024-05-08T12:46:40+00:00
Meijer Lab
Our lab investigates development of the central nervous system
.
We integrate structural biology, biophysics, and cell biology to resolve the molecular mechanisms underlying synapse assembly.
Key publications
Nature Communications
(2024)
Alternative splicing controls teneurin-3 compact dimer formation for neuronal recognition
Nature Communications (2022)
Structural insights into the contactin 1 – neurofascin 155 adhesion complex
All our publications
Affiliations
News
New PhD position available!
Gallery
New PhD position available!
New PhD position available!
We have a new PhD position available. Please see here for more details. We especially encourage protein biochemists, structural biologists [...]
Dimphna
2025-04-08T12:12:14+00:00
Apri

In [16]:
# System prompt for the letter-writing call. Built from adjacent literals so that
# neither a stray quote character nor source indentation ends up inside the prompt.
system_prompt_letter = (
    "You are an aspiring PhD student. Based on the data provided on the website created "
    "you are going to write a motivational letter applying to the lab of the person "
    "whose website you are looking at. Motivational letter is going to be composed out of "
    "your summarization of why you are interested in the postion based on the text provided on the website.\n"
    "Also, you are going to propose novel methods that can be used or new perspectives that can be taken in the lab"
)


In [17]:
def user_prompt_letter(prof_name, url):
    """Assemble the user message for the letter-writing request.

    The message names the professor, states the task, appends the text returned
    by ``get_details(url)`` and is cut off after 5,000 characters.
    """
    max_chars = 5_000  # Truncate if more than 5,000 characters
    sections = [
        f"You are looking at a website of a professor: {prof_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to write a motivational letter.\n",
        get_details(url),
    ]
    return "".join(sections)[:max_chars]


In [29]:
def create_reference_letter(prof_name, url):
    """Stream a motivational letter for the lab described at ``url``.

    This is a generator: nothing is requested from the model until it is iterated.

    Args:
        prof_name: Name of the professor, inserted into the user prompt.
        url: Landing page of the professor's website.

    Yields:
        The letter text accumulated so far, once per received chunk.
    """
    # stream=True makes the client return an iterator of chunks instead of one reply.
    stream = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt_letter},
            {"role": "user", "content": user_prompt_letter(prof_name, url)},
        ],
        stream=True,
    )
    result = ""
    for chunk in stream:
        # Skip chunks that carry no choices so indexing [0] cannot fail.
        if not chunk.choices:
            continue
        # delta.content can be None; treat it as an empty fragment.
        result += chunk.choices[0].delta.content or ""
        yield result

In [30]:
# create_reference_letter is a generator, so it has to be iterated for the model
# call to run. Each yielded value is the letter so far; re-render it in one slot.
letter_display = display(Markdown(""), display_id=True)
for partial_letter in create_reference_letter("Dimphna Meijer, PhD", "https://www.dimphnameijerlab.org/"):
    update_display(Markdown(partial_letter), display_id=letter_display.display_id)

[Your Name]
[Your Address]
[City, Country]
[Email Address]
[Phone Number]

[Date]

Professor Dimphna Meijer
Dimphna Meijer Lab
Delft University of Technology
Kavli Institute of Nanoscience Delft

Dear Professor Meijer,

I am writing to express my enthusiastic interest in the newly available PhD position in your esteemed lab. As I delved into your research group's website, I was captivated by the cutting-edge molecular neurobiology investigations you conduct at the intersection of structural biology, biophysics, and cell biology.

Your groundbreaking work in understanding the mechanisms underlying synapse assembly resonates deeply with my academic interests and career aspirations. The recent publication on "Alternative splicing controls teneurin-3 compact dimer formation for neuronal recognition" (Gogou et al., 2024) presented in Nature Communications has sparked a burning curiosity to contribute to your research group's impressive track record of publications.

Your collaborative appro