# Collect and store ~20–30 medRxiv abstracts related to infectious diseases.

### Import required libraries

In [10]:
%pip install -r ../requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [11]:
import requests
import json
from pathlib import Path

### Setting up data folder

In [12]:
# Folder that receives the files written by this notebook; the relative path
# is resolved against the kernel's current working directory.
data_path = Path("../data")
# exist_ok=True keeps this cell re-runnable once the folder exists;
# parents=False (the default) means only the final path component is created.
data_path.mkdir(parents=False, exist_ok=True)

### Downloading medRxiv abstracts

In [13]:
# COVID-19 preprint collection served by the bioRxiv API.
# NOTE(review): the `0` path segment presumably selects the first page of
# results, and the endpoint lives on api.biorxiv.org — confirm whether the
# records need filtering to medRxiv-only entries.
url = "https://api.biorxiv.org/covid19/0/json"

# requests.get has no default timeout; without one an unresponsive server
# would block this cell indefinitely.
request_timeout_s = 30

# Start from an empty list so later cells always find `abstracts` defined,
# whichever failure path is taken below.
abstracts = []

try:
    response = requests.get(url, timeout=request_timeout_s)
except requests.RequestException as exc:
    # Connection errors, DNS failures and timeouts are reported the same way
    # as a bad status code instead of surfacing as an unhandled traceback.
    response = None
    print("Failed to retrieve data:", exc)
else:
    if response.status_code == 200:
        try:
            data = response.json()
        except ValueError as exc:
            # The server answered 200 but the body was not valid JSON.
            print("Failed to retrieve data:", exc)
        else:
            # The records are read from the "collection" key; a missing key
            # is treated as "no abstracts" rather than an error.
            abstracts = data.get("collection", [])
            print(f"Fetched {len(abstracts)} abstracts")
    else:
        print("Failed to retrieve data:", response.status_code)

Fetched 30 abstracts


### Save abstracts

In [14]:
# Destination for the raw API records inside the data folder.
raw_abstracts_path = data_path / "abstracts_raw.json"

if abstracts:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX
    # escapes, so the file is opened explicitly as UTF-8 to match.
    with open(raw_abstracts_path, "w", encoding="utf-8") as f:
        json.dump(abstracts, f, ensure_ascii=False, indent=4)

    print(f"Abstracts saved to {raw_abstracts_path}")
else:
    # `abstracts` is empty when the download failed or returned no records.
    # Skip the write so a previously saved file is not overwritten with an
    # empty list.
    print(f"No abstracts to save; {raw_abstracts_path} was not written")

Abstracts saved to ..\data\abstracts_raw.json


### Verify data

In [15]:
# Sanity check: show the key fields of the first fetched record.
# Nothing is printed when no abstracts were fetched.
if abstracts:
    sample = abstracts[0]
    # A missing or null abstract falls back to "" so the slice below is safe.
    abstract_text = sample.get('rel_abs') or ""

    report_lines = [
        "Sample abstract:",
        f"Title: {sample.get('rel_title')}",
        f"DOI: {sample.get('rel_doi')}",
        f"Authors: {sample.get('rel_authors')}",
        f"Date: {sample.get('rel_date')}",
        # Only the first 200 characters are shown to keep the output short.
        f"Abstract: {abstract_text[:200]}...",
    ]
    print("\n".join(report_lines))

Sample abstract:
Title: Repeated COVID-19 vaccine boosters elicit variant-specific memory B cells in humans
DOI: 10.1101/2025.10.16.682893
Authors: [{'author_name': 'M. Alejandra Tortorici', 'author_inst': 'University of Washington'}, {'author_name': 'Kaitlin R Sprouse', 'author_inst': 'University of Washington'}, {'author_name': 'Amin Addetia', 'author_inst': 'University of Washington'}, {'author_name': 'Jack T Brown', 'author_inst': 'University of Washington'}, {'author_name': 'Alex Harteloo', 'author_inst': 'University of Washington'}, {'author_name': 'Anna Elias-Warren', 'author_inst': 'University of Washington'}, {'author_name': 'Helen Y Chiu', 'author_inst': 'University of Washington'}, {'author_name': 'David Veesler', 'author_inst': 'University of Washington'}]
Date: 2025-10-20
Abstract: The first exposure to a pathogen or an antigen profoundly impacts immune responses upon subsequent encounter with related pathogens. This immune imprinting explains that infection or vaccination