## Connect to Database

In [8]:
from neomodel import config
from neomodel import adb
import os

# Credentials are read from the environment; the second argument is the
# fallback used when the variable is not set.
neo4j_user = os.environ.get("NEO4J_USERNAME", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "password")

# Build the bolt URL once and register it with neomodel in the same statement.
# `neo4j_url` stays available as a notebook-level name for later cells.
config.DATABASE_URL = neo4j_url = f"bolt://{neo4j_user}:{neo4j_password}@neo4j.devcontainer:7687"

In [9]:
print(f"Neo4j URL: {neo4j_url}")

try:
    await adb.set_connection(config.DATABASE_URL)
    print("Connected successfully!")
except Exception as e:
    print("Failed to connect:", e)

Neo4j URL: bolt://neo4j:password@neo4j.devcontainer:7687
Connected successfully!


# Testing (prototype functions defined in this notebook)

In [44]:
from scholar.models import ScholarPaper, ScholarAuthor

In [45]:
from shared.utils import exponential_backoff_retry, RateLimitExceededError
import requests

In [46]:
# Comma-separated field lists sent as the `fields` query parameter of the
# Semantic Scholar Graph API requests made below.
# Default for search_papers_by_title and enrich_papers:
DEFAULT_PAPER_FIELDS = "title,abstract,venue,publicationVenue,year,referenceCount,citationCount,influentialCitationCount,publicationTypes,publicationDate,journal,authors"
# Default for enrich_authors:
DEFAULT_AUTHOR_FIELDS = "authorId,url,name,affiliations,homepage,paperCount,citationCount,hIndex"

### Search by Title

In [47]:
def search_papers_by_title(title, year=None, fields = DEFAULT_PAPER_FIELDS) -> list[ScholarPaper]:
    """Search the Semantic Scholar paper-search endpoint for a title.

    Args:
        title: Title text; sent as the query ``title:(<title>)``.
        year: Optional value for the ``year`` query parameter. It is only
            sent when truthy.
        fields: Comma-separated field list sent as the ``fields`` parameter.

    Returns:
        A list with one ``ScholarPaper`` per entry of the response's ``data``
        array, or an empty list when ``data`` is missing or empty.

    Raises:
        RateLimitExceededError: when the API answers with HTTP 429.
        Any exception raised by ``response.raise_for_status()`` for other
        non-200 statuses. If that call does not raise, ``None`` is returned.
    """
    query_params = {
        "query": f"title:({title})",
        "fields": fields,
    }
    if year:
        query_params['year'] = year

    reply = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        params=query_params,
    )
    status = reply.status_code

    # Handle the failure paths first so the success path reads straight through.
    if status == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    if status != 200:
        reply.raise_for_status()
        return None

    hits = reply.json().get("data")
    if not hits:
        return []
    return [ScholarPaper.from_dict(hit) for hit in hits]

In [None]:
# Search by full title. `papers` is reused by the enrichment cell further down.
papers = search_papers_by_title("Visualizing Structure and Transitions in High-Dimensional Biological Data")
# Show the first hit only; this raises IndexError if the search returned nothing.
print(papers[0])

ScholarPaper(paper_id='8ad65513498b428656d51cb097b66b6fd5798bd0', url='', title='Visualizing structure and transitions in high-dimensional biological data', abstract=None, venue='Nature Biotechnology', publication_venue={'id': '458166b3-de17-4bf3-bbbb-e53782de2f0f', 'name': 'Nature Biotechnology', 'type': 'journal', 'alternate_names': ['Nat Biotechnol'], 'issn': '1087-0156', 'url': 'http://www.nature.com/nbt/', 'alternate_urls': ['http://www.nature.com/nbt']}, year=2019, reference_count=89, citation_count=734, influential_citation_count=90, fields_of_study=[], publication_types=['JournalArticle'], publication_date='2019-12-01', journal={'name': 'Nature Biotechnology', 'pages': '1482 - 1492', 'volume': '37'}, authors=[ScholarAuthor(author_id='144493740', url='', name='Kevin R. Moon', affiliations=[], homepage=None, paper_count=0, citation_count=0, h_index=0), ScholarAuthor(author_id='50756914', url='', name='David van Dijk', affiliations=[], homepage=None, paper_count=0, citation_count=

### Partial Search

In [4]:
def partial_search(text) -> list[ScholarPaper]:
    """Look up papers through the Semantic Scholar autocomplete endpoint.

    Args:
        text: Partial title or query text. It is passed via ``params`` so that
            ``requests`` percent-encodes it; characters such as ``&``, ``#``
            or ``+`` therefore stay part of the query instead of altering
            the URL.

    Returns:
        A list with one ``ScholarPaper`` per entry of the response's
        ``matches`` array. An empty list is returned when ``matches`` is
        missing or empty, so callers can always iterate over the result.

    Raises:
        RateLimitExceededError: when the API answers with HTTP 429.
        Any exception raised by ``response.raise_for_status()`` for other
        non-200 statuses.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/autocomplete"
    response = requests.get(url, params={"query": text})

    if response.status_code == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    if response.status_code != 200:
        response.raise_for_status()
        # Non-200 status that raise_for_status() does not treat as an error.
        return []

    matches = response.json().get("matches")
    if not matches:
        return []
    return [ScholarPaper.from_dict(paper) for paper in matches]

In [5]:
# Autocomplete lookup from a title prefix. In the output below only
# paper_id and title are populated; the other fields show empty values.
partial_search("Visualizing Structure and Transitions")

[ScholarPaper(paper_id='8ad65513498b428656d51cb097b66b6fd5798bd0', url='', title='Visualizing structure and transitions in high-dimensional biological data', abstract='', venue=None, publication_venue={}, year=None, reference_count=0, citation_count=0, influential_citation_count=0, fields_of_study=[], publication_types=[], publication_date='', journal={}, authors=[]),
 ScholarPaper(paper_id='57c2710ada3c2c75ca66378ea8bae42d96e0ad78', url='', title='Visualizing Structure and Transitions for Biological Data Exploration', abstract='', venue=None, publication_venue={}, year=None, reference_count=0, citation_count=0, influential_citation_count=0, fields_of_study=[], publication_types=[], publication_date='', journal={}, authors=[])]

### Enrich Papers & Authors


In [60]:
def enrich_papers(paper_ids: list[str], fields: str = DEFAULT_PAPER_FIELDS) -> list[ScholarPaper]:
    """Fetch records for several papers with one POST to the paper batch endpoint.

    Args:
        paper_ids: Paper identifiers, sent as the ``ids`` array of the JSON body.
        fields: Comma-separated field list sent as the ``fields`` query parameter.

    Returns:
        One ``ScholarPaper`` per element of the JSON array in the response.
        An empty list is returned, without making a request, when
        ``paper_ids`` is empty or ``None``.

    Raises:
        RateLimitExceededError: when the API answers with HTTP 429.
        Any exception raised by ``response.raise_for_status()`` for other
        error statuses.
    """
    ids = list(paper_ids or [])
    if not ids:
        # Nothing to look up, so skip the round trip.
        return []

    url = "https://api.semanticscholar.org/graph/v1/paper/batch"
    response = requests.post(url, params={'fields': fields}, json={'ids': ids})
    if response.status_code == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    # Without this check an error payload (a JSON object) would be iterated
    # below as if it were the list of papers.
    response.raise_for_status()

    # NOTE(review): the batch endpoint may return null entries for ids it does
    # not know; confirm how ScholarPaper.from_dict handles None.
    return [ScholarPaper.from_dict(paper) for paper in response.json()]

def enrich_authors(author_ids: list[str], fields: str = DEFAULT_AUTHOR_FIELDS) -> list[ScholarAuthor]:
    """Fetch records for several authors with one POST to the author batch endpoint.

    Args:
        author_ids: Author identifiers, sent as the ``ids`` array of the JSON body.
        fields: Comma-separated field list sent as the ``fields`` query parameter.

    Returns:
        One ``ScholarAuthor`` per element of the JSON array in the response.
        An empty list is returned, without making a request, when
        ``author_ids`` is empty or ``None``.

    Raises:
        RateLimitExceededError: when the API answers with HTTP 429.
        Any exception raised by ``response.raise_for_status()`` for other
        error statuses.
    """
    ids = list(author_ids or [])
    if not ids:
        # Nothing to look up, so skip the round trip.
        return []

    url = "https://api.semanticscholar.org/graph/v1/author/batch"
    response = requests.post(url, params={'fields': fields}, json={'ids': ids})
    if response.status_code == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    # Without this check an error payload (a JSON object) would be iterated
    # below as if it were the list of authors.
    response.raise_for_status()

    # NOTE(review): the batch endpoint may return null entries for ids it does
    # not know; confirm how ScholarAuthor.from_dict handles None.
    return [ScholarAuthor.from_dict(author) for author in response.json()]

In [None]:
# Re-fetch the first two title-search hits through the batch endpoint.
# `paper_data` is reused by the author and citation cells below.
paper_data = enrich_papers([paper.paper_id for paper in papers[:2]])
print(paper_data)

[ScholarPaper(paper_id='8ad65513498b428656d51cb097b66b6fd5798bd0', url='', title='Visualizing structure and transitions in high-dimensional biological data', abstract=None, venue='Nature Biotechnology', publication_venue={'id': '458166b3-de17-4bf3-bbbb-e53782de2f0f', 'name': 'Nature Biotechnology', 'type': 'journal', 'alternate_names': ['Nat Biotechnol'], 'issn': '1087-0156', 'url': 'http://www.nature.com/nbt/', 'alternate_urls': ['http://www.nature.com/nbt']}, year=2019, reference_count=89, citation_count=734, influential_citation_count=90, fields_of_study=[], publication_types=['JournalArticle'], publication_date='2019-12-01', journal={'name': 'Nature Biotechnology', 'pages': '1482 - 1492', 'volume': '37'}, authors=[ScholarAuthor(author_id='144493740', url='', name='Kevin R. Moon', affiliations=[], homepage=None, paper_count=0, citation_count=0, h_index=0), ScholarAuthor(author_id='50756914', url='', name='David van Dijk', affiliations=[], homepage=None, paper_count=0, citation_count

In [None]:
# Fetch author records for every author of the first enriched paper.
# Raises IndexError if `paper_data` is empty.
author_data = enrich_authors([author.author_id for author in paper_data[0].authors])
print(author_data)

[ScholarAuthor(author_id='144493740', url='https://www.semanticscholar.org/author/144493740', name='Kevin R. Moon', affiliations=[], homepage=None, paper_count=52, citation_count=3253, h_index=18), ScholarAuthor(author_id='50756914', url='https://www.semanticscholar.org/author/50756914', name='David van Dijk', affiliations=[], homepage=None, paper_count=51, citation_count=4919, h_index=27), ScholarAuthor(author_id='1720834', url='https://www.semanticscholar.org/author/1720834', name='Z. Wang', affiliations=[], homepage=None, paper_count=406, citation_count=12437, h_index=59), ScholarAuthor(author_id='144416511', url='https://www.semanticscholar.org/author/144416511', name='Scott A. Gigante', affiliations=[], homepage=None, paper_count=26, citation_count=1210, h_index=11), ScholarAuthor(author_id='145180324', url='https://www.semanticscholar.org/author/145180324', name='Daniel B. Burkhardt', affiliations=[], homepage=None, paper_count=29, citation_count=1491, h_index=15), ScholarAuthor(

### References and Citations

In [16]:
def get_paper_citations(paper_id):
    """Return the papers that cite ``paper_id``.

    Requests ``/paper/{paper_id}/citations`` with no query parameters and
    passes the ``citingPaper`` object of every entry in the response's
    ``data`` array to ``ScholarPaper.from_list``.

    Returns:
        The result of ``ScholarPaper.from_list`` on success, or ``None``
        (after printing an error line) for any status other than 200 and 429.

    Raises:
        RateLimitExceededError: when the API answers with HTTP 429.
    """
    endpoint = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations"
    reply = requests.get(endpoint, params={})
    status = reply.status_code

    if status == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    if status != 200:
        # Best effort: report the failure and signal it with None.
        print(f"Error {status}: Unable to fetch citations for paper ID {paper_id}")
        return None

    citing_papers = []
    for entry in reply.json()['data']:
        citing_papers.append(entry['citingPaper'])
    return ScholarPaper.from_list(citing_papers)
    
def get_paper_references(paper_id):
    """Return the papers that ``paper_id`` cites.

    Requests ``/paper/{paper_id}/references`` with no query parameters and
    passes the ``citedPaper`` object of every entry in the response's
    ``data`` array to ``ScholarPaper.from_list``.

    Returns:
        The result of ``ScholarPaper.from_list`` on success, or ``None``
        (after printing an error line) for any status other than 200 and 429.

    Raises:
        RateLimitExceededError: when the API answers with HTTP 429.
    """
    base_url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/references"
    params = {}
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        data = response.json()
        data = [d['citedPaper'] for d in data['data']]
        return ScholarPaper.from_list(data)
    elif response.status_code == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    else:
        # The message names references; it previously said "citations",
        # copied from get_paper_citations.
        print(f"Error {response.status_code}: Unable to fetch references for paper ID {paper_id}")
        return None

In [None]:
# Fetch both directions of the citation graph for the first enriched paper.
# Either call returns None (after printing an error) on a non-200, non-429 status.
citations = get_paper_citations(paper_data[0].paper_id)
references = get_paper_references(paper_data[0].paper_id)

In [18]:
# First citing paper. No `fields` were requested, so in the output below
# only paper_id and title are populated.
citations[0]

ScholarPaper(paper_id='1c831577164b6edc8662d9537b11926561a1aa1d', url='', title='Soybean yield estimation and lodging classification based on UAV multi-source data and self-supervised contrastive learning', abstract='', venue=None, publication_venue={}, year=None, reference_count=0, citation_count=0, influential_citation_count=0, fields_of_study=[], publication_types=[], publication_date='', journal={}, authors=[])

In [None]:
# First referenced paper. As with citations, only paper_id and title are
# populated in the output below.
references[0]

ScholarPaper(paper_id='8044b4714eb27c12020326d3540d6b94652c6b35', url='', title='A comparison of single-cell trajectory inference methods', abstract='', venue=None, publication_venue={}, year=None, reference_count=0, citation_count=0, influential_citation_count=0, fields_of_study=[], publication_types=[], publication_date='', journal={}, authors=[])

# Testing API (packaged functions imported from `scholar.api`)

In [7]:
from scholar.api import partial_search as partial_search_api, enrich_papers as enrich_papers_api, enrich_authors as enrich_authors_api

In [14]:
# Same autocomplete query as above, this time through the scholar.api import.
partial_search_results = partial_search_api("Visualizing Structure and Transitions")
print(partial_search_results)

[ScholarPaper(paper_id='8ad65513498b428656d51cb097b66b6fd5798bd0', url='', title='Visualizing structure and transitions in high-dimensional biological data', abstract='', venue=None, publication_venue={}, year=None, reference_count=0, citation_count=0, influential_citation_count=0, fields_of_study=[], publication_types=[], publication_date='', journal={}, authors=[]), ScholarPaper(paper_id='57c2710ada3c2c75ca66378ea8bae42d96e0ad78', url='', title='Visualizing Structure and Transitions for Biological Data Exploration', abstract='', venue=None, publication_venue={}, year=None, reference_count=0, citation_count=0, influential_citation_count=0, fields_of_study=[], publication_types=[], publication_date='', journal={}, authors=[])]


In [9]:
# Enrich the autocomplete matches by id through the packaged batch helper.
enriched_search_results = enrich_papers_api([paper.paper_id for paper in partial_search_results])

In [10]:
# Inspect the first enriched paper; raises IndexError if the list is empty.
enriched_search_results[0]

ScholarPaper(paper_id='8ad65513498b428656d51cb097b66b6fd5798bd0', url='', title='Visualizing structure and transitions in high-dimensional biological data', abstract=None, venue='Nature Biotechnology', publication_venue={'id': '458166b3-de17-4bf3-bbbb-e53782de2f0f', 'name': 'Nature Biotechnology', 'type': 'journal', 'alternate_names': ['Nat Biotechnol'], 'issn': '1087-0156', 'url': 'http://www.nature.com/nbt/', 'alternate_urls': ['http://www.nature.com/nbt']}, year=2019, reference_count=89, citation_count=734, influential_citation_count=90, fields_of_study=[], publication_types=['JournalArticle'], publication_date='2019-12-01', journal={'name': 'Nature Biotechnology', 'pages': '1482 - 1492', 'volume': '37'}, authors=[ScholarAuthor(author_id='144493740', url='', name='Kevin R. Moon', affiliations=[], homepage=None, paper_count=0, citation_count=0, h_index=0), ScholarAuthor(author_id='50756914', url='', name='David van Dijk', affiliations=[], homepage=None, paper_count=0, citation_count=

In [11]:
# Fetch author records for the authors of the first enriched paper.
enriched_authors = enrich_authors_api([author.author_id for author in enriched_search_results[0].authors])

In [12]:
# Inspect the first enriched author; raises IndexError if the list is empty.
enriched_authors[0]

ScholarAuthor(author_id='144493740', url='https://www.semanticscholar.org/author/144493740', name='Kevin R. Moon', affiliations=[], homepage=None, paper_count=52, citation_count=3253, h_index=18)