# Semantic Scholar API

In [None]:
import httpx

In [None]:
# DOI of the paper that is looked up on Semantic Scholar below.
doi = "10.1101/444398"

In [None]:
# Look up the paper by DOI, requesting only the fields that are used below.
r1 = httpx.get(
    f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}?fields=title,url,abstract,authors"
)
# Fail here on an HTTP error (e.g. unknown DOI or rate limiting) rather than
# later with a confusing KeyError when the error body is treated as a paper.
r1.raise_for_status()

In [None]:
def print_paper(paper: dict):
    """Print the title, URL, abstract and authors of ``paper``, one per line.

    Each field is read by key, so a missing field raises ``KeyError`` after
    the preceding fields have already been printed.
    """
    for field in ("title", "url", "abstract", "authors"):
        print(paper[field])

In [None]:
# Decode the JSON body of the paper lookup and print its fields.
j1 = r1.json()
print_paper(j1)

In [None]:
# Semantic Scholar's identifier for the paper; used in the recommendations request.
paper_id = j1["paperId"]

In [None]:
# Number of recommended papers to request (sent as the `limit` query parameter).
num_papers = 100

In [None]:
# Ask the recommendations endpoint for papers related to `paper_id`.
r2 = httpx.get(
    f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{paper_id}?limit={num_papers}&fields=title,url,abstract,authors"
)
# Surface HTTP errors (e.g. rate limiting) immediately instead of failing
# later when "recommendedPapers" is missing from the decoded body.
r2.raise_for_status()

In [None]:
# Decoded recommendations response; the papers are read from its "recommendedPapers" key.
j2 = r2.json()

In [None]:
# Collect the ids of all recommended papers and wrap them in the request
# body that is posted to the batch endpoint.
ids = [recommended["paperId"] for recommended in j2["recommendedPapers"]]
payload = dict(ids=ids)

In [None]:
# Fetch details (including embeddings) for all recommended papers in a
# single batch request; an explicit 50 s timeout is set for this call.
r3 = httpx.post(
    "https://api.semanticscholar.org/graph/v1/paper/batch?fields=title,isOpenAccess,openAccessPdf,authors,embedding,tldr",
    json=payload,
    timeout=50.0,
)
# Surface HTTP errors immediately: an error body is not a list of papers and
# would otherwise break the processing of the decoded response.
r3.raise_for_status()

In [None]:
# Decoded batch response; iterated below as a sequence of paper records.
j3 = r3.json()

In [None]:
# Display the request body that was posted to the batch endpoint.
payload

In [None]:
# Build lookup tables from paperId to embedding vector and to title.
id_to_vector = {}
id_to_title = {}
for p in j3:
    # NOTE(review): the batch endpoint appears to return null for ids it
    # cannot resolve and a null "embedding" for papers without one - confirm
    # against the API docs. Such entries cannot be projected, so skip them
    # instead of crashing with a TypeError on subscripting None.
    if not p or not p.get("embedding"):
        continue
    embedding = p["embedding"]
    # Vectors are only comparable when they come from the same model. Raise
    # explicitly rather than assert, which is stripped when running with -O.
    if embedding["model"] != "specter@v0.1.1":
        raise ValueError(
            f"unexpected embedding model {embedding['model']!r} "
            f"for paper {p['paperId']}"
        )
    id_ = p["paperId"]
    id_to_vector[id_] = embedding["vector"]
    id_to_title[id_] = p["title"]

### UMAP embedding

In [None]:
import bokeh
import pandas as pd
import umap

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral10

# Configure Bokeh to render its output inline in the notebook.
output_notebook()

In [None]:
# UMAP reducer with a fixed seed so repeated runs give the same projection.
reducer = umap.UMAP(random_state=42)

In [None]:
# The dict maps paperId -> vector, so the raw frame has one column per paper;
# flip it to get one row per paper (index = paperId).
df = pd.DataFrame(id_to_vector).T
df

In [None]:
# Fit the UMAP reducer on the paper embedding vectors.
reducer.fit(df)

In [None]:
# Project the vectors with the fitted reducer; the resulting integer-named
# columns 0 and 1 are the coordinates plotted below.
df_t = pd.DataFrame(reducer.transform(df))

In [None]:
# Attach each row's paper id (df_t has a fresh positional index, so the ids
# are copied over from df's index in the same row order) and its title, so
# both are available to the plot's hover tooltips.
df_t["id_"] = df.index.copy()
df_t["title"] = [id_to_title[i] for i in df_t["id_"]]
df_t

In [None]:
# Convert column names to strings (the projection columns are the integers
# 0 and 1) so the glyph call below can refer to them as '0' and '1'.
datasource = ColumnDataSource({str(c): v.values for c, v in df_t.items()})

In [None]:
# Hover tooltip rows as (label, value) pairs: "@name" refers to a column of
# the data source, "$x"/"$y" to the coordinates under the cursor.
tooltips = [
    ("(x,y)", "($x, $y)"),
    ("id", "@id_"),
    ("title", "@title"),
]

In [None]:
# Square 800x800 figure with the hover tooltips defined above.
plot_figure = figure(
    title='UMAP projection of papers',
    width=800,
    height=800,
    tooltips=tooltips,
)

In [None]:
# Draw one marker per paper at its 2-D UMAP coordinates (columns '0' and '1').
# `scatter` is used instead of `circle` because passing `size` to `circle` is
# deprecated in Bokeh 3.4+; scatter's default marker is a circle, so the
# rendering is unchanged.
plot_figure.scatter(
    '0',
    '1',
    source=datasource,
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4,
)
show(plot_figure)

### BioGPT

In [None]:
from transformers import BioGptForCausalLM, BioGptTokenizer, pipeline, set_seed

In [None]:
# Load the pretrained "microsoft/biogpt-large" model and its matching tokenizer.
model = BioGptForCausalLM.from_pretrained("microsoft/biogpt-large")
tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt-large")

In [None]:
# Wrap the model in a text-generation pipeline and try it on a short prompt.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Seed the RNGs so the sampled continuations are reproducible.
set_seed(42)
# Sample five continuations, each capped at a total length of 20 tokens.
generator("COVID-19 is", max_length=20, num_return_sequences=5, do_sample=True)

In [None]:
# Prompt layout: an instruction, the abstract of the paper fetched first,
# and a trailing cue after which the model is expected to write the summary.
input_text = (
    "Task: please summarize the following article. "
    f"Article: {j1['abstract']}"
    " Summary: "
)
input_text

In [None]:
# Sample five candidate summaries, each with at most 200 newly generated tokens.
generator(input_text, max_new_tokens=200, num_return_sequences=5, do_sample=True)