In [1]:
import json

with open("credentials.json", "r") as f:
    credentials = json.load(f)

NOTION_KEY = credentials["integration_token"]
database_id = credentials["table_id"]

In [51]:
import requests
NOTION_URL = 'https://api.notion.com/v1/databases'
NOTION_VERSION = "2022-02-22"
HEADER = {"Authorization": f"Bearer {NOTION_KEY}",
                "Content-Type": "application/json",
                "Notion-Version": NOTION_VERSION}
body = {"sorts": []}

papers = []
while True:

    url = f"{NOTION_URL}/{database_id}/query"
    result = requests.post(url, data=json.dumps(body), headers=HEADER).json()
    papers.extend(result["results"])

    if result["has_more"]:
        body = {"sorts": [], "start_cursor": result["next_cursor"]}
    else:
        break

In [56]:
import pandas as pd

papers_df = pd.DataFrame(papers)

In [104]:
papers_df["properties"][0]["URL"]["url"]

'https://arxiv.org/pdf/2204.07118.pdf'

In [108]:
papers_df["properties"][1]["URL"]

'https://arxiv.org/pdf/2203.09795.pdf'

In [141]:
def get_status_tags(df):

    return df.apply(lambda x:
        [_["name"] for _ in x["properties"]["Status"]["multi_select"]], axis=1)

def get_paper_url(df):

    has_url = df.apply(lambda x: x["properties"]["URL"]["url"] is not None, axis=1)
    df = papers_df[has_url]
    return df.apply(lambda x: x["properties"]["URL"]["url"], axis=1)

In [142]:
get_paper_url(papers_df)

0                   https://arxiv.org/pdf/2204.07118.pdf
1                   https://arxiv.org/pdf/2203.09795.pdf
2                   https://arxiv.org/pdf/2103.17239.pdf
3                   https://arxiv.org/pdf/2204.12451.pdf
4                   https://arxiv.org/pdf/2202.01169.pdf
                             ...                        
245                 https://arxiv.org/pdf/2003.09229.pdf
246                 https://arxiv.org/pdf/2101.11605.pdf
247                 https://arxiv.org/pdf/2101.01169.pdf
249    https://arxiv.org/ftp/arxiv/papers/1612/1612.0...
258                     https://arxiv.org/abs/1909.11942
Length: 226, dtype: object

In [84]:
def get_starts(filtered_df): # returns index

    return filtered_df[filtered_df.apply(lambda x: "★" in x)].index

def get_unreads(filtered_df): # returns index

    return filtered_df[filtered_df.apply(lambda x: "Unread" in x)].index

In [99]:
papers_df.loc[get_starts(filtered_df)]

Unnamed: 0,object,id,created_time,last_edited_time,created_by,last_edited_by,cover,icon,parent,archived,properties,url
1,page,5326a5e4-0d7a-45a1-9c2e-a139ddf5de8d,2022-05-10T12:55:00.000Z,2022-05-10T12:56:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Three-things-everyone-sh...
2,page,c3b57ddc-c865-4988-8263-24f58a335e5d,2022-05-10T12:39:00.000Z,2022-05-10T12:55:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/CaiT-Going-deeper-with-I...
6,page,3248d6cf-5cb6-46c8-9dba-576717648e93,2022-05-07T13:36:00.000Z,2022-05-07T13:36:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Face-neurons-encode-nons...
8,page,01df37a8-9d00-416a-bf34-5e3dfb7d2038,2022-05-07T13:35:00.000Z,2022-05-07T13:35:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Memory-engrams-Recalling...
9,page,10428a73-4bbb-4dce-9e35-411ba0c4345a,2022-05-07T13:34:00.000Z,2022-05-07T13:35:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Brain-wide-mappings-reve...
36,page,bd0c6d75-46ba-48f8-94ec-d388a3df2d9f,2022-04-25T08:10:00.000Z,2022-04-25T08:33:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/VICReg-Variance-Invarian...
45,page,391b06c6-e1ed-4650-abc9-67c723c30d41,2022-04-21T13:47:00.000Z,2022-04-21T13:51:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Context-Autoencoder-for-...
48,page,321b3e38-48a8-4739-9f28-538730a8dba4,2022-04-21T13:30:00.000Z,2022-04-21T13:42:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/MaskGIT-Masked-Generativ...
49,page,89933fd6-6b7f-45ec-8611-68d627fcd338,2022-04-21T13:26:00.000Z,2022-04-21T13:30:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Fairness-Indicators-for-...
52,page,77167057-9007-4fdb-ac1d-21252685ee7b,2022-04-21T13:07:00.000Z,2022-04-21T13:11:00.000Z,"{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...","{'object': 'user', 'id': 'e3ac564e-c2e1-4507-a...",,,"{'type': 'database_id', 'database_id': '21e1a5...",False,"{'Date Published': {'id': '%3C%3EPK', 'type': ...",https://www.notion.so/Impact-of-Pretraining-Te...


In [82]:
papers_df[get_unreads(filtered_df)]

KeyError: "None of [Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,\n            ...\n            325, 326, 327, 328, 329, 330, 331, 332, 334, 335],\n           dtype='int64', length=249)] are in the [columns]"

In [67]:
filtered_df = papers_df.apply(lambda x:
[_["name"] for _ in x["properties"]["Status"]["multi_select"]], axis=1)

In [80]:
filtered_df[filtered_df.apply(lambda x: "★" in x)].index

Int64Index([  1,   2,   6,   8,   9,  36,  45,  48,  49,  52,  55,  70, 130,
            175, 177, 181, 187, 225, 226, 267, 268, 269, 270, 271, 282, 283,
            289, 290, 298, 300, 301, 302, 308],
           dtype='int64')