# System API Synthesis Demo Notebook

This notebook provides basic examples of how to use System's API to conduct synthesis. Full documentation is available on our [API documentation site](https://api-docs.system.com/).

Running these examples requires you to have an API key, provided by System. Please contact us at [api@system.com](mailto:api@system.com) if you are interested in exploring our public API.

## Setup Notebook

### Import libraries and define helper functions

In [None]:
!pip install api-client==1.45.0 --extra-index-url https://pypi.fury.io/systeminc/

Looking in indexes: https://pypi.org/simple, https://pypi.fury.io/systeminc/
Collecting api-client==1.45.0
  Downloading https://pypi.fury.io/systeminc/-/ver_TiZ6p/api_client-1.45.0-py3-none-any.whl (124 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.2/124.2 kB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Collecting typing-extensions>=4.7.1
  Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)
Collecting pydantic>=2
  Downloading pydantic-2.8.2-py3-none-any.whl (423 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.9/423.9 kB[0m [31m47.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydantic-core==2.20.1
  Downloading pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m147.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting annotated-types>=0.4.0
  Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)
Installing collec

In [None]:
import api_client
import os
import pandas as pd
import json

In [None]:
import math

def fetch_all_paginated_data(fetch_function, job_id, page_size=1000):
    """
    Fetches all data using pagination.
    
    :param fetch_function: The API function to call
    :param job_id: The job ID for the search
    :param page_size: Number of items to fetch per page
    :return: List of all fetched data
    """
    all_data = []
    total = None
    offset = 0

    while total is None or offset < total:
        response = fetch_function(job_id, include_total=True, limit=page_size, offset=offset)
        all_data.extend(response.data)
        
        if total is None:
            total = response.total
        
        offset += page_size
        
    return all_data

### Create a service client and initialize endpoints

In [1]:
%env YOUR_API_KEY=<YOUR_API_KEY>

env: YOUR_API_KEY=<YOUR_API_KEY>


In [None]:
from api_client import ApiClient, SynthesisApi, Configuration

API_KEY = os.environ.get("YOUR_API_KEY")
if not API_KEY:
	raise ValueError("API key is missing")

system_client = ApiClient(header_name="x-api-key", header_value=API_KEY)

synthesis_api = SynthesisApi(system_client)


##  Initiate a pubmed query search

In [None]:
from api_client import PubmedSearchSynthesisInput

query="kras"
job_id = synthesis_api.synthesize_pubmed_search(pubmed_search_synthesis_input=PubmedSearchSynthesisInput(query=query))

In [None]:
print(f"Synthesis started with job_id: {job_id}")

Synthesis started with job_id: TZJiTbX_pm


## Get Synthesis and Statements

### Check on Synthesis status

In [None]:
from api_client import JobStatus
import time
from IPython.display import clear_output

# Assuming synthesis_api and job_id are already defined
start_time = time.time()
time_taken = time.time() - start_time

status = synthesis_api.get_pubmed_search_synthesis_by_id(job_id)
running_statuses = [
    JobStatus.ACCEPTED, JobStatus.CLUSTERING, JobStatus.SYNTHESIZING, JobStatus.SYNTHESIS_RUNNING
]

while status.status in running_statuses:
    # Clear the previous output
    clear_output(wait=True)
    
    # Calculate time taken
    time_taken = time.time() - start_time
    
    # Display current status and time taken
    print(f"Current status: {status}")
    print(f"Time taken: {time_taken:.2f} seconds")
    
    # Wait for a short interval before checking again
    time.sleep(1)
    
    # Update the status
    status = synthesis_api.get_pubmed_search_synthesis_by_id(job_id)

# After the loop ends, show the final status
print(f"Final status: {status.status}")
print(f"Total time taken: {time_taken:.2f} seconds")

Current status: id='TZJiTbX_pm' status=<JobStatus.SYNTHESIZING: 'synthesizing'> created_at=datetime.datetime(2024, 8, 8, 2, 40, 55, 553485) updated_at=datetime.datetime(2024, 8, 8, 2, 41, 5, 622877) user_query='kras' pubmed_query='"kras"[All Fields]' clustering_finished=datetime.datetime(2024, 8, 8, 2, 41, 4, 618668) synthesis_finished=None
Time taken: 18.37 seconds
Final status: synthesis_success
Total time taken: 18.37 seconds


### Get Synthesis and Supporting Resources

In [None]:
synthesis = synthesis_api.get_synthesis_from_pubmed_search(job_id)
statements = fetch_all_paginated_data(synthesis_api.get_statements_from_pubmed_search, job_id)
studies = fetch_all_paginated_data(synthesis_api.get_studies_from_pubmed_search, job_id)
statistical_findings = fetch_all_paginated_data(synthesis_api.get_statistical_findings_from_pubmed_search, job_id)
mechanistic_findings = fetch_all_paginated_data(synthesis_api.get_mechanistic_findings_from_pubmed_search, job_id)

studies_df = pd.DataFrame([json.loads(study.json()) for study in studies])
statistical_findings_df = pd.DataFrame([json.loads(finding.json()) for finding in statistical_findings])
mechanistic_findings_df = pd.DataFrame([json.loads(finding.json()) for finding in mechanistic_findings])
statements_df = pd.DataFrame([json.loads(statement.json()) for statement in statements])

# Show Synthesis Output

In [None]:
from IPython.display import display, HTML

import re

def extract_citations(text):
    citations = re.findall(r'\[(\d+)\]', text)
    return set(citations)

citations = set()
for result in synthesis.summaries:
    theme = result['theme']
    summary = result['summary'].replace('\n', '<br>')
    citations.update(extract_citations(result['summary']))
    
    output = f"""
    <div style="margin-bottom: 20px;">
        <strong>Theme:</strong> {theme}<br><br>
        <strong>Summary:</strong><br>
        {summary}
    </div>
    <hr>
    """
    display(HTML(output))

In [None]:
cited_statements = {}
for citation in sorted(citations, key=int):
    citation_idx = int(citation) - 1
    cited_statements[citation_idx+1] = statements[citation_idx]

from IPython.display import display, HTML

def display_cited_statements(cited_statements):
    html_content = """
    <style>
        .cited-statements {
            font-family: Arial, sans-serif;
            border-collapse: collapse;
            width: 100%;
            margin-bottom: 20px;
        }
        .cited-statements td, .cited-statements th {
            border: 1px solid #ddd;
            padding: 8px;
        }
        .cited-statements tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        .cited-statements tr:hover {
            background-color: #ddd;
        }
        .cited-statements th {
            padding-top: 12px;
            padding-bottom: 12px;
            text-align: left;
            background-color: #4CAF50;
            color: white;
        }
    </style>
    <table class="cited-statements">
        <tr>
            <th>Citation</th>
            <th>Statement</th>
        </tr>
    """
    
    for citation, statement in cited_statements.items():
        html_content += f"""
        <tr>
            <td>[{citation}]</td>
            <td>{statement.summary}, num_findings: {len(statement.finding_ids)}</td>
        </tr>
        """
    
    html_content += "</table>"
    
    display(HTML(html_content))

# Use the function
display_cited_statements(cited_statements)

Citation,Statement
[1],"KRAS mutations is associated with survival, but the strength of the effect is mixed., num_findings: 28"
[2],"KRAS testing is associated with colorectal cancer prognosis, but the strength of the effect is mixed., num_findings: 37"
[4],"KRAS G13D-variant is associated with distant metastatic colorectal tumor samples, but the strength of the effect is mixed., num_findings: 25"
[5],"KRAS mutation is associated with survival from PDAC, but the strength of the effect is mixed., num_findings: 31"
[6],"In most available studies, KRAS mutations is associated with increases in progression-free survival (of metastatic colorectal cancer)., num_findings: 28"
[7],"In most available studies, KRAS mutations is associated with decreases in overall survival (of NSCLC patients)., num_findings: 19"
[15],"In most available studies, KRAS mutations is associated with decreases in overall survival (among patients with non-small-cell lung cancer)., num_findings: 9"
[19],"KRAS mutations is associated with Overall Survival (among patients with KRAS-mutated advanced colorectal cancer), but the strength of the effect is mixed., num_findings: 6"
[21],"KRAS mutations is associated with disease dissemination (among colon cancer patients), but the strength of the effect is mixed., num_findings: 16"
[52],"KRAS mutations is associated with increases in tumor grade., num_findings: 2"


In [None]:
statistical_findings_df

Unnamed: 0,id,flagged,finding_type,summary,topic_1,topic_2,variable_1,variable_2,study,statistic_type,statistic_value,ci_upper,ci_lower,p_value
0,EstL4te5Fy,False,statistical,patients with KRAS mutation (KRASm) [vs patien...,"{'id': '6YmesoiLnT', 'name': 'ras gene family'}","{'id': 'W6qftQ1CpV', 'name': 'progression-free...","{'id': 'x0aW3B9GFQ', 'name': 'patients with KR...","{'id': '_4sqTSr6ev', 'name': 'Progression-free...","{'id': 'g32gaM6OSN', 'doi': '10.1016/j.ejca.20...",mean_difference_unstandardized,-24.00,,,0.003
1,Xdx-GYv-SI,False,statistical,patients with KRAS mutation (KRASm) [vs patien...,"{'id': '6YmesoiLnT', 'name': 'ras gene family'}","{'id': 'bxy3dykVii', 'name': 'survival rate'}","{'id': 'x0aW3B9GFQ', 'name': 'patients with KR...","{'id': 'NGPYuxmNnf', 'name': 'Overall survival...","{'id': 'g32gaM6OSN', 'doi': '10.1016/j.ejca.20...",mean_difference_unstandardized,-29.70,,,0.004
2,aVzZHXnzgG,False,statistical,Greenlandic population diagnosed with CRC in 2...,"{'id': 'McFqOyw_JT', 'name': 'colorectal neopl...",,"{'id': 'x2Amrqb9F_', 'name': 'Greenlandic popu...","{'id': 'Y6wHsRWAzk', 'name': 'Frequency of KRA...","{'id': 'zkgmtG77Sm', 'doi': '10.1111/apm.13254...",mean_difference_unstandardized,16.00,,,0.001
3,lRN8XzszRy,False,statistical,Tumors with invasive implants/LGSC [vs Tumors ...,,,"{'id': '2prvEXw196', 'name': 'Tumors with inva...","{'id': 'ZgwayC7lHS', 'name': 'KRAS mutation'}","{'id': 'Bf2AmtzS8F', 'doi': '10.1007/s00428-02...",mean_difference_unstandardized,17.00,,,0.310
4,RLDH9DN9AI,False,statistical,Patients with KRAS mutation [vs Patients witho...,"{'id': '6YmesoiLnT', 'name': 'ras gene family'}",,"{'id': 'eQYB4N9cdz', 'name': 'Patients with KR...","{'id': 'WpuLmXhAFc', 'name': 'High-stage disea...","{'id': 'Bf2AmtzS8F', 'doi': '10.1007/s00428-02...",mean_difference_unstandardized,-8.00,,,0.640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2855,2ksRkgyZ3B,False,statistical,Well-Differentiated Neuroendocrine Carcinoma (...,"{'id': 'o4y8pYY8y4', 'name': 'neuroendocrine c...","{'id': 'vUwiaNEBVo', 'name': 'survival'}","{'id': 'orGx5Tl22X', 'name': 'Well-Differentia...","{'id': 'BjqavCwVTn', 'name': 'Median Survival'}","{'id': 'C454YGhYw5', 'doi': '10.1007/s00535-01...",mean_difference_unstandardized,41.00,,,0.227
2856,btogrD2nfj,False,statistical,Well-Differentiated Neuroendocrine Carcinoma (...,"{'id': 'o4y8pYY8y4', 'name': 'neuroendocrine c...",,"{'id': 'orGx5Tl22X', 'name': 'Well-Differentia...","{'id': 'erHxGvVhgN', 'name': 'Rb Immunopositiv...","{'id': 'C454YGhYw5', 'doi': '10.1007/s00535-01...",mean_difference_unstandardized,86.00,,,0.015
2857,KPOK212_F_,False,statistical,Ulcerative colitis with low (UC-LR) group [vs ...,"{'id': 'ad6dwiIOQU', 'name': 'ulcerative colit...",,"{'id': 'se2WiDxXg1', 'name': 'Ulcerative colit...","{'id': 'QK7xSZSskB', 'name': 'frequency of p53...","{'id': 'On_W7IeWEg', 'doi': '10.1016/j.crohns....",mean_difference_unstandardized,-46.70,,,0.050
2858,Px8A2T1tqn,False,statistical,Iperihilar cholangiocarcinoma group [vs Eperih...,"{'id': '1kbmy7R4g3', 'name': 'cholangiocarcino...","{'id': '6YmesoiLnT', 'name': 'ras gene family'}","{'id': 'n-keehxvUo', 'name': 'Iperihilar chola...","{'id': 'rsidNbHkow', 'name': 'mutation frequen...","{'id': 'XkGd0Cqrkf', 'doi': '10.5858/arpa.2023...",mean_difference_unstandardized,16.70,,,0.030


In [None]:
mechanistic_findings_df

Unnamed: 0,id,flagged,finding_type,summary,topic_1,topic_2,variable_1,variable_2,study,mechanism_type
0,b09sf4KHUi,False,mechanistic,,"{'id': 'ZID6hiZiyU', 'name': 'KRAS'}","{'id': 'OXt6jy0wqh', 'name': 'neoplasm'}","{'id': 'EGWlup-mGE', 'name': 'KRAS'}","{'id': 'LYaiZ1yCbp', 'name': 'Neoplasms'}","{'id': 'QzU6dhsyHq', 'doi': '10.3390/ijms23084...",Activation
1,WYmMWF17BM,False,mechanistic,,"{'id': '51IRfGK6wf', 'name': 'PTPN11'}","{'id': 'OXt6jy0wqh', 'name': 'neoplasm'}","{'id': 'Bkx0cIK3BI', 'name': 'PTPN11'}","{'id': 'LYaiZ1yCbp', 'name': 'Neoplasms'}","{'id': 'yx8-pkVyqa', 'doi': '10.1016/j.ctrv.20...",Activation
2,AP15COQeJG,False,mechanistic,,"{'id': 'gVu5cKJraO', 'name': 'sos1'}","{'id': 'OXt6jy0wqh', 'name': 'neoplasm'}","{'id': 'clskjHOzCy', 'name': 'SOS1'}","{'id': 'LYaiZ1yCbp', 'name': 'Neoplasms'}","{'id': 'yx8-pkVyqa', 'doi': '10.1016/j.ctrv.20...",Activation
3,SmxVCGcWgm,False,mechanistic,,"{'id': 'MT7T8mj3pO', 'name': 'signal transduct...","{'id': 'EukymkmUae', 'name': 'cell population ...","{'id': 'CRKgrii9F1', 'name': 'signal transduct...","{'id': 'QKx3aWctDv', 'name': 'cell population ...","{'id': 'DWgWf3yLLI', 'doi': '10.1007/s40291-02...",Activation
4,052ZO0DiVI,False,mechanistic,,,"{'id': '9dQYb_73YK', 'name': 'guanosine tripho...","{'id': 'A8kBJfuB6f', 'name': 'GDP'}","{'id': 'h-I7ZjbRuW', 'name': 'GTP'}","{'id': 'c6dhOLOWfo', 'doi': '10.1111/joim.1305...",Inhibition
...,...,...,...,...,...,...,...,...,...,...
12152,Y4ECr2OGNk,False,mechanistic,,"{'id': 'gVu5cKJraO', 'name': 'sos1'}","{'id': 'Ot6TE8R2_D', 'name': 'gnl3'}","{'id': 'clskjHOzCy', 'name': 'SOS1'}","{'id': 'q8pSBYLfyB', 'name': 'GNL3'}","{'id': 'LZfBBg87Oa', 'doi': '10.1016/j.ejmg.20...",Activation
12153,tWgFt2B1kh,False,mechanistic,,"{'id': '51IRfGK6wf', 'name': 'PTPN11'}","{'id': 'Ot6TE8R2_D', 'name': 'gnl3'}","{'id': 'Bkx0cIK3BI', 'name': 'PTPN11'}","{'id': 'q8pSBYLfyB', 'name': 'GNL3'}","{'id': 'LZfBBg87Oa', 'doi': '10.1016/j.ejmg.20...",Activation
12154,YF-KRb8-0K,False,mechanistic,,"{'id': 'ZID6hiZiyU', 'name': 'KRAS'}","{'id': 'Ot6TE8R2_D', 'name': 'gnl3'}","{'id': 'EGWlup-mGE', 'name': 'KRAS'}","{'id': 'q8pSBYLfyB', 'name': 'GNL3'}","{'id': 'LZfBBg87Oa', 'doi': '10.1016/j.ejmg.20...",Activation
12155,7doEcuQC5j,False,mechanistic,,"{'id': 'SR54laVCr_', 'name': 'EGF'}",,"{'id': 'JHOJvzDt5P', 'name': 'EGF'}","{'id': 'SbOltNnC1i', 'name': 'localization'}","{'id': '855od3y5P8', 'doi': '10.1074/jbc.m109....",Activation


In [None]:
statements_df

Unnamed: 0,summary,finding_ids,finding_type
0,"KRAS mutations is associated with survival, bu...","[Xdx-GYv-SI, URDtceDtrO, zi9ZoiyZcl, SdaTJ6vhj...",statistical
1,KRAS testing is associated with colorectal can...,"[_Stft2XAhB, GPmZ6bT3o2, uKTGMh3OsI, 1eyidH5jc...",statistical
2,mutant KRAS is associated with response (of KR...,"[7AcXCurdDm, 6iGuNkIUco, 7mMeYmRRx-, wSdJXizct...",statistical
3,KRAS G13D-variant is associated with distant m...,"[lxCUYzDfMf, XBEpFPNXld, d5QqM7piU3, QQl9dF56R...",statistical
4,KRAS mutation is associated with survival from...,"[H4Yt7a7gZd, ilDPCNz7jM, 19A4wVruq4, 19A4wVruq...",statistical
...,...,...,...
1746,rat sarcoma wild-type is associated with decre...,[xWp0gYrjux],statistical
1747,RHOA positively regulates cell differentiation.,[3uYORZbhSU],mechanistic
1748,CSNK1 phosphorylates LGALS3.,[mbbUs2Y6e5],mechanistic
1749,oligometastatic status is associated with incr...,[FciQ-k9Ue3],statistical
