In [1]:
import csv
import os
import time
import requests
import json
import re

##Configurable Items
api_key_file = 'G:/My Drive/Lindsay Pino/proj/2023_scispot_utils/data/scispot_api_key.txt'
# Read only the first line of the key file. The context manager closes the
# handle promptly, and strip() removes the trailing newline (or stray spaces)
# that would otherwise be sent to the API as part of the key.
with open(api_key_file, 'r') as _api_key_handle:
    API_TOKEN = _api_key_handle.readline().strip()
if not API_TOKEN:
    raise ValueError(f"No API key found on the first line of {api_key_file}")

In [2]:
# fetch a Scispot entry based on the Sample ID
def fetch_entry_from_registryid(manager, registryid):
    """Fetch a labsheet row from Scispot by its registry ID.

    Parameters
    ----------
    manager : str
        Name of the labsheet to search; sent as the "labsheet" field.
    registryid : str
        Registry ID such as "PDG97". Every non-digit character is removed
        before the lookup, so only the numeric part is sent.

    Returns
    -------
    The decoded JSON body of the API response.
    """
    # strip prefix from registry id: the "id" value must exclude the
    # prefix and suffix, so keep digits only
    registryid = re.sub("[^0-9]", "", registryid)

    url = "https://api.scispot.io/tryingtofixcors/labsheets/find-row-by-id"
    payload = {
        "apiKey": API_TOKEN,
        "labsheet": manager,
        "id": registryid,
        "idType": "ID_BARCODE"
    }
    # Use the session as a context manager so its connection pool is released
    # instead of leaking one session per call.
    with requests.Session() as session:
        ret = session.post(url, json=payload)
    return json.loads(ret.text)

def grab_uuid_from_row(row):
    """Return the first cell of the first entry under the response's 'rows' key.

    In the find-row responses shown in this notebook that cell holds the
    row's UUID.
    """
    first_row = row['rows'][0]
    return first_row[0]

# Smoke test: look up one known peptide digest in Labsheets and show its UUID
test_digest = "PDG97"  # peptides that ran for MSR97

test_entry = fetch_entry_from_registryid("Peptide Digest", test_digest)
test_fetch_uuid = grab_uuid_from_row(test_entry)
test_fetch_uuid

'e8cb1f7c-0799-475e-8b8e-83170db0cbcf'

In [3]:
from typing import List, Literal

import json
import requests
import time

# Base URL of the Scispot v2 API; Labsheet appends its endpoint paths to it.
PROD_URL = "https://api.scispot.io/v2"


class APIException(Exception):
    """Error raised for a failed or invalid Scispot API interaction.

    The formatted text is stored on ``self.message`` and is also the
    exception's string value.

    Parameters
    ----------
    message : str
        Human-readable description of the problem.
    endpoint : str, optional
        Endpoint path of the request that failed. Omitted from the text
        when not given.
    payload : dict, optional
        Request payload. Values whose key contains "api" (case-insensitive)
        are replaced by asterisks so the API key is never printed.
    res : optional
        Decoded API response. Omitted from the text when not given.

    Notes
    -----
    Only ``message`` is required, so call sites that report a local
    validation error (no request was sent) can raise this exception too.
    """

    def __init__(self, message, endpoint=None, payload=None, res=None):
        self.message = message + "\n"
        if endpoint is not None:
            self.message += "Request: " + str(endpoint) + "\n"
        if payload is not None:
            anonymized_payload = {
                k: "*" * len(str(v))
                if isinstance(k, str) and "api" in k.lower()
                else v
                for k, v in payload.items()
            }
            # default=str keeps error reporting from failing on values that
            # are not JSON-serializable.
            self.message += (
                json.dumps(anonymized_payload, indent=2, default=str) + "\n"
            )
        if res is not None:
            self.message += (
                "Response: " + json.dumps(res, indent=2, default=str) + "\n"
            )
        super().__init__(self.message)


class Labsheet:
    """Small client for the Scispot Labsheets API, bound to one labsheet.

    Each public method builds a JSON payload, posts it to one of the
    endpoint paths below through ``make_request`` and validates the decoded
    response with ``error_check``, which raises ``APIException`` on failure.

    Parameters
    ----------
    name : str
        Labsheet name; sent as both the "manager" and "labsheet" fields.
    api_key : str
        Scispot API key; sent as the "apiKey" field of every request.
    verbose : bool
        When True, ``make_request`` prints the URL and the payload with the
        API key masked.
    base : str
        Must be "prod"; any other value (including the default) raises
        ``APIException``.
    """

    ADD_ROWS = "/labsheets/add-rows"
    UPDATE_ROWS = "/labsheets/update-rows"
    UPDATE_ROWS_BY_ID = "/labsheets/update-rows-by-id"
    UPDATE_ROWS_BY_COLTYPE = "/labsheets/update-rows-by-column-type"
    FIND_ROW = "/labsheets/find-row"
    FIND_ROW_BY_ID = "/labsheets/find-row-by-id"
    LIST_ROWS = "/labsheets/list-rows"
    DELETE_ROWS = "/labsheets/delete-rows"
    CREATE = "/labsheets/create"
    UPDATE_PARENT = "/labsheets/update-parent"
    UPDATE_CHILDREN = "/labsheets/update-children"

    def __init__(self, name, api_key, verbose=False, base="dev"):
        self.name = name
        self.api_key = api_key
        self.verbose = verbose
        if base == "prod":
            self.BASE_URL = PROD_URL
        else:
            # All four APIException arguments are supplied; passing only the
            # message used to fail with a TypeError instead of this error.
            raise APIException(
                "Incorrect base. Must be 'prod'",
                "(no request sent)",
                {"base": str(base)},
                {},
            )

    ## Supports limited column types.
    ## Q: What are the non supported column types? - JSP
    def create(self, header_names, header_types):
        """Creates a labsheet.

        ``header_names`` and ``header_types`` are parallel sequences; the
        column at index ``i`` gets position ``i``, name ``header_names[i]``
        and type ``header_types[i]``. Returns the decoded API response.
        """
        columns = []
        for i in range(len(header_names)):
            columns.append(
                {
                    "position": i,
                    "name": header_names[i],
                    "type": header_types[i],
                }
            )
        payload = {"name": self.name, "columns": columns}
        res = self.make_request(self.CREATE, payload)
        self.error_check(res, payload, self.CREATE)
        print("successfully created labsheet.")
        return res

    def make_request(self, endpoint, payload):
        """Makes a request to the API.

        This is meant to be internal, for most cases you should use the
        other methods (e.g. add_rows, update_rows, etc).

        Note that ``payload`` is modified in place: the "apiKey", "manager"
        and "labsheet" fields are added to it before it is posted. Returns
        the response body decoded from JSON.
        """
        payload["apiKey"] = self.api_key
        payload["manager"] = self.name
        payload["labsheet"] = self.name
        if self.verbose:
            print(self.BASE_URL + endpoint)
            # This will print the api key in plaintext...
            # which is a security vulnerability ... so lets not do that ...
            # print(str(json.dumps(payload, indent=4)))
            public_payload = {
                k: "*" * len(v) if "api" in k else v
                for k, v in payload.items()
            }

            print(str(json.dumps(public_payload, indent=4)))
        req = requests.post(url=self.BASE_URL + endpoint, json=payload)
        res = json.loads(req.text)
        return res

    @staticmethod
    def _is_failure(item) -> bool:
        """Return True when ``item`` has no "success" key or a failing value.

        A value counts as failing when it is the string "false" or is falsy
        (the API has been seen returning both strings and booleans).
        """
        if "success" not in item:
            return True
        flag = item["success"]
        return flag == "false" or not flag

    def error_check(self, res, payload, endpoint) -> None:
        """Checks for errors in the response.

        It reads the response and raises a python error if an API
        error is detected.

        A dict response must carry a passing "success" value. A list
        response is checked entry by entry: an entry with "updatedRows" has
        each of its updated rows checked, any other entry is checked itself.
        Responses that are neither dict nor list are not checked.

        Raises
        ------
        APIException
            If any checked item is missing "success" or reports failure.
        """
        if isinstance(res, dict):
            if self._is_failure(res):
                raise APIException(
                    "An error occured: ", endpoint, payload, res
                )
        elif isinstance(res, list):
            # Example response from the update_rows endpoint:
            # [
            #   {
            #     "ID": "98",
            #     "updatedRows": [
            #       {
            #         "uuid": "50738ad7-c7e0-44da-9a29-3c0dec898075",
            #         "success": "true"
            #       }
            #     ]
            #   }
            # ]
            for row in res:
                if "updatedRows" in row:
                    for updated_row in row["updatedRows"]:
                        if self._is_failure(updated_row):
                            raise APIException(
                                "An error occured: ", endpoint, payload, res
                            )
                    # Move on to the next entry. Returning here would leave
                    # every later entry of the response unchecked.
                    continue

                if self._is_failure(row):
                    raise APIException(
                        "An error occured: ", endpoint, payload, res
                    )

    def add_rows(self, rows: List[List[str]]):
        """Adds rows to the table (labsheet).

        Parameters
        ----------
        rows : list of lists
            List of rows to add. Each row is a list of values.
            For example:
            --data '{
                "apiKey": "12345678-abcd-9012-efgh-345678901234",
                "manager": "Elisa Data",
                "rows": [
                    [
                        "ID-15",
                        "Standard",
                        "50mL",
                        "A",
                        "09/03/2022",
                        "Hazardous",
                        "Unknown",
                        "North Lab > Well Plate 23",
                        "134-amf"
                    ]
                ]}'

        Returns
        -------
        list
            The "uuid" of every response entry, ordered by the entry's "row"
            value.

        Notes
        -----
        The column order is determined by the order of the columns in the labsheet.
        Which can be queried using the get_headers() method.
        Having said that there are some oddities with the API, where the first column
        is skipped (thus not passed when adding rows), I am not sure what other columns
        are generated.
        """
        payload = {"rows": rows}
        res = self.make_request(self.ADD_ROWS, payload)
        print(res)
        self.error_check(res, payload, self.ADD_ROWS)
        print("successfully added rows.")
        return [row["uuid"] for row in sorted(res, key=lambda x: x["row"])]

    def get_headers(self) -> List[str]:
        """Returns the headers of the labsheet.

        Implemented as a ``list_rows(0, 0)`` call whose "headers" field is
        returned.
        """
        res = self.list_rows(0, 0)
        return res["headers"]

    # Usually, the uuid, stays packaged with the rest of the rows
    def update_rows(self, rows):
        """Updates rows addressed by uuid.

        Each item of ``rows`` is a sequence whose first element is the row
        uuid and whose remaining elements are sent as that row's "data".
        Returns the decoded API response.
        """
        payload = {"rows": [{"uuid": row[0], "data": row[1:]} for row in rows]}
        res = self.make_request(self.UPDATE_ROWS, payload)
        self.error_check(res, payload, self.UPDATE_ROWS)
        print("successfully updated rows.")
        return res

    def update_rows_by_id(self, rows: List[dict]):
        """Updates rows by id.

        Parameters
        ----------
        rows : list of dicts
            List of rows to update. Each row is a dict of values.
            For example (from the scispot docs):
            [{}, {"ID": "20c20", "Quantity": "78", "Comp": "c24"}]

        Notes from the docs
        -------------------
        curl --location 'https://api.scispot.io/v2/labsheets/update-rows-by-id' \
            --header 'Content-Type: application/json' \
            --data '{
            "apiKey": "12345678-abcd-9012-efgh-345678901234",
            "manager": "Materials Manager",
            "rows": [
                {
                "ID": "20c20",
                "Quantity": "78",
                "Comp": "c24"
                }
            ]
            }'
        """
        payload = {"rows": rows}
        # Printed before make_request adds the API key, so no secret is shown.
        print(payload)
        res = self.make_request(self.UPDATE_ROWS_BY_ID, payload)
        self.error_check(res, payload, self.UPDATE_ROWS_BY_ID)
        print("successfully updated rows.")
        return res

    def update_rows_by_coltype(self, rows: List[dict]):
        """Update Row Data By Known Cell Value.
        Note: This feature is currently only supported for BATCH_ID column type.
        Would be cooler if it was supported for REGISTRY_ID :,)
        """
        payload = {"rows": rows}
        # Printed before make_request adds the API key, so no secret is shown.
        print(payload)
        res = self.make_request(self.UPDATE_ROWS_BY_COLTYPE, payload)
        self.error_check(res, payload, self.UPDATE_ROWS_BY_COLTYPE)
        print("successfully updated rows.")
        return res

    def find_row(self, id, id_type: Literal["uuid", "id", "barcode"] = "uuid"):
        """Finds a row by id.

        ``id_type="uuid"`` queries the find-row endpoint; "id" and "barcode"
        query the find-row-by-id endpoint ("barcode" additionally sends
        ``idType="ID_BARCODE"``). Any other ``id_type`` raises
        ``APIException`` without sending a request.

        Examples
        --------
        >>> ls.find_row("b543a089-dee2-42f0-a750-91effe49841c", id_type="uuid")
        {"headers": [...], "row": [...], "success": "true"}
        >>> ls.find_row("1102", id_type="barcode")
        {"rows": [[...]], "headers": [...], "success": True}
        # Note that the response from uuid is a single list,
        # whilst the response from barcode is a list of lists.
        """
        if id_type == "uuid":
            payload = {"uuid": id}
            res = self.make_request(self.FIND_ROW, payload)
            self.error_check(res, payload, self.FIND_ROW)
            print("successfully found row.")
            return res
        elif id_type == "id":
            payload = {"id": id}
        elif id_type == "barcode":
            payload = {"id": id, "idType": "ID_BARCODE"}
        else:
            raise APIException(
                "Incorrect ID type. Must be 'id', 'barcode', 'uuid'",
                "(no request sent)",
                {"id_type": str(id_type)},
                {},
            )
        res = self.make_request(self.FIND_ROW_BY_ID, payload)
        # Report the endpoint that was actually called.
        self.error_check(res, payload, self.FIND_ROW_BY_ID)
        print("successfully found row.")
        return res

    def list_rows(self, pageSize, page=1):
        """Lists rows of the labsheet for the given page size and page number.

        Returns the decoded API response.
        """
        # Q: What is the max page size? - JSP
        payload = {"pageSize": pageSize, "page": page}
        res = self.make_request(self.LIST_ROWS, payload)
        self.error_check(res, payload, self.LIST_ROWS)
        print("successfully listed rows.")
        return res

    def delete_rows(self, uuids):
        """Deletes the rows with the given uuids. Returns nothing."""
        payload = {"uuids": uuids}
        res = self.make_request(self.DELETE_ROWS, payload)
        self.error_check(res, payload, self.DELETE_ROWS)

    # Supports the common operation of fetching for a certain row, applying some operations, and returning it.
    def find_then_update(self, callback, id, id_type="uuid"):
        """Finds a row, passes the response to ``callback`` and updates rows.

        The value returned by ``callback`` is handed to ``update_rows``, so
        it must be in the format that method expects (uuid first, then data).
        """
        data = self.find_row(id, id_type)
        ret = callback(data)
        self.update_rows(ret)

    ##ONLY BARCODES FOR NOW
    def update_parent(self, child_id, parent_id, parent_labsheet):
        """Links the child row (by barcode) to a parent row in another labsheet.

        NOTE(review): "rows" is sent as a single dict here, whereas
        create_parent_child sends a list of such dicts to the same endpoint;
        confirm which shape the API expects.
        """
        payload = {
            "rows": {
                "barcode": child_id,
                "parent": {"labsheet": parent_labsheet, "barcode": parent_id},
            }
        }
        res = self.make_request(self.UPDATE_PARENT, payload)
        self.error_check(res, payload, self.UPDATE_PARENT)
        print("successfully updated parent.")

    def create_children(self, parent_barcode, child_ids):
        """Registers ``child_ids`` as children of ``parent_barcode``.

        Every child is sent with a quantity of 0 to the update-children
        endpoint.
        """
        payload = {
            "idType": "ID_BARCODE",
            "parent": parent_barcode,
            "children": [{"ID": child, "quantity": 0} for child in child_ids],
        }
        res = self.make_request(self.UPDATE_CHILDREN, payload)
        self.error_check(res, payload, self.UPDATE_CHILDREN)
        print("successfully created children.")


# Give mapping of parent to children barcodes
def create_parent_child(mapping, parent_sheet, child_sheet):
    """Link parent rows to their child rows in both directions, by barcode.

    Parameters
    ----------
    mapping : dict
        Maps each parent barcode to an iterable of child barcodes.
    parent_sheet, child_sheet : Labsheet
        Sheets holding the parent rows and the child rows respectively.

    The children of every parent are posted to the parent sheet's
    update-children endpoint, then (after a one second pause) every child's
    parent is posted to the child sheet's update-parent endpoint. Both
    responses are validated afterwards with ``error_check``, which raises on
    failure.
    """
    update_parent_rows = []
    update_child_rows = []
    for parent in mapping:
        children = mapping[parent]
        for child in children:
            update_parent_rows.append(
                {
                    "barcode": child,
                    "parent": {
                        "labsheet": parent_sheet.name,
                        "barcode": parent,
                    },
                }
            )
        update_child_rows.append(
            {
                "barcode": parent,
                "children": [
                    {"labsheet": child_sheet.name, "barcode": child}
                    for child in children
                ],
            }
        )
    res1 = parent_sheet.make_request(
        parent_sheet.UPDATE_CHILDREN, {"rows": update_child_rows}
    )
    time.sleep(1)
    res2 = child_sheet.make_request(
        child_sheet.UPDATE_PARENT, {"rows": update_parent_rows}
    )
    # Each response is checked against the endpoint that produced it, so an
    # error report names the request that actually failed. Fresh payload
    # dicts are passed so the report does not carry the API key.
    parent_sheet.error_check(
        res1, {"rows": update_child_rows}, parent_sheet.UPDATE_CHILDREN
    )
    child_sheet.error_check(
        res2, {"rows": update_parent_rows}, child_sheet.UPDATE_PARENT
    )


def find_values_by_header(res, header):
    """Return the values of one column from a rows/headers response.

    Parameters
    ----------
    res : dict
        Response containing "headers" (list of column names) and "rows"
        (list of rows, each a list aligned with "headers").
    header : str
        Name of the column to extract.

    Returns
    -------
    list
        One value per row, in row order; empty when there are no rows.

    Raises
    ------
    APIException
        If ``res`` lacks the "rows" or "headers" key.
    ValueError
        If there are rows but ``header`` is not among the headers.
    """
    if "rows" in res and "headers" in res:
        rows = res["rows"]
        if not rows:
            return []
        # Resolve the column position once instead of once per row.
        column = res["headers"].index(header)
        return [row[column] for row in rows]
    # All four APIException arguments are supplied; passing only the message
    # used to fail with a TypeError instead of this error.
    raise APIException(
        "Incorrect response format. Must have 'rows' and 'headers' keys.",
        "(no request sent)",
        {"header": str(header)},
        res,
    )


def array_to_dict(header, data):
    """Pair each header with the value at the same position and return a dict.

    Pairing stops at the shorter of the two sequences; for a repeated header
    the last value wins.
    """
    return {key: value for key, value in zip(header, data)}


def dict_to_array(data):
    """Convert a dict, or a list of dicts, into a headers/rows structure.

    Parameters
    ----------
    data : dict or list of dict
        For a list, the headers are the keys of the first dict and each dict
        contributes one row made of its values. An empty list gives empty
        headers and rows.

    Returns
    -------
    dict
        ``{"headers": [...], "rows": [...]}``. For a single dict, "rows" is
        the flat list of that dict's values (not a list of rows).
    """
    if isinstance(data, list):
        if not data:
            # Nothing to take headers from; avoid IndexError on data[0].
            return {"headers": [], "rows": []}
        return {
            "headers": list(data[0].keys()),
            "rows": [list(row.values()) for row in data],
        }
    return {"headers": list(data.keys()), "rows": list(data.values())}


In [4]:
#from src.wrapper import Labsheet
#from dotenv import load_dotenv
import os

# File in the same directory as this notebook
# notebooks/.testenv.env
# content is like:
# SCISPOT_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#load_dotenv("./.testenv.env")

# Every labsheet client shares the same key, verbosity and target environment.
_shared_labsheet_options = {
    "api_key": API_TOKEN,
    "verbose": False,
    "base": "prod",
}

ls_msr = Labsheet(name="MS Run", **_shared_labsheet_options)
ls_msc = Labsheet(name="MS Column", **_shared_labsheet_options)
ls_pdg = Labsheet(name="Peptide Digest", **_shared_labsheet_options)
ls_fra = Labsheet(name="Cell Fraction", **_shared_labsheet_options)



In [6]:
# Show the raw row(s) returned for MS Run barcode 374
msr_374_lookup = ls_msr.find_row("374", id_type="barcode")
msr_374_lookup["rows"]

successfully found row.


[['5b52caf0-7c3f-46cf-9977-535c75da78dc',
  'MSR374',
  'SET4REP1A4_TAL0000517',
  '10/12/2023',
  'Daniele Canzani',
  'DDA',
  '44',
  '',
  'Evosep',
  'S2',
  '',
  '',
  '100',
  'ng',
  '',
  '',
  '',
  '420604005932',
  '',
  '',
  '',
  '']]

In [5]:
# MSR374 --> mass_spec_run_ids = ["374"]  this worked in sandbox...
# Does not work in prod because Registry ID == MSR374 maps to Run Name == SET4REP1A4_TAL0000517 
# and SET4REP1A4_TAL0000517 maps to multiple MSR

column_id_list = ["1"]
mass_spec_run_ids = ["374"]
for cid, msr in zip(column_id_list, mass_spec_run_ids):
    # The first cell of the matched column row is its uuid
    column_lookup = ls_msc.find_row(cid, id_type="barcode")
    column_uuid = column_lookup["rows"][0][0]
    # The third cell of the matched MS Run row is printed below as its Run Name
    run_lookup = ls_msr.find_row(msr, id_type="barcode")
    mass_spec_run_name = run_lookup["rows"][0][2]
    print(f">>> Found column uuid => {column_uuid}")
    print(f">>> Found mass spec Run Name => {mass_spec_run_name}")
    # The row is addressed by Run Name here; "Registry ID": msr is left out.
    run_update = {
        "Run Name": mass_spec_run_name,
        "Column ID": column_uuid,
    }
    res = ls_msr.update_rows_by_id(rows=[run_update])
    print(f"Updated MSR -> {msr} with column -> {cid}")
    
    
##
##
## MS Run labsheet updates are not possible, at least with the "update_rows_by_id" endpoint.
## This is because, for all of the SET*REP1 samples, there are duplicate rows with the same Run Name
## from having both a DDA and a DIA run (sometimes, as for SET1REP1, there are multiple DIA runs).
## To backlink things in the MS Run labsheet, I need to figure out how to use another endpoint,
## maybe update-rows-by-column-type (in the future; right now that feature is only
## supported for the BATCH_ID column type, so it wouldn't work for Registry ID).
##
##

successfully found row.
successfully found row.
>>> Found column uuid => e03cfa54-0e6a-4280-83cc-5d6797d8d6dc
>>> Found mass spec Run Name => SET4REP1A4_TAL0000517
{'rows': [{'Run Name': 'SET4REP1A4_TAL0000517', 'Column ID': 'e03cfa54-0e6a-4280-83cc-5d6797d8d6dc'}]}


APIException: An error occured: 
Request: /labsheets/update-rows-by-id
{
  "rows": [
    {
      "Run Name": "SET4REP1A4_TAL0000517",
      "Column ID": "e03cfa54-0e6a-4280-83cc-5d6797d8d6dc"
    }
  ],
  "apiKey": "************************************",
  "manager": "MS Run",
  "labsheet": "MS Run"
}
Response: [
  {
    "ID": "SET4REP1A4_TAL0000517",
    "updatedRows": [
      {
        "uuid": "99808d08-cbcc-4a28-bee4-914f646b775e",
        "success": "true"
      },
      {
        "uuid": "5b52caf0-7c3f-46cf-9977-535c75da78dc",
        "success": "false",
        "message": "the option 04398e98-8483-4695-8a0e-48b3ccadc84d is invalid"
      },
      {
        "uuid": "d3603318-7e08-4825-b61d-471c547cd1de",
        "success": "true"
      }
    ]
  }
]


In [None]:
import pandas as pd

# read in a CSV with all the mass spec runs
# NOTE: despite the variable name, the file loaded here is the "DIA" sheet.
manual_msr_dda = pd.read_csv('G:/My Drive/Lindsay Pino/proj/2023_scispot_utils/data/SET1 REP1 Non-scripted metadata for Scispot - S1R1 MS Run - DIA.csv')

# get a list of all the MS Run registry IDs, digits only (prefix stripped)
registry_ids = [
    re.sub("[^0-9]", "", registry_id)
    for registry_id in manual_msr_dda['Registry ID']
]

# get a list of the Peptide Digest IDs linked to each MS Run, digits only
digest_ids = [
    re.sub("[^0-9]", "", digest_id)
    for digest_id in manual_msr_dda['Peptide Digest IDs']
]

# Example ->
# lets add to the ms run sheet on id
# MSR98
# MSC1 <- column id 2
# PDG197 <- protein digest id 197

column_id_list = ["1"]*len(registry_ids)
mass_spec_run_ids = registry_ids
# Look each column barcode up only once: the same barcode is repeated for
# every run, so a per-iteration lookup is a redundant API call.
column_uuid_by_barcode = {}
for cid, msr in zip(column_id_list, mass_spec_run_ids):
    if cid not in column_uuid_by_barcode:
        column_uuid_by_barcode[cid] = ls_msc.find_row(cid, id_type="barcode")["rows"][0][0]
    column_uuid = column_uuid_by_barcode[cid]
    print(f">>> Found column uuid => {column_uuid}")
    res = ls_msr.update_rows_by_id(
        rows=[
            {
                "Registry ID": msr,
                "Column ID": column_uuid,
            }
        ]
    )
    print(f"Updated MSR -> {msr} with column -> {cid}")

In [None]:
# Link every MS Run row to the uuid of its peptide digest
for pdgid, msr in zip(digest_ids, mass_spec_run_ids):
    print(pdgid)
    print(msr)
    digest_lookup = ls_pdg.find_row(pdgid, id_type="barcode")
    digest_uuid = digest_lookup["rows"][0][0]
    print(f">>> Found peptide digest uuid => {digest_uuid}")
    digest_link = {
        "Registry ID": msr,
        "Peptide Digest IDs": digest_uuid,
    }
    res = ls_msr.update_rows_by_id(rows=[digest_link])
    print(f"Updated mass spec run MSR -> {msr} with link to peptide digest PDG -> {pdgid}")
    
# SET1 REP1 metadata sheet goes from PDG97-PDG288 with corresponding MSR1-MSR192
# the sandbox has PDG1-PDG97 and MSR1-MSR193
# so sandbox has all the MSR but only 1 of the corresponding PDG lol omg

In [None]:
# gonna try to go back one more linkage and "test" these loops there...


# read in the CSV of peptide digest metadata (nucleoplasm test sheet)
manual_pdg_nuc = pd.read_csv('G:/My Drive/Lindsay Pino/proj/2023_scispot_utils/data/SET1 REP1 Non-scripted metadata for Scispot - S1R1 Peptide Digest - Nucleoplasm TEST.csv')

# Registry IDs from this sheet, reduced to their digits
registry_ids = [
    re.sub("[^0-9]", "", registry_id)
    for registry_id in manual_pdg_nuc['Registry ID']
]

# Parent Sample IDs from this sheet, reduced to their digits
fraction_ids = [
    re.sub("[^0-9]", "", fraction_id)
    for fraction_id in manual_pdg_nuc['Parent Sample']
]

In [None]:
# Link every peptide digest row to the uuid of its parent fraction
for fraid, pdgid in zip(fraction_ids, registry_ids):
    print(fraid)
    print(pdgid)
    fraction_lookup = ls_fra.find_row(fraid, id_type="barcode")
    fraction_uuid = fraction_lookup["rows"][0][0]
    print(f">>> Found fraction uuid => {fraction_uuid}")
    parent_link = {
        "Registry ID": pdgid,
        "Parent Sample": fraction_uuid,
    }
    res = ls_pdg.update_rows_by_id(rows=[parent_link])
    print(f"Updated peptide digest PDG -> {pdgid} with protein fraction parent FRA -> {fraid}")
    