# iLINCS Freeze

## iLINCS Freeze: CSV

In [1]:
"""
iLINCS Freeze: CSV

Here we will attempt to freeze the iLINCS database. It will be stored to CSV files!

Structure:
    1. Imports, Variables, Functions
    2. Retrieve Data
    3. Parse Data
    4. Store Data

"""

# 1. Imports, Variables, Functions
# imports
import requests, os
import pandas as pd
import logging
# Remove any existing handlers associated with the root logger.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Reconfigure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logging.info("iLINCS Freeze: CSV")

# variabels
OUTPUT_PATH = "../data/iLINCS"

# functions
def get_signatures():
    """
    get_signatures
    Retrieves a list of signatures from the iLINCS API.

    Parameters:
    - None

    Returns:
    - List[Dict]:
        A list of dictionaries, each representing a signature. Returns None in case of failure.
    """
    url = 'http://www.ilincs.org/api/SignatureMeta?' 
    url = 'http://www.ilincs.org/api/SignatureMeta?' 
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print("Failed to retrieve data")
        return None

def get_datasets():
    """
    get_datasets
    Retrieves a list of datasets from the iLINCS API.

    Parameters:
    - None

    Returns:
    - List[Dict]: 
        A list of dictionaries, each representing a dataset. Returns None in case of failure.
    """
    #url = 'http://www.ilincs.org/api/PublicDatasets?filter={"limit":1000}' 
    url = 'http://www.ilincs.org/api/PublicDatasets?' 
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Failed to retrieve datasets")
        return None


def get_genes():
    """
    get_genes
    Retrieves gene information from the iLINCS API.

    Parameters:
    - None

    Returns:
    - List[Dict]: 
        A list of dictionaries, each representing a gene. Returns None in case of failure.
    """
    url = 'http://www.ilincs.org/api/GeneInfos?' 
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Failed to retrieve genes")
        return None

def get_compounds():
    """
    get_compounds
    Retrieves a list of compounds from the iLINCS API.

    Parameters:
    - None

    Returns:
    - List[Dict]: A list of dictionaries, each representing a compound. Returns None in case of failure.
    """
    url = 'http://www.ilincs.org/api/Compounds?' 
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Failed to retrieve compounds")
        return None


def save_to_csv(data, filename):
    """
    save_to_csv
    Saves given data to a CSV file.

    Parameters:
    - data: List[Dict]
        The data to be saved into a CSV file.
    - filename: str
        The name of the file to save the data into.

    Returns:
    - df
    """
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)

    return df


def get_signature_data(signature_id):
    """
    get_signature_data
    Retrieves a list of compounds from the iLINCS API.

    Parameters:
    - signature_id: str()
        Signature ID of interest

    Returns:
    - List[Dict]: A list of dictionaries, each representing a signature. Returns None in case of failure.
    """
    url = f'http://www.ilincs.org/api/ilincsR/downloadSignature'
    payload = {'sigID': signature_id}
    response = requests.post(url, data=payload)
    if response.status_code == 200:
        return response.json()  # Or process the response as needed
    else:
        print(f"Failed to retrieve data for signature {signature_id}")
        return None

# 2. Retrieve Data
# get signatures
signatures = get_signatures()
logging.info(f"Nº Retrieved Signatures {len(signatures)}")

# get datasets
datasets = get_datasets()
logging.info(f"Nº Retrieved Datasets {len(datasets)}")

# get genes
genes = get_genes()
logging.info(f"Nº Retrieved Genes {len(genes)}")

# get compounds
compounds = get_compounds()
logging.info(f"Nº Retrieved Compounds {len(compounds)}")

# get signature vectors


# 3. Parse Data

# 4. Store Data
# parse & store signatures
df_signatures = save_to_csv(signatures,os.path.join(OUTPUT_PATH,"signatures.csv"))

# parse & store datasets
df_datasets = save_to_csv(datasets,os.path.join(OUTPUT_PATH,"datasets.csv"))

# parse & store genes
df_genes = save_to_csv(genes,os.path.join(OUTPUT_PATH,"genes.csv"))

# parse & store compounds
df_compounds = save_to_csv(compounds,os.path.join(OUTPUT_PATH,"compounds.csv"))


2023-11-24 10:58:47,295 - INFO - iLINCS Freeze: CSV


Failed to retrieve data


TypeError: object of type 'NoneType' has no len()

In [None]:
df_signatures

In [11]:
def get_signature_data(signature_id):
    """
    get_signature_data
    Retrieves a list of compounds from the iLINCS API.

    Parameters:
    - signature_id: str()
        Signature ID of interest

    Returns:
    - List[Dict]: A list of dictionaries, each representing a signature. Returns None in case of failure.
    """
    url = f'http://www.ilincs.org/api/ilincsR/downloadSignature'
    payload = {'sigID': signature_id}
    response = requests.post(url, data=payload)
    if response.status_code == 200:
        return response.json()  # Or process the response as needed
    else:
        print(f"Failed to retrieve data for signature {signature_id}")
        return None
    


In [12]:
a = df_signatures["signatureid"].to_list()[0]

b = get_signature_data(a)

In [20]:
len(b["data"]["signature"])

11911