## Initialise Repository

In [1]:
import os

path = os.getcwd()
# find the string 'project' in the path, return index
index_project = path.find('project')
# slice the path from the index of 'project' to the end
project_path = path[:index_project+7]
# set the working directory
os.chdir(project_path)
print(f'Project path set to: {os.getcwd()}')

Project path set to: c:\Github\ode-biomarker-project


In [2]:
from PathLoader import PathLoader
path_loader = PathLoader('data_config.env', 'current_user.env')

In [3]:
from DataLink import DataLink
data_link = DataLink(path_loader, 'data_codes.csv')

In [4]:
# loading packages 

from tqdm import tqdm
from toolkit import *

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


## Retriving Data 

Create a list that stores all drug names 

In [18]:
gdsc = data_link.get_data_from_code('gdsc2')
# Select column 'DRUG_NAME' and make it unique by using set()
all_drug_names = list(set(gdsc['DRUG_NAME']))
all_drug_names

['BMS-345541',
 'AT13148',
 'Dactolisib',
 'Selumetinib',
 'Pyridostatin',
 'MK-2206',
 'BPD-00008900',
 'TAF1_5496',
 'Lapatinib',
 'Alpelisib',
 'Vincristine',
 'Bortezomib',
 'CDK9_5038',
 'IWP-2',
 'Cediranib',
 'RVX-208',
 'Paclitaxel',
 'Docetaxel',
 'OTX015',
 'Alisertib',
 'AZD7762',
 'Talazoparib',
 'Fulvestrant',
 'Vorinostat',
 'MK-8776',
 'Nutlin-3a (-)',
 'LJI308',
 'Epirubicin',
 'GSK2578215A',
 'Savolitinib',
 'Wnt-C59',
 'GSK2606414',
 'I-BRD9',
 'VX-11e',
 'Luminespib',
 'Dactinomycin',
 'Linsitinib',
 'Leflunomide',
 'AZD5363',
 'GNE-317',
 'OSI-027',
 'Ibrutinib',
 'Wee1 Inhibitor',
 'Carmustine',
 'Daporinad',
 'Pevonedistat',
 'ERK_6604',
 'EPZ5676',
 'PD0325901',
 'Gefitinib',
 'AZD5153',
 'Zoledronate',
 'JQ1',
 'Vinorelbine',
 'AZ960',
 'MIRA-1',
 '5-Fluorouracil',
 'SCH772984',
 'IRAK4_4710',
 'BMS-754807',
 'Vinblastine',
 'SB216763',
 'PRT062607',
 'WIKI4',
 'Entospletinib',
 'ABT737',
 'Osimertinib',
 'AZD3759',
 'WEHI-539',
 'KRAS (G12C) Inhibitor-12',
 'WZ

In [20]:
# load in dynamic features data 

available_drugs = []
for drug_name in tqdm(all_drug_names):
    if '-' in drug_name:
        continue
    # print(drug_name)
    loading_code = f'generic-gdsc-2-{drug_name}-LN_IC50-ccle_protein_expression-true-Cell_Line'
    # generic-gdsc-{number}-{drug_name}-{target_label}-{dataset_name}-{replace_index}-{row_index}
    feature_data, label_data = data_link.get_data_using_code(loading_code)
    # print(f'Data loaded for code {loading_code} Feature Shape {feature_data.shape} Label Shape {label_data.shape}')
    # if the feature data is not empty, append the drug name to the available_drugs list
    if feature_data.shape[0] > 0 and label_data.shape[0] > 0:
        available_drugs.append(drug_name)
        
available_drugs

100%|██████████| 192/192 [00:20<00:00,  9.39it/s]


['AT13148',
 'Dactolisib',
 'Selumetinib',
 'Pyridostatin',
 'TAF1_5496',
 'Lapatinib',
 'Alpelisib',
 'Vincristine',
 'Bortezomib',
 'CDK9_5038',
 'Cediranib',
 'Paclitaxel',
 'Docetaxel',
 'OTX015',
 'Alisertib',
 'AZD7762',
 'Talazoparib',
 'Fulvestrant',
 'Vorinostat',
 'LJI308',
 'Epirubicin',
 'GSK2578215A',
 'Savolitinib',
 'GSK2606414',
 'Luminespib',
 'Dactinomycin',
 'Linsitinib',
 'Leflunomide',
 'AZD5363',
 'Ibrutinib',
 'Wee1 Inhibitor',
 'Carmustine',
 'Daporinad',
 'Pevonedistat',
 'ERK_6604',
 'EPZ5676',
 'PD0325901',
 'Gefitinib',
 'AZD5153',
 'Zoledronate',
 'JQ1',
 'Vinorelbine',
 'AZ960',
 'SCH772984',
 'IRAK4_4710',
 'Vinblastine',
 'SB216763',
 'PRT062607',
 'WIKI4',
 'Entospletinib',
 'ABT737',
 'Osimertinib',
 'AZD3759',
 'WZ4003',
 'GSK269962A',
 'Tamoxifen',
 'GSK591',
 'AZD2014',
 'GSK343',
 'Niraparib',
 'Cytarabine',
 'Camptothecin',
 'Cisplatin',
 'Cyclophosphamide',
 'NU7441',
 'Sinularin',
 'Trametinib',
 'VE821',
 'Topotecan',
 'Sapitinib',
 'AZD1208',


In [21]:
len(available_drugs)

152

## Create Streamline Functions

In [None]:
folder_name = 'CANISRDatabase'

if not os.path.exists(f'{path_loader.get_data_path()}data/results/{folder_name}'):
    os.makedirs(f'{path_loader.get_data_path()}data/results/{folder_name}')

file_save_path = f'{path_loader.get_data_path()}data/results/{folder_name}/'

In [None]:
def pipeline(drug_name):
    loading_code = f'generic-gdsc-2-{drug_name}-LN_IC50-ccle_protein_expression-true-Cell_Line'
    feature_data, label_data = data_link.get_data_using_code(loading_code)
    print(f'Data loaded for code {loading_code} Feature Shape {feature_data.shape} Label Shape {label_data.shape}')
    
    # split the data into training and testing data
    