In [None]:
##Load dependencies and custom functions

from datetime import datetime
from csv import DictReader
from matplotlib import pyplot
import csv
import pandas as pd
import numpy as np


def getIndexes(dfObj, value):
    """Return the row index labels of every cell in dfObj equal to value.

    Columns are scanned in order, and within each column the matching rows
    are reported in index order. A row label is emitted once per matching
    column, so it appears several times when the value occurs in more than
    one column of that row. An empty list means the value is absent.
    """
    matches = dfObj.isin([value])
    # Boolean flag per column: does the value occur anywhere in it?
    col_flags = matches.any()
    row_labels = []
    for col in col_flags[col_flags].index:
        col_matches = matches[col]
        row_labels.extend(col_matches[col_matches].index)
    return row_labels


# Report the wall-clock time (HH:MM:SS) at which this cell finished.
now = datetime.now()
current_time = f"{now:%H:%M:%S}"
print("Dependencies loaded at", current_time)

In [None]:
##Specify relevant file paths and load data

# Input locations (edit these to match the local copies of the data sets).
nucFile = '/path/to/Stenstrom_NucleolarProteome.xlsx'
idFile = '/path/to/PRISM Drug Screen/TranscriptIDs.csv'
exprFile = '/path/to/CCLE_expression_filtered.csv'
sensFile = '/path/to/oxaliplatin (BRDBRD-K78960041-001-05-7) Drug sensitivity AUC (PRISM Repurposing Secondary Screen) 19Q4.csv'

# Nucleolar proteome: read the 'All nucleoli genes' sheet and keep its 'Gene name' column.
nucDF = pd.read_excel(nucFile, sheet_name='All nucleoli genes')
nucGenes = nucDF.filter(items=['Gene name'])

# Drug sensitivity table, restricted to the cell line ID, the AUC value and the lineage.
sensDF = pd.read_csv(
    sensFile,
    usecols=[
        'Depmap ID',
        'Drug sensitivity AUC (PRISM Repurposing Secondary Screen) 19Q4',
        'Lineage',
    ],
)

# Report the wall-clock time (HH:MM:SS) at which loading finished.
now = datetime.now()
current_time = f"{now:%H:%M:%S}"
print("Data loaded at", current_time)

In [None]:
##Generate Transcript ID dictionary

# Build the transcript ID table: one row per record of idFile, holding the text of
# its 'ID' field up to (not including) the first ' (' -- a value such as
# 'GENE (1234)' is stored as 'GENE'.
#
# The IDs are collected in a plain list and the frame is built once. The previous
# row-by-row DataFrame.append copied the whole frame on every row and no longer
# exists in pandas >= 2.0.
# newline='' is what the csv module asks for so quoted fields are parsed correctly.
with open(idFile, 'r', newline='') as read_obj:
    ids = [row['ID'].split(' (', 1)[0] for row in DictReader(read_obj)]

# dtype=object keeps an (unexpectedly) empty table typed like the populated one.
idDF = pd.DataFrame({'ID': ids}, dtype=object)

print(idDF)

In [None]:
##Find nucleolar genes and calculate correlation between expression and Ox sensitivity

# Correlate gene expression with oxaliplatin sensitivity, one transcript ID at a time.
# The loop walks every ID in idDF from START_INDEX onwards (all genes, not only the
# nucleolar subset) and appends one row per processed gene to the output CSV.
# NOTE(review): 10499 looks like a resume offset from an interrupted run -- confirm,
# and set it to 0 to process every gene.
START_INDEX = 10499
AUC_COL = 'Drug sensitivity AUC (PRISM Repurposing Secondary Screen) 19Q4'


def _pearson_auc_vs_tpm(df):
    """Return the Pearson correlation between the AUC and 'TPM' columns of df."""
    return df[AUC_COL].corr(df['TPM'], method='pearson')


def _lineage_corr(df, lineage):
    """Return the AUC/TPM Pearson correlation over the rows of df with this 'Lineage'."""
    return _pearson_auc_vs_tpm(df[df["Lineage"] == lineage])


for lfd in range(START_INDEX, len(idDF)):

    sele = idDF.iloc[lfd, 0]
    print(sele)

    # Only IDs that occur exactly once in idDF are processed; repeated IDs are skipped.
    Pos = getIndexes(idDF, sele)
    if len(Pos) == 1:
        lPos = Pos[0]

        # Read the Depmap ID column (column 0) plus the column at lPos + 1.
        # Presumably the rows of idDF are in the same order as the expression
        # columns of exprFile -- verify against the file.
        # NOTE(review): header=None makes pandas treat the first line of exprFile
        # as data -- confirm the file has no header row.
        filterDF = pd.read_csv(exprFile, usecols=[0, lPos + 1], names=['Depmap ID', 'TPM'], header=None)

        # Keep only the cell lines present in both the expression and sensitivity data
        mergeDF = pd.merge(filterDF, sensDF, how='inner', on='Depmap ID')

        # Correlation between drug sensitivity and expression across all cell lines
        corr = _pearson_auc_vs_tpm(mergeDF)
        print('All:', corr)

        # Colorectal cancer cell lines only
        corrCRC = _lineage_corr(mergeDF, "Colorectal")
        print('CRC:', corrCRC)

        # Central nervous system cancer cell lines only
        corrCNS = _lineage_corr(mergeDF, "Central Nervous System")
        print('CNS:', corrCNS)

        # Append this gene's result. The file is reopened per gene so each row is
        # on disk before the next (slow) iteration starts. newline='' stops the csv
        # writer's '\r\n' terminator from becoming a blank line on Windows.
        with open('PearsonCorrelation_AllGenes.csv', 'a', newline='') as corrfile:
            schr = csv.writer(corrfile, quoting=csv.QUOTE_ALL)
            schr.writerow([sele, corr, corrCRC, corrCNS])

    # Blank line between genes in the progress log
    print()

print("Done!")