# Drug Response Correlation to Dynamic Features

## Initialisation

In [1]:
import os

import numpy as np
import pandas as pd

# Move the working directory to the project root so that the relative file
# names used by later cells resolve the same way wherever the notebook was
# launched from inside the project tree.
path = os.getcwd()
# The root is taken to be everything up to and including the FIRST occurrence
# of the substring 'project' in the current working directory.
project_marker = 'project'
index_project = path.find(project_marker)
if index_project == -1:
    # str.find returns -1 when the marker is absent; slicing with that value
    # would silently yield a meaningless prefix of the path, so fail loudly.
    raise FileNotFoundError(
        f"Cannot locate the project root: '{project_marker}' does not occur in "
        f"the current working directory {path!r}"
    )
# slice the path from the start up to the end of the marker
project_path = path[:index_project + len(project_marker)]
# set the working directory
os.chdir(project_path)
print(f'Project path set to: {os.getcwd()}')

Project path set to: c:\Github\ode-biomarker-project


In [2]:
# Create the data-access helpers that the following cells use to fetch
# datasets by code.
from PathLoader import PathLoader
from DataLink import DataLink
# PathLoader is built from two .env files and DataLink from that loader plus a
# data-codes CSV.
# NOTE(review): the file names are relative, so this presumably relies on the
# working directory set in the first cell - confirm.
path_loader = PathLoader('data_config.env', 'current_user.env')
data_link = DataLink(path_loader, 'data_codes.csv')

In [3]:
# Load a (features, labels) pair via a composite loading code. Per the template
# below, this code selects the 'fgfr4_ccle_dynamic_features' dataset with the
# FGFR_0939 / LN_IC50 target.
loading_code = 'generic-gdsc-1-FGFR_0939-LN_IC50-fgfr4_ccle_dynamic_features-true-Row'
# Loading-code template:
# generic-gdsc-{number}-{drug_name}-{target_label}-{dataset_name}-{replace_index}-{row_index}
feature_data, label_data = data_link.get_data_using_code(loading_code)

In [4]:
# Fetch the dataset registered under the 'fgfr4_ccle_dynamic_features_v2' code.
loading_code = 'fgfr4_ccle_dynamic_features_v2'
dynamic_features = data_link.get_data_from_code(loading_code)

In [5]:
# Fetch the dataset registered under the 'fgfr4_model_raw_simulation' code.
# NOTE(review): simulation_data is not read by any cell visible in this part
# of the notebook - confirm it is still needed.

loading_code = 'fgfr4_model_raw_simulation'
simulation_data = data_link.get_data_from_code(loading_code)

In [6]:
# Re-create the PathLoader / DataLink helpers.
# NOTE(review): this repeats cell In [2] line for line and loads no data
# itself; it looks redundant - confirm whether it can be dropped.
from PathLoader import PathLoader
from DataLink import DataLink
path_loader = PathLoader('data_config.env', 'current_user.env')
data_link = DataLink(path_loader, 'data_codes.csv')

In [16]:
### Initialisation Data
# Tables fetched directly by data code.
# NOTE(review): neither dynamic_feature nor ccle_data is read again in the
# cells visible here - confirm they are still needed.
dynamic_feature = data_link.get_data_from_code('fgfr4_ccle_dynamic_features_v2')
ccle_data = data_link.get_data_from_code('ccle')

# Load the (features, labels) pair selected by the 'ccle-...' loading code.
# This rebinds feature_data / label_data, which cell In [3] had already set.
# NOTE(review): presumably the CCLE gene-level table with the FGFR_0939
# LN_IC50 response as label - confirm against the DataLink code format.
loading_code = 'ccle-gdsc-1-FGFR_0939-LN_IC50-sin'
feature_data, label_data = data_link.get_data_using_code(loading_code)

# Specie -> CCLE reference match rules: keep only the rows that have a
# 'reference' value, then only the rows whose 'is_active_form' equals True.
match_rules_file = data_link.get_data_from_code('fgfr4_model_ccle_match_rules')
match_rules_files_dropna = match_rules_file.dropna(subset=['reference'])
match_rules_files_dropna_active_only = match_rules_files_dropna.loc[match_rules_files_dropna['is_active_form'] == True]

# Load the dynamic-feature dataset (v2) with the same drug / target label.
# Loading-code template:
# generic-gdsc-{number}-{drug_name}-{target_label}-{dataset_name}-{replace_index}-{row_index}
loading_code = 'generic-gdsc-1-FGFR_0939-LN_IC50-fgfr4_ccle_dynamic_features_v2-true-Unnamed: 0'
dynamic_feature_data, dynamic_label_data = data_link.get_data_using_code(loading_code)

# Suffixes of the per-specie dynamic-feature columns; the plotting cell builds
# column names as '<specie>_<suffix>'.
dynamic_feature_label = ['auc', 'median', 'tfc', 'tmax',
                 'max', 'tmin', 'min', 'ttsv', 'tsv']


### Options
# folder_name: sub-folder of data/results/ that receives the figures.
# save_figure: when True, the plotting cell writes each figure to disk.
folder_name = 'FGFR4_dynamic_vs_drug_response'
save_figure = True

In [17]:
# Show the (rows, columns) of the dynamic-feature table and of the original
# feature table, one per line, to compare their sample counts.
print(dynamic_feature_data.shape, feature_data.shape, sep='\n')

(665, 260)
(667, 19221)


In [18]:
# Keep only the samples whose index label also occurs in the dynamic-feature
# table, for both the original features and their labels.
dynamic_sample_ids = dynamic_feature_data.index
new_feature_data = feature_data[feature_data.index.isin(dynamic_sample_ids)]
new_label_data = label_data[label_data.index.isin(dynamic_sample_ids)]

# Sanity check: print the shapes of the filtered features and labels.
print(new_feature_data.shape, new_label_data.shape, sep='\n')

(665, 19221)
(665,)


## Config

In [19]:
from scipy.stats import pearsonr

import matplotlib.pyplot as plt

import os

# ignore ConstantInputWarning
import warnings
from scipy.stats import ConstantInputWarning
warnings.simplefilter('ignore', ConstantInputWarning)


def _format_corr(corr, p_val, alpha=0.05):
    """Format a correlation coefficient for display on a plot.

    Args:
        corr: correlation coefficient.
        p_val: p-value associated with ``corr``.
        alpha: significance threshold; results with ``p_val <= alpha`` are
            marked with a trailing '*'.

    Returns:
        ``corr`` rounded to two decimals, followed by '*' when significant.
        An undefined (NaN) p-value is never marked: NaN fails every
        comparison, so testing ``p_val > alpha`` and starring the *else*
        branch would wrongly flag constant-input results as significant.
    """
    marker = '*' if p_val <= alpha else ''
    return f'{corr:.2f}{marker}'


# Directory that receives the figures; exist_ok avoids a check-then-create race.
results_dir = f'{path_loader.get_data_path()}data/results/{folder_name}'
os.makedirs(results_dir, exist_ok=True)

file_save_path = f'{results_dir}/'

# Drug whose LN_IC50 is plotted; matches the drug named in the loading codes.
drug_name = 'FGFR_0939'

# Map each model specie to the CCLE genes listed in its ';'-separated
# 'reference' field; blank entries (e.g. from a trailing ';') are dropped.
species_ccle_matches = {}
for _, row in match_rules_files_dropna_active_only.iterrows():
    specie_name = row['specie']
    ccle_matches = [gene.strip() for gene in row['reference'].split(';')]
    species_ccle_matches[specie_name] = [gene for gene in ccle_matches if gene]

# From here on, work with the samples that also have dynamic features.
feature_data = new_feature_data
label_data = new_label_data

for dynamic_specie, gene_list in species_ccle_matches.items():
    # Kept from the original cell: once a specie has been plotted, label_data
    # refers to the dynamic-feature labels.
    label_data = dynamic_label_data

    specie_columns = [f'{dynamic_specie}_{suffix}' for suffix in dynamic_feature_label]
    specie_features = dynamic_feature_data[specie_columns]

    # 2 x 5 grid: one panel per dynamic feature, last panel for gene expression.
    fig, axes = plt.subplots(2, 5, figsize=(20, 8))
    panels = axes.flatten()

    for i, column in enumerate(specie_columns):
        ax = panels[i]
        feature_values = specie_features[column]
        ax.scatter(dynamic_label_data, feature_values, alpha=0.5, s=10, color='purple')

        # pearsonr returns NaN for constant input (its warning is silenced
        # above); _format_corr leaves such results unmarked.
        corr, p_val = pearsonr(dynamic_label_data, feature_values)
        ax.text(0.05, 0.95, f'corr: {_format_corr(corr, p_val)}', transform=ax.transAxes, fontsize=12,
                verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.5))
        # change the x and y axis labels size
        ax.tick_params(axis='both', which='major', labelsize=12)
        ax.set_xlabel('LN_IC50', fontsize=15)
        ax.set_ylabel(column, fontsize=15)

    # the final 10th plot is the label data vs original feature data
    ax = panels[9]
    gene_list_with_corr = []
    for gene in gene_list:
        # scatter() and pearsonr() pair values by position, so put the gene
        # expression in the row order of the dynamic-feature table first.
        # NOTE(review): assumes dynamic_label_data is row-aligned with
        # dynamic_feature_data (both come from the same loader call) - confirm.
        gene_expression = feature_data.loc[dynamic_feature_data.index, gene]
        ax.scatter(dynamic_label_data, gene_expression, alpha=0.5, s=10, label=gene)
        corr, p_val = pearsonr(dynamic_label_data, gene_expression)
        gene_list_with_corr.append(f'{gene} corr: {_format_corr(corr, p_val)}')

    ax.legend(gene_list_with_corr, loc='upper left')
    ax.set_xlabel('LN_IC50', fontsize=15)
    ax.set_ylabel(dynamic_specie, fontsize=15)
    fig.suptitle(f'{dynamic_specie} dynamic/original features vs LN_IC50 of {drug_name}', fontsize=20)
    fig.tight_layout()

    if save_figure:
        fig.savefig(f'{file_save_path}{dynamic_specie}_dynamic_features_vs_LN_IC50.png')
    # close this figure explicitly so figures do not accumulate across species
    plt.close(fig)
    print(f'{dynamic_specie} dynamic features vs LN_IC50 plotted')

SPRY2 dynamic features vs LN_IC50 plotted
PTP dynamic features vs LN_IC50 plotted
FOXO dynamic features vs LN_IC50 plotted
