# Instructions to generate data using the GA

This script is used to generate the all_data.csv.bz2 file in the data folder of the following GitHub repository: https://github.com/krl4005/RRC-ConductanceProfile-Project.git

If you want to generate new data, run the run_ga.sh script (which calls the run_ga.py file). This could be done by following these steps: 

* Type *sbatch run_ga.sh './data/trial1_'* (the path in quotes sets where, and under what name, the data will be saved).
* This should take about 16 hours; when it finishes, the data is saved to the data folder.
* Using that new data, we can use this script to generate a new all_data file.

In [1]:
# IMPORT FUNCTIONS
import pandas as pd
from important_functions import generate_alldata, get_cond_data, get_robust_data, get_local_sensitivity, get_rrc_data, get_baseline_torord_data, get_ind

# Directory prefix the input data (GA trial output, best_data file) is read from.
get_data_from = './data/'
# Directory prefix prepended to the name of each file this notebook saves.
save_data_to = './data/'


# Generate all_data.csv file

In [None]:
# Run generate_alldata on the listed trials found under get_data_from.
# NOTE(review): 'trial5' is not in the list -- confirm this is intentional.
trial_names = ['trial1', 'trial2', 'trial3', 'trial4', 'trial6', 'trial7', 'trial8']
all_trials = generate_alldata(get_data_from, trials=trial_names)

# Only a selection of columns is saved, since the file is too large when all
# of the data is written out.
all_data_columns = [
    'gen',
    'trial',
    # conductance multipliers
    'i_cal_pca_multiplier',
    'i_ks_multiplier',
    'i_kr_multiplier',
    'i_nal_multiplier',
    'i_na_multiplier',
    'i_to_multiplier',
    'i_k1_multiplier',
    'i_NCX_multiplier',
    'i_nak_multiplier',
    # fitness and error columns
    'fitness',
    'rrc',
    'rrc_error',
    'total_feature_error',
    'total_morph_error',
    'apd90_AP4',
    # 'apd90_AP4_error' is currently not included
]
all_data = all_trials[all_data_columns]

# The reduced table is written under save_data_to (the RRC Conductance
# Profile repo data folder).
all_data_file = save_data_to + 'all_data.csv.bz2'
all_data.to_csv(all_data_file, index=False)

# Generate best_data.csv file

In [None]:
# Select the rows of all_trials whose fitness is below 2800 and whose total
# morphology and feature errors are both zero.
# NOTE(review): the 2800 fitness cut-off is not explained here -- confirm.
is_best = (
    (all_trials['fitness'] < 2800)
    & (all_trials['total_morph_error'] == 0)
    & (all_trials['total_feature_error'] == 0)
)

# reset_index() returns a new, renumbered frame instead of mutating the
# filtered selection in place; as before, the previous index is kept as a column.
best_data = all_trials[is_best].reset_index()

best_data.to_csv(save_data_to + 'best_data.csv.bz2', index=False)

# The cells that follow read best_data.csv.bz2 from get_data_from, so a copy
# is also written there. When both directories are the same (the default)
# the file already exists and is not written a second time.
if get_data_from != save_data_to:
    best_data.to_csv(get_data_from + 'best_data.csv.bz2', index=False)


# Script to generate the fig2_data.csv file

In [None]:
# Keep only the rows from generations 0, 30 and 99, restricted to the columns
# listed below, and save them.
fig2_generations = [0, 30, 99]
fig2_columns = ['gen', 'trial', 'fitness', 'rrc', 't', 'v', 'cai']

in_fig2_generation = all_trials['gen'].isin(fig2_generations)
fig2_data = all_trials.loc[in_fig2_generation, fig2_columns]

fig2_file = save_data_to + 'fig2_data.csv.bz2'
fig2_data.to_csv(fig2_file, index=False)

# Generate cond_data.pkl and robust_data.pkl

In [None]:
# Both helpers take the same best_data file as input; each writes its own
# pickle file under save_data_to.
best_data_file = get_data_from + 'best_data.csv.bz2'

get_cond_data(
    best_data_path=best_data_file,
    save_to=save_data_to + 'cond_data.pkl',
)
get_robust_data(
    best_data_path=best_data_file,
    save_to=save_data_to + 'robust_data.pkl',
)

# Generate local sensitivity & RRC data: sens_grandi.csv.bz2, sens_opt.csv.bz2, sens_baseline.csv.bz2, and rrc_data.csv.bz2

This can be run locally, but it takes a long time (a few hours). To generate this data on the cluster instead (which takes only about 20 minutes), use the *run_get_data.sh* file. If you do not want to regenerate all of this data, just comment out the lines you do not need.

In [None]:
# Sensitivity data for baseline Grandi Model.
# The conductance multipliers are passed directly as a dict instead of a path
# to a best_data file.
# NOTE(review): i_nal_multiplier is 0 while every other multiplier is 1 --
# confirm this is intended for the Grandi model.
grandi_multipliers = {
    'i_cal_pca_multiplier': 1,
    'i_ks_multiplier': 1,
    'i_kr_multiplier': 1,
    'i_nal_multiplier': 0,
    'i_na_multiplier': 1,
    'i_to_multiplier': 1,
    'i_k1_multiplier': 1,
    'i_NCX_multiplier': 1,
    'i_nak_multiplier': 1,
}
get_local_sensitivity(
    best_data_path=grandi_multipliers,
    save_to=save_data_to + 'sens_grandi.csv.bz2',
    model='grandi_flat.mmt',
    stim=1,
    length=5,
)

# Sensitivity data for optimized ToR-ORd Model
get_local_sensitivity(
    best_data_path=get_data_from + 'best_data.csv.bz2',
    save_to=save_data_to + 'sens_opt.csv.bz2',
)

# Sensitivity data for baseline ToR-ORd Model
get_local_sensitivity(
    best_data_path=get_ind(),
    save_to=save_data_to + 'sens_baseline.csv.bz2',
)

# RRC data for baseline ToR-ORd Model and all 220 optimized models.
# The best_data file is saved by this notebook as 'best_data.csv.bz2'; the
# previous '.csv.gz' name referred to a file that is never written here.
get_rrc_data(
    best_data_path=get_data_from + 'best_data.csv.bz2',
    save_to=save_data_to + 'rrc_data.csv.bz2',
)

# Generate baseline_torord_data.csv

In [None]:
# Save the baseline ToR-ORd data next to the other generated files. The
# configured save_data_to directory is used instead of a hard-coded './data/'
# so this output follows the same setting as every other file; with the
# default save_data_to the resulting path is unchanged.
get_baseline_torord_data(save_to=save_data_to + 'baseline_torord_data.csv.bz2')