In [1]:
# TODO: trim this list down to the imports the notebook actually uses.
import pandas as pd
import sys
import os
import subprocess
import shutil
import numpy as np
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import importlib
import plotly
import random

In [2]:
# Suppress pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.set_option('mode.chained_assignment', None)

In [3]:
def shell_do(command, log=False, return_log=False):
    """Run a command line as a subprocess (no shell) and optionally expose its stdout.

    Parameters
    ----------
    command : str
        Command line to execute. It is tokenised with ``shlex.split`` so that
        quoted arguments (e.g. ``-c "print(1 + 1)"``) are passed as one argument
        instead of being torn apart on every space.
    log : bool, default False
        If True, print the decoded stdout of the command.
    return_log : bool, default False
        If True, return the decoded stdout as a ``str``.

    Returns
    -------
    str or None
        Decoded stdout when ``return_log`` is True, otherwise None.

    Notes
    -----
    Only stdout is captured; stderr of the child goes wherever the parent's
    stderr goes. A non-zero exit status is reported on stderr but does not raise.
    """
    import shlex  # local import so the function does not depend on a new file-level import

    # Echo the whitespace-normalised command so the notebook shows what was run.
    print(f'Executing: {(" ").join(command.split())}', file=sys.stderr)

    res = subprocess.run(shlex.split(command), stdout=subprocess.PIPE)

    # Surface failures that would otherwise pass unnoticed.
    if res.returncode != 0:
        print(f'Command exited with status {res.returncode}', file=sys.stderr)

    # Decode lazily: only when the caller actually asked for the output.
    if log or return_log:
        output = res.stdout.decode('utf-8')
        if log:
            print(output)
        if return_log:
            return output
    return None

In [4]:
# Base directory used to build the input/output paths interpolated into the
# pipeline commands in the cells below. Placeholder value: replace with your own path.
wd = '/YOUR/DIRECTORY'

### ML Data Prep

In [95]:
# Command line for the data-prep script, assembled from one list entry per option.
cmd = ' '.join([
    'python run_data_prep.py',
    '--interval_name PARK2',
    f'--training_ids {wd}/ref_files/training_set_IDs.csv',
    f'--out_path {wd}/testing/GP2_PRKN_all',
    '--create_testing yes',
    '--test_size 150',  # currently acts as max test size b/c of missing snp metrics
])

In [96]:
conda_env = 'python_3_9'

# open(..., 'w') does not create missing parent directories, so make sure the
# script directory (and the log directory passed to swarm below) exist first.
os.makedirs('swarm/logs', exist_ok=True)

# Wrapper script: source conda, activate the environment, then run `cmd`.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('swarm/data_prep.sh', 'w') as f:
    f.write('#!/usr/bin/env bash\n\n')
    f.write('source /data/$USER/conda/etc/profile.d/conda.sh\n')
    f.write(f'conda activate {conda_env}\n')
    f.write(cmd)

# Swarm file: a single line that runs the wrapper script with bash.
with open('swarm/data_prep.swarm', 'w') as f:
    f.write('bash swarm/data_prep.sh')

# Pass the swarm file to the `swarm` command through shell_do.
swarm_cmd = 'swarm -f swarm/data_prep.swarm --g 200 --time=2:00:00 --logdir swarm/logs --module python/3.9'
shell_do(swarm_cmd)

Executing: swarm -f swarm/data_prep.swarm --g 200 --time=2:00:00 --logdir swarm/logs --module python/3.9


### LSTM Model

In [15]:
# Command line for the LSTM model script, assembled from one list entry per option.
cmd = ' '.join([
    'python run_lstm_model.py',
    f'--train_file {wd}/ref_files/final_overlapping_del_training_set_5_intervals.csv',
    f'--test_file {wd}/testing/GP2_PRKN_all_samples_windows.csv',
    '--feature_names dosage_interval dosage_gene del_dosage std_baf std_lrr iqr_baf iqr_lrr avg_baf avg_lrr',
    f'--model_file {wd}/testing/GP2_PRKN_all_31_windows.keras',
    '--predict',
    '--print_summary',
    f'--out_path {wd}/testing/GP2_PRKN_all',
])

In [16]:
conda_env = 'python_3_9'

# open(..., 'w') does not create missing parent directories, so make sure the
# script directory (and the log directory passed to swarm below) exist first.
os.makedirs('swarm/logs', exist_ok=True)

# Wrapper script: source conda, activate the environment, then run `cmd`.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('swarm/ml_model.sh', 'w') as f:
    f.write('#!/usr/bin/env bash\n\n')
    f.write('source /data/$USER/conda/etc/profile.d/conda.sh\n')
    f.write(f'conda activate {conda_env}\n')
    f.write(cmd)

# Swarm file: a single line that runs the wrapper script with bash.
with open('swarm/ml_model.swarm', 'w') as f:
    f.write('bash swarm/ml_model.sh')

# Pass the swarm file to the `swarm` command through shell_do.
swarm_cmd = 'swarm -f swarm/ml_model.swarm --g 200 --time=2:00:00 --logdir swarm/logs --module python/3.9'
shell_do(swarm_cmd)

Executing: swarm -f swarm/ml_model.swarm --g 200 --time=2:00:00 --logdir swarm/logs --module python/3.9


### Local Download & App Prep

In [24]:
# Command line for the app-prep script, assembled from one list entry per option.
cmd = ' '.join([
    'python run_app_prep.py',
    '--interval_name PARK2',
    f'--test_set_ids {wd}/testing/GP2_PRKN_all_testing_IDs.csv',
    f'--test_set_windows {wd}/testing/GP2_PRKN_all_samples_windows.csv',
    f'--test_set_results {wd}/testing/GP2_PRKN_all_31_windows_results.csv',
    f'--out_path {wd}/testing/GP2_PRKN_all',
    '--make_app_ready',
])

In [25]:
conda_env = 'python_3_9'

# open(..., 'w') does not create missing parent directories, so make sure the
# script directory (and the log directory passed to swarm below) exist first.
os.makedirs('swarm/logs', exist_ok=True)

# Wrapper script: source conda, activate the environment, then run `cmd`.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('swarm/app_prep.sh', 'w') as f:
    f.write('#!/usr/bin/env bash\n\n')
    f.write('source /data/$USER/conda/etc/profile.d/conda.sh\n')
    f.write(f'conda activate {conda_env}\n')
    f.write(cmd)

# Swarm file: a single line that runs the wrapper script with bash.
with open('swarm/app_prep.swarm', 'w') as f:
    f.write('bash swarm/app_prep.sh')

# Pass the swarm file to the `swarm` command through shell_do.
swarm_cmd = 'swarm -f swarm/app_prep.swarm --g 200 --time=2:00:00 --logdir swarm/logs --module python/3.9'
shell_do(swarm_cmd)

Executing: swarm -f swarm/app_prep.swarm --g 200 --time=2:00:00 --logdir swarm/logs --module python/3.9
