In [1]:
import pandas as pd 
import subprocess
import os

In [None]:

# Directory layout used by the rest of the notebook. Everything except
# model_output_path is anchored on the absolute working directory.
current_dir = os.path.abspath('')
scripts_path = os.path.join(current_dir, 'scripts')

# Data folders live one level above the working directory.
data_path = os.path.join(current_dir, '..', 'data')
original_data_path = os.path.join(data_path, 'original_data')
model_data_path = os.path.join(data_path, 'processed_data', 'replicated_models')

# Kept relative (not joined onto current_dir), exactly as before.
model_output_path = os.path.join('..', 'model_outputs')

In [None]:
# Formatted expression tables, all located in original_data_path. The order of
# the file names below matches the order of the variables on the left.
tss_path, scramble_path, peak_path, flp3_path, rlp6_path = [
    os.path.join(original_data_path, file_name)
    for file_name in (
        "rLP5_Endo2_lb_expression_formatted.txt",
        "endo_scramble_expression_formatted.txt",
        "peak_tile_expression_formatted.txt",
        "fLP3_Endo2_lb_expression_formatted.txt",
        "rLP6_Endo2_lb_expression_formatted.txt",
    )
]

In [None]:
def generate_processed_std_files():
    """Run ``standardize_expression.R`` on the five formatted expression tables.

    The script is launched through ``Rscript`` and receives, in order,
    ``tss_path``, ``scramble_path``, ``peak_path``, ``flp3_path`` and
    ``rlp6_path``. The captured stdout and stderr are printed, followed by a
    one-line status based on the exit code. A non-zero exit code is reported
    but does not raise.

    Raises:
        FileNotFoundError: if the ``Rscript`` executable cannot be found.
    """
    r_script_path = os.path.join(scripts_path, "standardize_expression.R")
    command = ["Rscript", r_script_path, tss_path, scramble_path, peak_path, flp3_path, rlp6_path]
    # The argument list is passed without shell=True: on POSIX, a list combined
    # with shell=True executes only the first item ("Rscript") and hands the
    # remaining items to the shell itself, so the script never saw its inputs.
    result = subprocess.run(command, capture_output=True, text=True)

    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    if result.returncode == 0:
        print("R script ran successfully.")
    else:
        print(f"R script failed with return code {result.returncode}.")

generate_processed_std_files()

In [None]:
def generate_train_test_files():
    """Run ``define_genome_splits.py`` on ``tss_expression_model_format.txt``.

    The script is invoked with the arguments ``0.75``, ``4639675`` and the
    model-format table path twice. The captured stdout and stderr are printed,
    followed by a one-line status based on the exit code. A non-zero exit code
    is reported but does not raise.

    NOTE(review): this function is only defined in this cell, never called.
    Confirm whether the split files are expected to exist already.

    Raises:
        FileNotFoundError: if the ``python`` executable cannot be found.
    """
    split_script_path = os.path.join(scripts_path, "define_genome_splits.py")
    model_format_path = os.path.join(model_data_path, "tss_expression_model_format.txt")
    # 0.75 / 4639675: presumably the training fraction and the genome length
    # in bp -- TODO confirm against the script's command-line interface.
    command = ["python", split_script_path, str(0.75), str(4639675), model_format_path, model_format_path]
    # The argument list is passed without shell=True: on POSIX, a list combined
    # with shell=True executes only the first item ("python") and hands the
    # remaining items to the shell itself, so the script was never run.
    result = subprocess.run(command, capture_output=True, text=True)

    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    # The status text previously said "R script" although a Python script runs.
    if result.returncode == 0:
        print("Python script ran successfully.")
    else:
        print(f"Python script failed with return code {result.returncode}.")

In [None]:
# Inputs (train/test genome-split tables) and output (PWM info table) that are
# passed to calculate_pwm.R; all three live in model_data_path.
# train_name and test_name are reassigned further down for the classification split.
train_name, test_name, output_name = [
    os.path.join(model_data_path, table_name)
    for table_name in (
        "tss_expression_model_format_train_genome_split.txt",
        "tss_expression_model_format_test_genome_split.txt",
        "tss_expression_pwm_info.txt",
    )
]

In [None]:
def get_PWM_model_results():
    """Run ``calculate_pwm.R`` with the current train, test and output paths.

    The script is launched through ``Rscript`` and receives ``train_name``,
    ``test_name`` and ``output_name`` in that order. The captured stdout and
    stderr are printed, followed by a one-line status based on the exit code.
    A non-zero exit code is reported but does not raise.

    Raises:
        FileNotFoundError: if the ``Rscript`` executable cannot be found.
    """
    r_script_path = os.path.join(scripts_path, "calculate_pwm.R")
    command = ["Rscript", r_script_path, train_name, test_name, output_name]
    # The argument list is passed without shell=True: on POSIX, a list combined
    # with shell=True executes only the first item ("Rscript") and hands the
    # remaining items to the shell itself, so the script never saw its inputs.
    result = subprocess.run(command, capture_output=True, text=True)

    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    if result.returncode == 0:
        print("R script ran successfully.")
    else:
        print(f"R script failed with return code {result.returncode}.")

get_PWM_model_results()
    

In [None]:
def run_linear_model():
    """Run ``linear-model.R`` on the PWM info table.

    The script is launched through ``Rscript`` with two arguments: the path of
    ``tss_expression_pwm_info.txt`` in ``model_data_path`` and the path of
    ``pwm_linear_model_results.txt`` in ``model_output_path``. The captured
    stdout and stderr are printed, followed by a one-line status based on the
    exit code. A non-zero exit code is reported but does not raise.

    Raises:
        FileNotFoundError: if the ``Rscript`` executable cannot be found.
    """
    r_script_path = os.path.join(scripts_path, "linear-model.R")
    output_path = os.path.join(model_output_path, "pwm_linear_model_results.txt")
    model_input = os.path.join(model_data_path, "tss_expression_pwm_info.txt")
    command = ["Rscript", r_script_path, model_input, output_path]
    # The argument list is passed without shell=True: on POSIX, a list combined
    # with shell=True executes only the first item ("Rscript") and hands the
    # remaining items to the shell itself, so the script never saw its inputs.
    result = subprocess.run(command, capture_output=True, text=True)

    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    # Check exit status
    if result.returncode == 0:
        print("R script ran successfully.")
    else:
        print(f"R script failed with return code {result.returncode}.")

run_linear_model()

In [None]:
# Run define_genome_splits.py with the --classification flag on the
# model-format table. A missing "python" executable raises FileNotFoundError;
# a non-zero exit code is only reported.
python_path = os.path.join(scripts_path, "define_genome_splits.py")
model_format_path = os.path.join(model_data_path, "tss_expression_model_format.txt")
# 0.75 / 4639675: presumably the training fraction and the genome length in
# bp -- TODO confirm against the script's command-line interface.
# The argument list is passed without shell=True: on POSIX, a list combined
# with shell=True executes only the first item ("python") and hands the
# remaining items to the shell itself, so the script was never run.
result = subprocess.run(
    ["python", python_path, str(0.75), str(4639675), "--classification", model_format_path, model_format_path],
    capture_output=True,
    text=True,
)

print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)

# The status text previously said "R script" although a Python script runs.
if result.returncode == 0:
    print("Python script ran successfully.")
else:
    print(f"Python script failed with return code {result.returncode}.")

In [None]:
# Classification variants of the genome-split tables (these overwrite the
# earlier train_name / test_name), plus the two 3to6mer tables that are passed
# to kmer_feature_generator.py as its output arguments. All in model_data_path.
train_name, test_name, train_output_name, test_output_name = [
    os.path.join(model_data_path, table_name)
    for table_name in (
        "tss_expression_model_format_train_genome_split_classification.txt",
        "tss_expression_model_format_test_genome_split_classification.txt",
        "tss_scramble_peak_expression_model_format_train_genome_split_classification_3to6mer.txt",
        "tss_scramble_peak_expression_model_format_test_genome_split_classification_3to6mer.txt",
    )
]


In [None]:
# Run kmer_feature_generator.py on the classification train/test tables,
# passing the two 3to6mer output paths and the values 3 and 6 (presumably the
# minimum and maximum k-mer length -- TODO confirm against the script).
# A missing "python" executable raises FileNotFoundError; a non-zero exit code
# is only reported.
python_path = os.path.join(scripts_path, "kmer_feature_generator.py")
# Not used by the command below; kept so the notebook-level variable is unchanged.
model_format_path = os.path.join(model_data_path, "tss_expression_model_format.txt")
# The argument list is passed without shell=True: on POSIX, a list combined
# with shell=True executes only the first item ("python") and hands the
# remaining items to the shell itself, so the script was never run.
result = subprocess.run(
    ["python", python_path, train_name, test_name, train_output_name, test_output_name, str(3), str(6)],
    capture_output=True,
    text=True,
)

print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)

# The status text previously said "R script" although a Python script runs.
if result.returncode == 0:
    print("Python script ran successfully.")
else:
    print(f"Python script failed with return code {result.returncode}.")

In [None]:
# Path passed to kmer_regression_models.py as its third argument (presumably
# where the predictions are written -- confirm against the script).
log_output_path = os.path.join(model_data_path, "kmer_logistic_predictions.txt")

In [None]:
# Run kmer_regression_models.py on the k-mer feature tables with the arguments
# "linear" and "--classification". A missing "python" executable raises
# FileNotFoundError; a non-zero exit code is only reported.
# NOTE(review): the output file is named "kmer_logistic_predictions.txt" while
# the model argument is "linear" -- confirm this is the intended combination.
python_path = os.path.join(scripts_path, "kmer_regression_models.py")
# The argument list is passed without shell=True: on POSIX, a list combined
# with shell=True executes only the first item ("python") and hands the
# remaining items to the shell itself, so the script was never run.
result = subprocess.run(
    ["python", python_path, train_output_name, test_output_name, log_output_path, "linear", "--classification"],
    capture_output=True,
    text=True,
)

print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)

# The status text previously said "R script" although a Python script runs.
if result.returncode == 0:
    print("Python script ran successfully.")
else:
    print(f"Python script failed with return code {result.returncode}.")