In [None]:
from pathlib import Path
import sys
import time
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
import tauso
from notebooks.preprocessing import *
from notebooks.features.feature_extraction import save_feature

In [None]:
# The project root is taken to be two directory levels above the current
# working directory, so this cell depends on where the kernel was started.
PROJECT_ROOT = Path.cwd().parents[1]

# Input table lives under <root>/data/.
csv_path = PROJECT_ROOT.joinpath("data", "data_asoptimizer_updated.csv")

# Put the root at the front of the module search path.
# NOTE(review): the `notebooks.*` imports in the first cell execute before this
# line — confirm they resolve on a fresh kernel (Restart & Run All).
sys.path.insert(0, str(PROJECT_ROOT))

In [None]:
# Build the working dataset by running preprocess_aso_data on the CSV at csv_path.
# NOTE(review): preprocess_aso_data is not imported by name; presumably it comes
# from the `from notebooks.preprocessing import *` star import — confirm.
aso_data = preprocess_aso_data(csv_path=csv_path)

In [None]:
# Quick visual sanity check of the loaded data
# (assumes aso_data is a pandas DataFrame, so this shows its first rows — TODO confirm).
aso_data.head()

In [None]:
# Work on a copy so aso_data keeps its original binding and contents.
df_new = aso_data.copy()

# Baseline accessibility run: only batch_size is overridden, every other
# parameter of calculate_sense_accessibility_batch keeps its default.
batch_result = calculate_sense_accessibility_batch(df_new, batch_size=1000)

# Left join: every row of df_new is kept and matched by its index against the
# 'rna_id' column of the result.
# NOTE(review): with left_index/right_on the merged frame's index may not match
# aso_data's index — confirm before aligning new_result with other columns.
df_new = pd.merge(
    df_new,
    batch_result,
    how='left',
    left_index=True,
    right_on='rna_id',
)

# The 'access' column as floats.
new_result = df_new['access'].astype(float)

In [None]:
# Relabel the 'rna_id' column as 'index'
# (presumably the key column name save_feature expects — verify against save_feature).
batch_result = batch_result.rename({'rna_id': 'index'}, axis='columns')

In [None]:
# Give the generic 'access' column the name under which this feature is saved.
batch_result = batch_result.rename(
    {'access': 'access_120flank_13access_size_13seed_size'},
    axis='columns',
)

In [None]:
# Hand the result frame to save_feature under the baseline feature name.
# NOTE(review): where and in what format the feature is stored is decided inside
# save_feature (notebooks.features.feature_extraction), not visible here.
save_feature(df=batch_result, feature_name= 'access_120flank_13access_size_13seed_size')

In [None]:
# Parameter grid for the additional accessibility features. Each entry
# triggers one call to calculate_sense_accessibility_batch and produces one
# saved feature:
#   "flank"  -> passed as flank_size
#   "access" -> passed as access_size
#   "seeds"  -> passed as seed_sizes (a list of seed sizes)
configurations = [
    {"flank": 120, "access": 20, "seeds": [13]},
    {"flank": 120, "access": 13, "seeds": [4, 6, 8]},
    {"flank": 120, "access": 20, "seeds": [4, 6, 8]}
]

# Separate copy of the dataset, shared by every configuration in the loop.
df_work = aso_data.copy()

for config in configurations:
    c_flank = config["flank"]
    c_access = config["access"]
    c_seeds = config["seeds"]

    # Progress message (the stray ")" after the seeds list has been removed).
    print(f"Running: Flank={c_flank}, Access={c_access}, Seeds={c_seeds}...")

    batch_result = calculate_sense_accessibility_batch(
        df_work,
        batch_size=1000,
        flank_size=c_flank,
        access_size=c_access,
        seed_sizes=c_seeds
    )

    # Encode the configuration in the feature name; several seed sizes are
    # joined with "-", e.g. access_120flank_13access_4-6-8seed_sizes.
    seeds_str = "-".join(map(str, c_seeds))
    feature_name = f'access_{c_flank}flank_{c_access}access_{seeds_str}seed_sizes'

    # Rename the key column to 'index' and the value column to the feature
    # name, then save the feature under that name.
    batch_result = batch_result.rename(columns={'rna_id': 'index', 'access': feature_name})
    save_feature(df=batch_result, feature_name=feature_name)
    print(f"Saved: {feature_name}")