In [1]:
import sys
import pathlib
import scipy.io
import tqdm
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd


In [2]:
def get_file_type(file_path):
    if "iid_same" in file_path:
        return "iid_same"
    elif "iid_opposite" in file_path:
        return "iid_opposite"
    elif "rdw_same" in file_path:
        return "rdw_same"
    elif "rdw_opposite" in file_path:
        return "rdw_opposite"
    else:
        return "other"

In [8]:
data_csv =  sorted(str(p) for p in pathlib.Path("../data/new_path_2").glob("*.csv"))
sorted_files = sorted(data_csv, key=get_file_type)
data_csv_iid_op = [file for file in sorted_files if get_file_type(file) == "iid_opposite"]
data_csv_iid_same = [file for file in sorted_files if get_file_type(file) == "iid_same"]
data_csv_rdw_op = [file for file in sorted_files if get_file_type(file) == "rdw_opposite"]
data_csv_rdw_same = [file for file in sorted_files if get_file_type(file) == "rdw_same"]

data_sim_iid_op = [pd.read_csv(file, header=None, sep =';').to_numpy() for file in data_csv_iid_op]
data_sim_iid_same = [pd.read_csv(file, header=None, sep =';').to_numpy() for file in data_csv_iid_same]
data_sim_rdw_op = [pd.read_csv(file, header=None, sep =';').to_numpy() for file in data_csv_rdw_op]
data_sim_rdw_same = [pd.read_csv(file, header=None, sep =';').to_numpy() for file in data_csv_rdw_same]

#data_sim_iid_op = data_sim_iid_op[28:]
#data_sim_iid_same = data_sim_iid_same[28:]

#data_sim_rdw_op = data_sim_rdw_op[28:]
#data_sim_rdw_same = data_sim_rdw_same[28:]

In [10]:
import numpy as np
import scipy.io
import tqdm
import pandas as pd
import os
from tqdm.auto import trange

nsmooth = 5
noise = 3
nbias = 2
n = 200
indices = np.arange(0, 100)


conditions = [
    ("iid opposite", data_sim_iid_op),
    ("iid same", data_sim_iid_same),
    ("rdw opposite", data_sim_rdw_op),
    ("rdw same", data_sim_rdw_same),
]



for condition_name, list_of_datasets in conditions:
    print(f"\nSimulating condition: {condition_name}")

    for file_idx, data in enumerate(tqdm.tqdm(list_of_datasets, desc=f"{condition_name}")):
        df_results_all = {}
        num_indices = len(indices)
        rmdl = np.zeros(num_indices)
        nmdl = np.zeros(num_indices)
        mpos = np.zeros(num_indices)
        lpos = np.zeros(num_indices)
        mlastpos = np.zeros(num_indices)
        same = np.zeros(num_indices, dtype=bool)
        normative_sides = []
        all_resp = np.zeros((num_indices, n))


        for idx, tidx in enumerate(indices):
            
            pos = data[:, tidx].copy()

            invis = np.sum(pos[1:] == 0)
            pos[-invis:] = np.nan

            lastpos = pos[-invis - 1]
            visible_pos = pos[~np.isnan(pos)]
            n_visible = len(visible_pos)

            meanpos = np.nanmean(pos)
            n_tail = max(1, int(n_visible * 0.1))
            last_10pct_mean = np.mean(visible_pos[-n_tail:])

            mpos[idx] = meanpos
            lpos[idx] = lastpos
            mlastpos[idx] = last_10pct_mean
            same[idx] = meanpos * lastpos > 0

            if "iid" in condition_name:
                normative = np.sign(meanpos)
            else:
                normative = np.sign(lastpos)

            nmdl[idx] = normative
                        
            if "iid" in condition_name:
                value = meanpos
            else:  # RDW
                value = lastpos
            normative_side = 1 if value < 0 else 0
            normative_sides.append(normative_side)

            for k in range(n):
            #for k in trange(n, leave=False, desc=f"Sim {condition_name} file {file_idx} traj {idx}"):
                npos = pos + np.random.randn(len(pos)) * noise + np.random.randn() * nbias
                segment = npos[-(invis + nsmooth):-invis] if invis > 0 else npos[-nsmooth:]
                if len(segment) == 0 or np.isnan(segment).any():
                    resp = 0
                else:
                    resp = np.sign(np.mean(segment))

                all_resp[idx, k] = resp

            rmdl[idx] = np.sum(all_resp[idx, :] * normative > 0) / n
        normative_sides = np.array(normative_sides)
        df_result = pd.DataFrame({
            'trajectory_id': indices[:len(rmdl)],
            'condition': [condition_name] * len(rmdl),
            'rmdl': rmdl,
            'nmdl': nmdl,
            'normative_side':  normative_sides.reshape(-1),
            'meanpos': mpos,
            'lastpos': lpos,
            'last10pct': mlastpos,
            'same_sign': same.astype(int)
        })

        df_resp = pd.DataFrame(all_resp)
        df_results_all[condition_name] = df_result
        os.makedirs("../data/sbpm_outputs_new_path_28", exist_ok=True)
        condition = condition_name.replace(" ", "_")
        filename = f"../data/sbpm_outputs_new_path_46_2/sbpm_pred_on_new_{condition}_{file_idx}.csv"
        
        #df_result.to_csv(f"{out_base}_summary.csv", sep='\t', index=False)
        df_result.to_csv(filename, sep='\t',index=False)
        #print(f'file for {condition_name}_{file_idx} saved ')


Simulating condition: iid opposite


iid opposite: 100%|████████████████████████████████████████████████████████████████████| 28/28 [00:42<00:00,  1.50s/it]



Simulating condition: iid same


iid same: 100%|████████████████████████████████████████████████████████████████████████| 28/28 [00:41<00:00,  1.49s/it]



Simulating condition: rdw opposite


rdw opposite: 100%|████████████████████████████████████████████████████████████████████| 28/28 [00:40<00:00,  1.46s/it]



Simulating condition: rdw same


rdw same: 100%|████████████████████████████████████████████████████████████████████████| 28/28 [00:41<00:00,  1.47s/it]
