# Run in dlProject environment

In [1]:
import os
import pandas as pd
import numpy as np
# Base directory under which the DeepLearning-P24 project lives; later cells
# build their input/output paths (and the working directory) by appending to it.
initial_wd = "/zhome/42/8/213460"


In [2]:
def concatenate_and_save_data(root_folder, output_path, n_splits=5, overwrite=False):
    """Merge the per-split ``.npy`` files into one DataFrame and cache it as CSV.

    For every ``split_<k>`` folder (k = 1..n_splits) under ``root_folder`` the
    files ``test_physionet2012_<k>.npy``, ``train_physionet2012_<k>.npy`` and
    ``validation_physionet2012_<k>.npy`` are loaded, in that order. Files that
    do not exist are reported on stdout and skipped.

    Parameters
    ----------
    root_folder : str
        Directory containing the ``split_<k>`` sub-folders.
    output_path : str
        Directory that receives ``data_merged.csv``. Created if it is missing.
    n_splits : int, default 5
        Number of split folders to scan, starting at ``split_1``.
    overwrite : bool, default False
        When False (the original behaviour) an existing ``data_merged.csv`` is
        left untouched; when True it is regenerated from the loaded data.

    Returns
    -------
    pandas.DataFrame
        One row per loaded record with the columns ``data`` (the record),
        ``split`` (split number) and ``type`` (``test``/``train``/``validation``).

    Raises
    ------
    ValueError
        If none of the expected files could be found under ``root_folder``.
    """
    frames = []

    for split_number in range(1, n_splits + 1):
        split_folder = os.path.join(root_folder, f'split_{split_number}')

        # Fixed order (test, train, validation) determines the row order of the result.
        for subset in ('test', 'train', 'validation'):
            file_path = os.path.join(split_folder, f'{subset}_physionet2012_{split_number}.npy')

            if not os.path.exists(file_path):
                print(f"Path to {file_path} doesn't exist.")
                continue

            # SECURITY NOTE: allow_pickle=True unpickles arbitrary Python objects,
            # which can execute code. Only point this at trusted files.
            records = np.load(file_path, allow_pickle=True)

            df = pd.DataFrame({'data': records})
            # Keep track of where each record came from.
            df['split'] = split_number
            df['type'] = subset
            frames.append(df)

    # pd.concat raises an opaque "No objects to concatenate" on an empty list;
    # fail with a message that points at the actual problem instead.
    if not frames:
        raise ValueError(
            f"No split files found under {root_folder} "
            f"(looked in {n_splits} split folder(s))."
        )

    merged_df = pd.concat(frames, ignore_index=True)

    print(merged_df.shape)

    # to_csv does not create missing directories.
    os.makedirs(output_path, exist_ok=True)
    file_path = os.path.join(output_path, 'data_merged.csv')
    if os.path.exists(file_path) and not overwrite:
        # The cached file may be stale relative to merged_df; pass
        # overwrite=True to refresh it.
        print("File exists already.")
    else:
        merged_df.to_csv(file_path, index=False, encoding="utf-8", header=True, sep=";")
    return merged_df

In [3]:
# Source folder holding the split_<k> directories, and the folder that
# receives data_merged.csv; both live under the project directory.
input_path = f"{initial_wd}/DeepLearning-P24/P12data"
output_path = f"{initial_wd}/DeepLearning-P24/MAMBA"
data = concatenate_and_save_data(root_folder=input_path, output_path=output_path)

(59940, 3)
File exists already.


# Run in Mamba environment

In [5]:
import sys
import os
# NOTE: `initial_wd` comes from the first cell; re-run that cell if the kernel
# was restarted when switching environments.
print(os.getcwd())
# Switch into the MAMBA directory before importing `odyssey` — presumably the
# package is resolved relative to this directory (TODO confirm).
os.chdir(initial_wd + "/DeepLearning-P24/MAMBA")
print(os.getcwd())
from odyssey.models.model_utils import load_pretrain_data
path = os.path.join(os.getcwd(), "data_merged.csv")

data = load_pretrain_data(path)
# A bare `data.shape` in the middle of a cell is evaluated and discarded;
# print it so the shape actually appears in the cell output.
print(data.shape)
data

/zhome/42/8/213460/DeepLearning-P24/MAMBA
/zhome/42/8/213460/DeepLearning-P24/MAMBA


Unnamed: 0,data,split,type
0,"{'ts_values': array([[ 0.169661 , 0. ...",1,test
1,"{'ts_values': array([[-0.32848355, 0. ...",1,test
2,"{'ts_values': array([[-0.93597676, 0. ...",1,test
3,"{'ts_values': array([[-0.94812659, 0. ...",1,test
4,"{'ts_values': array([[-0.57553073, 0. ...",1,test
...,...,...,...
59935,"{'ts_values': array([[-0.09534583, 0. ...",5,validation
59936,"{'ts_values': array([[-0.79239581, 0. ...",5,validation
59937,"{'ts_values': array([[-0.52646922, 0. ...",5,validation
59938,"{'ts_values': array([[-1.61435098, 0. ...",5,validation
