In [1]:
import os
import pandas as pd

def count_dataframe_rows(file_path):
    """Return the number of data rows in a tab-separated file.

    The file is parsed by pandas with a tab separator, so its first line is
    consumed as the column header and is not part of the count.

    Args:
        file_path: Path of the file to read.

    Returns:
        The row count, or ``float('inf')`` when reading raised an exception.
        The error is printed to stdout in that case rather than propagated.
    """
    try:
        row_count = len(pd.read_csv(file_path, sep='\t'))
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        # Infinity can never be picked as a minimum by a caller comparing counts.
        row_count = float('inf')
    return row_count

def find_min_table_rows(directory):
    """Find the '.txt' file in ``directory`` with the fewest data rows.

    Every entry whose name ends in '.txt' is counted with
    ``count_dataframe_rows`` and its count is printed.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        A ``(filename, rows)`` tuple for the smallest file. When no file
        yields a finite count the result is ``(None, float('inf'))``.
    """
    smallest_name, smallest_count = None, float('inf')

    for filename in os.listdir(directory):
        if not filename.endswith('.txt'):
            continue

        rows = count_dataframe_rows(os.path.join(directory, filename))
        print(f"File: {filename}, Rows: {rows}")

        # Strict comparison: on ties the first file encountered is kept.
        if rows < smallest_count:
            smallest_name, smallest_count = filename, rows

    return smallest_name, smallest_count

# Scan the Window_21 feature directory and report which '.txt' file has the
# fewest data rows (per-file counts are printed by find_min_table_rows).
directory = '/dcs/large/u2212061/PRV_features/Window_21/'
min_file, min_rows = find_min_table_rows(directory)

# min_file is still None when no '.txt' file produced a finite row count.
if min_file:
    print(f"\nFile with minimum rows: {min_file}")
    print(f"Minimum number of rows: {min_rows}")
else:
    print("No valid text files found in the specified directory.")

File: mesa-sleep-3132.txt, Rows: 678
File: mesa-sleep-5568.txt, Rows: 1419
File: mesa-sleep-5433.txt, Rows: 1179
File: mesa-sleep-4352.txt, Rows: 1299
File: mesa-sleep-0560.txt, Rows: 1059
File: mesa-sleep-5261.txt, Rows: 1179
File: mesa-sleep-1301.txt, Rows: 1419
File: mesa-sleep-6599.txt, Rows: 1059
File: mesa-sleep-5214.txt, Rows: 1059
File: mesa-sleep-2800.txt, Rows: 1419
File: mesa-sleep-2957.txt, Rows: 1179
File: mesa-sleep-5239.txt, Rows: 1179
File: mesa-sleep-2723.txt, Rows: 1238
File: mesa-sleep-3785.txt, Rows: 1419
File: mesa-sleep-4305.txt, Rows: 1419
File: mesa-sleep-4266.txt, Rows: 1419
File: mesa-sleep-6656.txt, Rows: 1419
File: mesa-sleep-4989.txt, Rows: 1299
File: mesa-sleep-3206.txt, Rows: 1059
File: mesa-sleep-0935.txt, Rows: 1599
File: mesa-sleep-6509.txt, Rows: 1119
File: mesa-sleep-5617.txt, Rows: 1179
File: mesa-sleep-2441.txt, Rows: 1299
File: mesa-sleep-5827.txt, Rows: 1299
File: mesa-sleep-4267.txt, Rows: 1479
File: mesa-sleep-0419.txt, Rows: 1419
File: mesa-sl

In [2]:
import datetime

def calculate_sliding_windows(total_hours, small_window_seconds, windows_per_group):
    """Count the sliding groups of small windows that fit in a time span.

    A group covers ``windows_per_group`` consecutive small windows, and
    successive groups start one small window apart.

    Args:
        total_hours: Length of the whole span, in hours.
        small_window_seconds: Length of one small window, in seconds.
        windows_per_group: Number of small windows that make up one group.

    Returns:
        The number of groups, which is also printed to stdout. The result is
        0 when the span is shorter than a single group.

    Raises:
        ZeroDivisionError: If ``small_window_seconds`` is 0.
    """
    total_seconds = total_hours * 3600
    large_window_seconds = small_window_seconds * windows_per_group

    # Without the clamp a span shorter than one group gives a negative count.
    num_groups = max(0, (total_seconds - large_window_seconds) // small_window_seconds + 1)

    print(f"Total number of groups: {num_groups}")

    # Returned as well as printed so callers can reuse the value instead of
    # copying the printed number by hand.
    return num_groups


# Configuration for the group count: a 10-hour span split into 30-second small
# windows, with 21 small windows per group.
# NOTE(review): the printed result (1180) equals the default max_lines /
# target_rows used by truncate_file and pad_file later in this notebook;
# those defaults presumably come from this calculation — confirm.
total_hours = 10
small_window_seconds = 30
windows_per_group = 21

calculate_sliding_windows(total_hours, small_window_seconds, windows_per_group)

Total number of groups: 1180


In [3]:
# Count the data rows of a single file. skiprows=1 together with header=None
# drops the first line, so it is not included in the count.
df = pd.read_csv('/dcs/large/u2212061/PRV_features/Window_21/mesa-sleep-0001.txt', sep='\t', skiprows=1, header=None)
original_length = len(df)

print(original_length)

1419


In [4]:
import os
import pandas as pd

def truncate_file(file_path, output_directory, max_lines=1180):
    """Write a copy of a tab-separated file limited to ``max_lines`` data rows.

    The first line of the input is carried over as the header (with
    surrounding whitespace stripped). The remaining rows are parsed by pandas
    and written back tab-separated. Files that already have ``max_lines``
    rows or fewer are rewritten without dropping any rows.

    Args:
        file_path: Path of the input file.
        output_directory: Existing directory that receives the output file;
            the output keeps the input's base name.
        max_lines: Maximum number of data rows to keep (header excluded).
    """
    # Data rows only: the first line is skipped here and re-emitted below.
    table = pd.read_csv(file_path, sep='\t', skiprows=1, header=None)
    original_length = len(table)

    with open(file_path, 'r') as source:
        header = source.readline().strip()

    output_filename = os.path.join(output_directory, os.path.basename(file_path))

    was_truncated = original_length > max_lines
    if was_truncated:
        table = table.iloc[:max_lines]

    # Both outcomes write the same layout: header line, then the data rows.
    with open(output_filename, 'w') as target:
        target.write(header + '\n')
        table.to_csv(target, sep='\t', header=False, index=False)

    if was_truncated:
        print(f"Truncated {file_path} from {original_length} to {max_lines} lines (excluding header)")
        print(f"Saved truncated file to {output_filename}")
    else:
        print(f"No truncation needed for {file_path} ({original_length} lines, excluding header)")
        print(f"Copied file to {output_filename}")

def process_directory(input_directory, output_directory):
    """Run ``truncate_file`` on every '.txt' file in ``input_directory``.

    ``output_directory`` is created first if it does not exist.

    NOTE: later cells in this notebook define other functions with this same
    name; whichever definition was executed most recently is the one in effect.

    Args:
        input_directory: Directory whose '.txt' files are processed.
        output_directory: Directory that receives the truncated copies.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    txt_names = [name for name in os.listdir(input_directory) if name.endswith('.txt')]
    for name in txt_names:
        truncate_file(os.path.join(input_directory, name), output_directory)



In [5]:
# Truncation pass over the Window_21 feature files; results go to a sibling
# "Window_21_truncated" directory. This relies on process_directory being the
# truncate_file variant defined in the preceding cell.
input_directory = '/dcs/large/u2212061/PRV_features/Window_21/'
output_directory = '/dcs/large/u2212061/PRV_features/Window_21_truncated/' 
process_directory(input_directory, output_directory)

No truncation needed for /dcs/large/u2212061/PRV_features/Window_21/mesa-sleep-3132.txt (678 lines, excluding header)
Copied file to /dcs/large/u2212061/PRV_features/Window_21_truncated/mesa-sleep-3132.txt
Truncated /dcs/large/u2212061/PRV_features/Window_21/mesa-sleep-5568.txt from 1419 to 1180 lines (excluding header)
Saved truncated file to /dcs/large/u2212061/PRV_features/Window_21_truncated/mesa-sleep-5568.txt
No truncation needed for /dcs/large/u2212061/PRV_features/Window_21/mesa-sleep-5433.txt (1179 lines, excluding header)
Copied file to /dcs/large/u2212061/PRV_features/Window_21_truncated/mesa-sleep-5433.txt
Truncated /dcs/large/u2212061/PRV_features/Window_21/mesa-sleep-4352.txt from 1299 to 1180 lines (excluding header)
Saved truncated file to /dcs/large/u2212061/PRV_features/Window_21_truncated/mesa-sleep-4352.txt
No truncation needed for /dcs/large/u2212061/PRV_features/Window_21/mesa-sleep-0560.txt (1059 lines, excluding header)
Copied file to /dcs/large/u2212061/PRV_fea

In [6]:
# Same truncation pass, applied to the files under PRV_stage/Window_21.
# NOTE(review): these are presumably the per-window stage labels that pair
# with the feature files — confirm.
input_directory = '/dcs/large/u2212061/PRV_stage/Window_21/'
output_directory = '/dcs/large/u2212061/PRV_stage/Window_21_truncated/'
process_directory(input_directory, output_directory)

No truncation needed for /dcs/large/u2212061/PRV_stage/Window_21/mesa-sleep-3132.txt (678 lines, excluding header)
Copied file to /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-3132.txt
Truncated /dcs/large/u2212061/PRV_stage/Window_21/mesa-sleep-5568.txt from 1419 to 1180 lines (excluding header)
Saved truncated file to /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-5568.txt
No truncation needed for /dcs/large/u2212061/PRV_stage/Window_21/mesa-sleep-5433.txt (1179 lines, excluding header)
Copied file to /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-5433.txt
Truncated /dcs/large/u2212061/PRV_stage/Window_21/mesa-sleep-4352.txt from 1299 to 1180 lines (excluding header)
Saved truncated file to /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-4352.txt
No truncation needed for /dcs/large/u2212061/PRV_stage/Window_21/mesa-sleep-0560.txt (1059 lines, excluding header)
Copied file to /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa

In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import os

def standardize_file(file_path, output_directory):
    """Standardize every column of a tab-separated file and save the result.

    A new ``StandardScaler`` is fitted on this file alone, so the scaling
    statistics are computed per file and are not shared across files.

    Args:
        file_path: Path of the input file; its first line is the header.
        output_directory: Directory that receives the output, which keeps the
            input's base name and column names.
    """
    table = pd.read_csv(file_path, sep='\t')

    # Fit and transform in one step; the result is a plain array, so the
    # column names are re-attached when rebuilding the frame.
    standardized = pd.DataFrame(
        StandardScaler().fit_transform(table),
        columns=table.columns,
    )

    base_name = os.path.basename(file_path)
    output_file = os.path.join(output_directory, f"{base_name}")
    standardized.to_csv(output_file, sep='\t', index=False)

    print(f"Standardized data saved to {output_file}")

def process_directory(input_directory, output_directory):
    """Run ``standardize_file`` on every '.txt' file in ``input_directory``.

    ``output_directory`` is created first if it does not exist.

    NOTE: this definition replaces the earlier ``process_directory`` from the
    truncation cell, and a later cell replaces it again.

    Args:
        input_directory: Directory whose '.txt' files are processed.
        output_directory: Directory that receives the standardized files.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    txt_names = (name for name in os.listdir(input_directory) if name.endswith('.txt'))
    for name in txt_names:
        standardize_file(os.path.join(input_directory, name), output_directory)

# Standardize the truncated feature files; results go to "Window_21_normalized".
input_directory = '/dcs/large/u2212061/PRV_features/Window_21_truncated/'  
output_directory = '/dcs/large/u2212061/PRV_features/Window_21_normalized/' 
process_directory(input_directory, output_directory)

Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-3132.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-5568.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-5433.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-4352.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-0560.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-5261.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-1301.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-6599.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-5214.txt
Standardized data saved to /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep

In [8]:
import pandas as pd
import numpy as np
import os

def pad_file(file_path, output_directory, pad_value, target_rows=1180):
    """Write a copy of a tab-separated file padded up to ``target_rows`` rows.

    When the file has fewer than ``target_rows`` data rows, rows filled with
    ``pad_value`` in every column are appended at the end. Otherwise the file
    is rewritten unchanged in length (it is never shortened here).

    Args:
        file_path: Path of the input file; its first line is the header.
        output_directory: Existing directory that receives the output, which
            keeps the input's base name.
        pad_value: Value placed in every cell of each appended row.
        target_rows: Minimum number of data rows in the output.
    """
    table = pd.read_csv(file_path, sep='\t')
    current_rows = len(table)

    # The destination is the same whether or not padding is applied.
    base_name = os.path.basename(file_path)
    output_file = os.path.join(output_directory, f"{base_name}")

    if current_rows < target_rows:
        rows_to_add = target_rows - current_rows

        filler_row = [pad_value] * len(table.columns)
        filler = pd.DataFrame(
            [filler_row for _ in range(rows_to_add)],
            columns=table.columns,
        )

        pd.concat([table, filler], ignore_index=True).to_csv(output_file, sep='\t', index=False)

        print(f"Padded file saved to {output_file} (Added {rows_to_add} rows)")
    else:
        table.to_csv(output_file, sep='\t', index=False)

        print(f"No padding needed for {file_path} (Current rows: {current_rows})")

def process_directory(input_directory, output_directory, pad_value):
    """Run ``pad_file`` on every '.txt' file in ``input_directory``.

    ``output_directory`` is created first if it does not exist. Files are
    padded to ``pad_file``'s default ``target_rows``.

    NOTE: this definition replaces the two-argument ``process_directory``
    functions defined in earlier cells of this notebook.

    Args:
        input_directory: Directory whose '.txt' files are processed.
        output_directory: Directory that receives the padded files.
        pad_value: Fill value forwarded to ``pad_file``.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for name in os.listdir(input_directory):
        if not name.endswith('.txt'):
            continue
        source_path = os.path.join(input_directory, name)
        pad_file(source_path, output_directory, pad_value)



In [9]:
# Pad the standardized feature files up to pad_file's default row count and
# write them to "Window_21_final".
# NOTE(review): 100 is presumably chosen to sit far outside the standardized
# value range so padded rows can be recognised downstream — confirm.
input_directory = '/dcs/large/u2212061/PRV_features/Window_21_normalized/'  
output_directory = '/dcs/large/u2212061/PRV_features/Window_21_final/'
pad_value = 100
process_directory(input_directory, output_directory, pad_value)

Padded file saved to /dcs/large/u2212061/PRV_features/Window_21_final/mesa-sleep-3132.txt (Added 502 rows)
No padding needed for /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-5568.txt (Current rows: 1180)
Padded file saved to /dcs/large/u2212061/PRV_features/Window_21_final/mesa-sleep-5433.txt (Added 1 rows)
No padding needed for /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-4352.txt (Current rows: 1180)
Padded file saved to /dcs/large/u2212061/PRV_features/Window_21_final/mesa-sleep-0560.txt (Added 121 rows)
Padded file saved to /dcs/large/u2212061/PRV_features/Window_21_final/mesa-sleep-5261.txt (Added 1 rows)
No padding needed for /dcs/large/u2212061/PRV_features/Window_21_normalized/mesa-sleep-1301.txt (Current rows: 1180)
Padded file saved to /dcs/large/u2212061/PRV_features/Window_21_final/mesa-sleep-6599.txt (Added 121 rows)
Padded file saved to /dcs/large/u2212061/PRV_features/Window_21_final/mesa-sleep-5214.txt (Added 121 rows)
No padding 

In [10]:
# Pad the truncated PRV_stage files (these were not standardized) to the same
# row count, filling with -1.
# NOTE(review): -1 is presumably a sentinel that is not a valid stage value —
# confirm against the label encoding.
input_directory = '/dcs/large/u2212061/PRV_stage/Window_21_truncated/'  
output_directory = '/dcs/large/u2212061/PRV_stage/Window_21_final/'  
pad_value = -1
process_directory(input_directory, output_directory, pad_value)

Padded file saved to /dcs/large/u2212061/PRV_stage/Window_21_final/mesa-sleep-3132.txt (Added 502 rows)
No padding needed for /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-5568.txt (Current rows: 1180)
Padded file saved to /dcs/large/u2212061/PRV_stage/Window_21_final/mesa-sleep-5433.txt (Added 1 rows)
No padding needed for /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-4352.txt (Current rows: 1180)
Padded file saved to /dcs/large/u2212061/PRV_stage/Window_21_final/mesa-sleep-0560.txt (Added 121 rows)
Padded file saved to /dcs/large/u2212061/PRV_stage/Window_21_final/mesa-sleep-5261.txt (Added 1 rows)
No padding needed for /dcs/large/u2212061/PRV_stage/Window_21_truncated/mesa-sleep-1301.txt (Current rows: 1180)
Padded file saved to /dcs/large/u2212061/PRV_stage/Window_21_final/mesa-sleep-6599.txt (Added 121 rows)
Padded file saved to /dcs/large/u2212061/PRV_stage/Window_21_final/mesa-sleep-5214.txt (Added 121 rows)
No padding needed for /dcs/large/u2212061