Time,	Brachial Data,	Carotid Diameter,	blood velocity

## **Add independent Gaussian noise to each signal column: 'Brachial Data', 'Carotid Diameter', and 'blood velocity' (the 'Time' column is left unchanged).**


In [None]:
import pandas as pd
import numpy as np
import glob
import os
import math

# ---------------------------------------------------------------------------
# Data augmentation: for every Excel file in `input_dir`, write copies whose
# signal columns have independent zero-mean Gaussian noise added, until
# `target` jittered files exist in `output_dir`.
# ---------------------------------------------------------------------------
input_dir = r'C:\LR_Train_107'
output_dir = r'C:\LR_Train_107_Noise_sigma0.10_x30'

# Columns that receive noise; every other column is copied through untouched.
noise_columns = ['Brachial Data', 'Carotid Diameter', 'blood velocity']

# Noise standard deviation, as a fraction of each column's (max - min) range.
noise_fraction = 0.1

# Total number of jittered files to write across all original files.
# NOTE(review): an earlier comment here described "127 * 30 = 3810" for the
# high-risk group (generate N - 1 samples, then copy the originals in
# separately), which does not match this value -- confirm `target` before
# running on a different group.
target = 1421


def jitter_counts(total, n_files):
    """Split *total* jittered copies as evenly as possible over *n_files*.

    Every file gets ``total // n_files`` copies and the first
    ``total % n_files`` files get one extra, so the counts differ by at most
    one and always sum to *total*. (The previous ceil-then-truncate scheme
    gave the last files in the list fewer copies, or none at all.)

    Returns a list of length *n_files*; an empty list when *n_files* is not
    positive, and all zeros when *total* is not positive.
    """
    if n_files <= 0:
        return []
    if total <= 0:
        return [0] * n_files
    share, extra = divmod(total, n_files)
    return [share + 1 if idx < extra else share for idx in range(n_files)]


def add_column_noise(df, columns, fraction):
    """Return a copy of *df* with Gaussian noise added to *columns*.

    For each listed column the noise is drawn independently from
    N(0, sigma) with ``sigma = fraction * (column max - column min)``,
    measured on the original (un-noised) data. *df* itself is not modified.

    Raises KeyError if a name in *columns* is not a column of *df*.
    """
    noisy = df.copy()
    for col in columns:
        spread = df[col].max() - df[col].min()
        sigma = fraction * spread
        noisy[col] = df[col] + np.random.normal(0, sigma, size=len(df))
    return noisy


os.makedirs(output_dir, exist_ok=True)

# Sort for a reproducible processing order (glob's order is unspecified) and
# skip Excel's "~$..." lock files, which match *.xlsx but are not workbooks.
excel_files = sorted(
    path
    for path in glob.glob(os.path.join(input_dir, '*.xlsx'))
    if not os.path.basename(path).startswith('~$')
)
orig_count = len(excel_files)

count = 0
if orig_count == 0:
    # Previously this case crashed with ZeroDivisionError.
    print(f"No .xlsx files found in '{input_dir}'; nothing to jitter.")

for file_path, n_copies in zip(excel_files, jitter_counts(target, orig_count)):
    if n_copies == 0:
        # Only happens when target < number of files; avoid a pointless read.
        continue

    df = pd.read_excel(file_path)
    base = os.path.splitext(os.path.basename(file_path))[0]

    for i in range(n_copies):
        df_j = add_column_noise(df, noise_columns, noise_fraction)

        # The percentage in the name is derived from noise_fraction
        # (0.1 -> "10%") so the file name cannot drift from the noise used.
        new_name = f"{base}_noise{noise_fraction:.0%}_{i + 1}.xlsx"
        df_j.to_excel(os.path.join(output_dir, new_name), index=False)
        count += 1

print(f"A total of {count} jittered files have been generated in '{output_dir}'")
