In [14]:
import os
import csv
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d


# ----------------------------
# User-defined Configuration
# ----------------------------
# Base name of the CSV file containing the raw data in the required format
# (process_experiments requires the column count to be a multiple of 6).
rawFilename = "rawDataFilename"    # do not include ".csv"; the extension is appended when the file is opened

# Base name of the CSV file the preprocessed data is stored in
procFilename = "preprocessedDataFilename"    # do not include ".csv"; the extension is appended when the file is opened



# ----------------------------
# Preprocessing Functions
# ----------------------------
# Function preprocessing raw data
def _paired_xy(df, x_pos, y_pos):
    """Return the x/y columns at the given positions, keeping only rows where both values are present."""
    pair = df.iloc[:, [x_pos, y_pos]].dropna()
    return pair.iloc[:, 0].values, pair.iloc[:, 1].values


def process_experiments(raw_filename, proc_filename):
    """
    Reads raw CSV, processes each experiment group (6 columns), and stores results.

    Layout of one 6-column group as consumed by this function:
    - column 0: header cell is the dataTag (data cells are not read)
    - column 1: header cell is the preLoad (data cells are not read)
    - columns 2, 3: x and y values of the first curve
    - columns 4, 5: x and y values of the second curve

    For every group, the curve with more samples is linearly interpolated
    (extrapolating outside its range) onto the x values of the curve with fewer
    samples. The difference "second curve y - first curve y" on that x grid is
    handed to storeANNDataCSV.

    Parameters:
    - raw_filename: Name of the CSV file containing raw data (without ".csv")
    - proc_filename: Name of the CSV file the processed data is going to be stored (without ".csv")

    Raises:
    - ValueError: if the number of columns is not a multiple of 6
    """
    raw_path = f"{raw_filename}.csv"
    delim = detect_delimiter(raw_path)
    df = pd.read_csv(raw_path, sep=delim)
    cols = list(df.columns)
    total_cols = len(cols)

    if total_cols % 6 != 0:
        raise ValueError("Raw data must have columns multiple of 6")

    # pandas de-duplicates header cells ("50" -> "50.1"), which would silently
    # turn a repeated preLoad of 50 into 50.1 (and alter repeated tags). The
    # header carries metadata here, so read it again verbatim.
    header_row = pd.read_csv(
        raw_path, sep=delim, header=None, nrows=1, dtype=str, keep_default_na=False
    )
    labels = [str(cell) for cell in header_row.iloc[0]]
    if len(labels) == total_cols:
        # Empty header cells keep the placeholder name pandas generated for them.
        labels = [raw if raw != "" else mangled for raw, mangled in zip(labels, cols)]
    else:
        # Unexpected shape (e.g. pandas inferred an index column): use pandas' names.
        labels = cols

    for base in range(0, total_cols, 6):
        dataTag = labels[base]
        # parse preLoad, keep as string if not numeric
        try:
            preLoad = float(labels[base + 1])
        except ValueError:
            preLoad = labels[base + 1]

        # Extract each curve row-wise so that a missing cell in only one of the
        # two columns drops the whole sample instead of misaligning x and y.
        x0, y0 = _paired_xy(df, base + 2, base + 3)
        x1, y1 = _paired_xy(df, base + 4, base + 5)

        # align lengths by interpolating the longer onto the shorter
        if len(x0) >= len(x1):
            base_x = x1
            f0 = interp1d(x0, y0, kind='linear', bounds_error=False, fill_value="extrapolate")
            y_diff = y1 - f0(base_x)
        else:
            base_x = x0
            f1 = interp1d(x1, y1, kind='linear', bounds_error=False, fill_value="extrapolate")
            y_diff = f1(base_x) - y0

        # store processed results
        storeANNDataCSV(base_x, y_diff, proc_filename, dataTag, preLoad)


# Function storing processed data in a CSV file in required format
def storeANNDataCSV(x_stress, y_straindiff, filename, dataTag, preLoad):
    """
    Appends one processed experiment as a row of the CSV file '<filename>.csv'.

    The file is created with the header 'dataTag,preLoad,xy1,...,xy150' if it does
    not exist yet. When a row with the same dataTag is already stored, nothing is
    written. Otherwise y_straindiff is linearly resampled onto 150 equally spaced
    points between the first and the last entry of x_stress, divided by its maximum
    (replaced by zeros when that maximum is 0) and multiplied element-wise with the
    resampled x values; these 150 products form the xy1..xy150 entries.

    Parameters:
    - x_stress: List or array of x-axis values representing stress.
    - y_straindiff: List or array of y-axis values representing strain difference.
    - filename: Name of the CSV file the preprocessed data is stored (without ".csv").
    - dataTag: Unique identifier for the DRA experiment.
    - preLoad: Pre-load value associated with the DRA experiment.
    """
    n_points = 150
    csv_path = f"{filename}.csv"

    # First use: create the file containing only the header row
    if not os.path.exists(csv_path):
        column_names = ["dataTag", "preLoad"]
        column_names.extend(f"xy{k}" for k in range(1, n_points + 1))
        with open(csv_path, "w", newline='', encoding="utf-8") as out:
            csv.writer(out).writerow(column_names)

    # Each dataTag is stored only once
    if check_unique(filename, dataTag):
        print(f"{dataTag} already exists. Skipping.")
        return

    # Resample onto an equally spaced grid spanning first..last x value
    grid = np.linspace(x_stress[0], x_stress[-1], n_points)
    resample = interp1d(x_stress, y_straindiff, kind='linear', fill_value="extrapolate")
    resampled = resample(grid)

    # Scale by the maximum (guarding the division) and weight with x
    peak = np.max(resampled)
    normalised = resampled / peak if peak != 0 else np.zeros_like(resampled)
    energy = grid * normalised

    with open(csv_path, "a", newline='', encoding="utf-8") as out:
        csv.writer(out).writerow([dataTag, preLoad, *energy])
    print(f"{dataTag} added.")

    
# Function which checks if an experiment has already been stored in the CSV file we are storing the processed data to.
def check_unique(filename, dataTag):
    """
    Tell whether an experiment with the given dataTag is already stored in the processed CSV file.

    Note that, despite the name, the result is True when the tag is ALREADY
    present (i.e. it would not be unique) and False otherwise, including when
    the file does not exist.

    Parameters:
    - filename: Name of the CSV file the preprocessed data is stored (without ".csv").
    - dataTag: Unique identifier for the DRA experiment.
    """
    csv_path = f"{filename}.csv"
    if os.path.exists(csv_path):
        with open(csv_path, "r", newline='', encoding="utf-8") as handle:
            records = csv.reader(handle)
            next(records, None)  # the first row is the header, never a stored experiment
            # empty rows are ignored; the tag lives in the first field
            return any(record and record[0] == dataTag for record in records)
    return False


# Function which detects what delimiter is used in a CSV file
def detect_delimiter(filename):
    """
    Heuristic delimiter detection: count commas, semicolons and tabs and
    choose whichever appears most in the first 2048 characters of the CSV file.

    The file is decoded as UTF-8 with undecodable bytes replaced instead of
    using the platform's default encoding: only the ASCII delimiter characters
    are counted, so a foreign or broken encoding must not abort the detection.

    Parameters:
    - filename: Path of the CSV file containing raw data (including ".csv").

    Returns:
    - The detected delimiter: a comma, a semicolon or a tab character. A comma
      wins a tie for the highest count and is also returned for a file without
      any candidate delimiter.
    """
    with open(filename, 'r', newline='', encoding="utf-8", errors="replace") as f:
        sample = f.read(2048)
    counts = {candidate: sample.count(candidate) for candidate in (',', ';', '\t')}
    # pick the delimiter with the highest count; tie-break in favor of comma
    return max(counts, key=lambda k: (counts[k], k == ','))



# Run the preprocessing with the filenames configured above.
# Experiments whose dataTag is already present in the output file are skipped
# (a message is printed for each), so re-running only adds new experiments.
process_experiments(rawFilename, procFilename)

TestNr1 already exists. Skipping.
testNr2 already exists. Skipping.
testNr3 already exists. Skipping.
testNr4 already exists. Skipping.
