In [4]:
import pandas as pd
import re
import os

def read_and_clean_vilistus_csv(file_path, keep_time=False):
    """Convert a Vilistus CSV export into a DataFrame and save it as Excel.

    The file is expected to consist of a free-form preamble followed by data
    rows. Preamble lines of the form
    ``Column #<n> : Sensor <n> : <name> ***...`` supply the column names.
    The data section begins at the first line that starts with a timestamp
    shaped like ``HH:MM:SS:mmm``; every line from there on is treated as a
    comma-separated data row whose first field is the timestamp.

    Rows are skipped (not raised on) when they do not have exactly one
    timestamp field plus one value per header, or when any value cannot be
    parsed as a float.

    Parameters
    ----------
    file_path : str
        Path to the CSV file. The Excel output is written next to it, using
        the same name with the extension replaced by ``.xlsx``.
    keep_time : bool, default False
        If True, the raw timestamp string of each kept row is inserted as a
        leading ``"Timestamp"`` column.

    Returns
    -------
    pandas.DataFrame
        One float column per detected header (plus ``"Timestamp"`` when
        requested), one row per successfully parsed data line.

    Raises
    ------
    ValueError
        If no column-header line is found in the preamble.
    """
    # Auto-generate output Excel path
    output_excel_path = os.path.splitext(file_path)[0] + ".xlsx"

    timestamp_pattern = re.compile(r"\d{2}:\d{2}:\d{2}:\d{3}")
    header_pattern = re.compile(
        r"Column #\d+\s*:\s*Sensor \d+\s*:\s*(.*?)\s+\*\*\*"
    )

    # Single pass: collect header names until the first timestamped line,
    # then collect every remaining line as a candidate data row.
    headers = []
    data_lines = []
    data_start = False
    with open(file_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            if not data_start and timestamp_pattern.match(raw_line):
                data_start = True
            stripped = raw_line.strip()
            if data_start:
                data_lines.append(stripped)
            else:
                match = header_pattern.match(stripped)
                if match:
                    headers.append(match.group(1).strip())

    if not headers:
        raise ValueError("No valid headers found.")

    # Parse data
    expected_fields = len(headers) + 1  # timestamp + one value per header
    data = []
    timestamps = []
    for line in data_lines:
        parts = [p.strip() for p in line.split(',')]
        # A row with too few OR too many fields cannot be mapped onto the
        # headers; extra fields would otherwise make DataFrame construction
        # fail for the whole file.
        if len(parts) != expected_fields:
            continue  # skip bad rows
        try:
            values = [float(x) for x in parts[1:]]
        except ValueError:
            continue  # skip malformed floats
        # Record the timestamp only after the row is known to be valid so
        # that `timestamps` and `data` always stay the same length.
        data.append(values)
        timestamps.append(parts[0])

    # Build DataFrame
    df = pd.DataFrame(data, columns=headers)
    if keep_time:
        df.insert(0, "Timestamp", timestamps)

    # Save to Excel
    df.to_excel(output_excel_path, index=False)
    print(f"Cleaned data saved to: {output_excel_path}")

    return df


In [5]:
import os

# Convert every CSV file in the current directory.
# os.listdir() returns names in arbitrary order, so sort them to make the
# processing order (and the printed log) reproducible across runs and machines.
for file in sorted(os.listdir('.')):
    if file.lower().endswith('.csv'):
        read_and_clean_vilistus_csv(file, keep_time=False)

Cleaned data saved to: 20015.xlsx
Cleaned data saved to: NA491.xlsx
Cleaned data saved to: MA301.xlsx
Cleaned data saved to: SA039.xlsx
Cleaned data saved to: RU745.xlsx
Cleaned data saved to: IN824.xlsx
Cleaned data saved to: AR833.xlsx
Cleaned data saved to: Ro590.xlsx
Cleaned data saved to: SA997.xlsx
Cleaned data saved to: MA662.xlsx
Cleaned data saved to: SA974.xlsx
Cleaned data saved to: MA598.xlsx
Cleaned data saved to: AB191.xlsx
Cleaned data saved to: UG111.xlsx
Cleaned data saved to: RO807.xlsx
Cleaned data saved to: PE472.xlsx
Cleaned data saved to: SU786.xlsx
Cleaned data saved to: VE008.xlsx
Cleaned data saved to: VI207.xlsx
Cleaned data saved to: RA147.xlsx


In [7]:
# Re-load a single recording (timestamps dropped) for interactive inspection.
df = read_and_clean_vilistus_csv(file_path="20015.csv", keep_time=False)

Cleaned data saved to: 20015.xlsx


In [10]:
# Mean of the absolute values in the "Alpha Amp." column of `df`.
abs(df["Alpha Amp."]).mean()


np.float64(0.501928313364266)