# In [None]:
# =========================
# STATIM DATA PROCESSING NOTEBOOK
# =========================

# 1️⃣ Imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

sns.set(style="whitegrid")

# 2️⃣ Paths
# Input and output locations, resolved against the current working directory.
DATA_DIR = "../data"
PROCESSED_DIR = "../processed"
RESULTS_DIR = "../results"

# Make sure both output folders exist; existing folders are left untouched.
for _output_dir in (PROCESSED_DIR, RESULTS_DIR):
    os.makedirs(_output_dir, exist_ok=True)

LOG_FILE = os.path.join(RESULTS_DIR, "processing_log.txt")

# Truncate the log from any previous run and write a fresh header.
with open(LOG_FILE, 'w') as f:
    f.writelines(("Processing log\n", "====================\n"))

# 3️⃣ Helper functions
def log(message):
    """Append a timestamped message to the log file and echo it to stdout.

    Args:
        message: Text to record. It is written to ``LOG_FILE`` prefixed with
            the current local time, and printed without the timestamp.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Explicit UTF-8: messages can contain file paths or exception text with
    # non-ASCII characters, which the platform default encoding may reject.
    with open(LOG_FILE, 'a', encoding="utf-8") as f:
        f.write(f"[{timestamp}] {message}\n")
    print(message)

def parse_txt_file(file_path):
    """Parse a Statim TXT file into a dictionary.

    Assumes one ``key: value`` pair per line; adjust parsing as needed.
    Each line is split on its first colon, so values may themselves contain
    colons (e.g. times). Lines without a colon are ignored, and a key that
    appears more than once keeps its last value.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``(record, error)`` pair. On success ``record`` is the dict of
        parsed fields (empty if no line contained a colon) and ``error`` is
        ``None``. If the file cannot be opened or decoded, ``record`` is
        ``None`` and ``error`` is the exception message.
    """
    data = {}
    try:
        with open(file_path, 'r') as f:
            for line in f:
                key, sep, value = line.strip().partition(":")
                if sep:
                    data[key.strip()] = value.strip()
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError covers decoding
        # failures (UnicodeDecodeError) and malformed path strings.
        return None, str(e)
    # Always return a pair so callers can unpack success and failure alike.
    return data, None

# 4️⃣ Processing all files
# Walk each Statim unit's folder, parse every .txt file found, and collect
# one dict per successfully parsed file in `all_records`.
all_records = []

for statim_unit in ["StatimA", "StatimB"]:
    folder_path = os.path.join(DATA_DIR, statim_unit)
    log(f"Processing folder: {folder_path}")

    # os.walk() yields nothing for a missing folder, so report it explicitly.
    if not os.path.isdir(folder_path):
        log(f"Folder not found, skipping: {folder_path}")
        continue

    for root, dirs, files in os.walk(folder_path):
        # Sort in place so traversal (and therefore the log and the row
        # order of the output) does not depend on filesystem ordering.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith(".txt"):
                continue

            path = os.path.join(root, file)
            result = parse_txt_file(path)
            # Accept both a (record, error) pair and a bare record dict, so
            # a successful parse can never fail on tuple unpacking.
            if isinstance(result, tuple):
                record, error = result
            else:
                record, error = result, None

            if record:
                record["Statim"] = statim_unit
                record["Filename"] = file
                all_records.append(record)
                log(f"Processed: {path}")
            else:
                # An empty record means the file was readable but had no
                # parsable lines; give that case a meaningful reason.
                if error is None:
                    error = "no 'key: value' lines found"
                log(f"Failed: {path} | Error: {error}")

# 5️⃣ Convert to CSV
if not all_records:
    log("No data processed. CSV not created.")
else:
    df = pd.DataFrame(all_records)

    # Move the identifying columns to the front; the remaining columns keep
    # the order in which pandas discovered them.
    cols = ["Statim", "Filename"]
    cols += [c for c in df.columns if c not in ("Statim", "Filename")]
    df = df[cols]

    # Write the combined table without the DataFrame index column.
    output_file = os.path.join(PROCESSED_DIR, "statim_data_combined.csv")
    df.to_csv(output_file, index=False)
    log(f"CSV created: {output_file}")

# 6️⃣ Basic visualization
# Bar chart of how many files were parsed for each Statim unit; skipped when
# nothing was processed (in which case `df` was never created).
if all_records:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(x="Statim", data=df, ax=ax)
    ax.set_title("Number of Files Processed per Statim Unit")
    plt.show()
