In [6]:
import os
import pandas as pd

def find_csv_files(base_dirs):
    """
    Recursively find all .csv files under each directory in base_dirs.

    Args:
        base_dirs: An iterable of directory paths to search. A single
            ``str`` or ``os.PathLike`` is also accepted and treated as one
            directory instead of being iterated character by character.

    Returns:
        A list of paths, each built with ``os.path.join`` from the walked
        directory and the file name, for every file whose name ends in
        ".csv" (compared case-insensitively). Results follow the order of
        ``base_dirs``; inside each base directory, sub-directories are
        visited and files are listed in sorted name order, so the result
        does not depend on filesystem enumeration order.

    A base directory that does not exist contributes no paths.
    """
    # A bare path is itself iterable; wrap it so it is walked as one directory.
    if isinstance(base_dirs, (str, os.PathLike)):
        base_dirs = [base_dirs]

    paths = []
    for base in base_dirs:
        for root, dirs, files in os.walk(base):
            # os.walk reports entries in arbitrary order. Sorting `dirs` in
            # place steers the (top-down) traversal; sorting `files` fixes
            # the order within one directory.
            dirs.sort()
            paths.extend(
                os.path.join(root, name)
                for name in sorted(files)
                if name.lower().endswith(".csv")
            )
    return paths

# 1) Folders that are searched for CSV files
base_dirs = [
    "data/Dataset",
    "data/old age",
]

# 2) Collect every CSV beneath those folders, then list what was found
csv_files = find_csv_files(base_dirs)
print(f"🔍 Found {len(csv_files)} CSV file(s):")
for csv_path in csv_files:
    print("  •", csv_path)

# 3) Reduce every file to one flat record
rows = []
for fp in csv_files:
    # Subject metadata: the file's second line, first five fields.
    # header=None keeps pandas from using a line as column names,
    # skiprows=1 drops the first line, and nrows=1 stops after one row.
    subject = pd.read_csv(
        fp,
        header=None,
        skiprows=1,
        nrows=1,
        usecols=[0, 1, 2, 3, 4],
    ).iloc[0]
    liver_health, age, gender, weight, height = subject

    # Measurement table: column names come from row index 2 (header=2).
    readings = pd.read_csv(fp, header=2)

    # Keep the rows whose "Sample" cell reads "average" in any letter case.
    is_average = readings["Sample"].astype(str).str.lower().eq("average")
    matches = readings[is_average]
    if matches.empty:
        raise ValueError(f"No 'Average' row found in {fp}")
    summary = matches.iloc[0]

    record = {
        "Liver Health": liver_health,
        "Age": age,
        "Gender": gender,
        "Weight": weight,
        "Height": height,
    }
    # (output column, source column) pairs; the two temperature readings are
    # stored under different names than the columns they are read from.
    for out_col, src_col in (
        ("R", "R"),
        ("G", "G"),
        ("B", "B"),
        ("C", "C"),
        ("Temp9061", "Temp90614"),
        ("Temp9064", "Temp90640"),
        ("GSR", "GSR"),
        ("BMI", "BMI"),
    ):
        record[out_col] = summary[src_col]
    rows.append(record)

# 4) Build the combined table with a fixed column order and save it
output_columns = [
    "Liver Health", "Age", "Gender", "Weight", "Height",
    "R", "G", "B", "C", "Temp9061", "Temp9064", "GSR", "BMI",
]
master_df = pd.DataFrame(rows, columns=output_columns)
# index=False leaves the DataFrame's row index out of the written file.
master_df.to_csv("aggregated.csv", index=False)
print(f"✅ Aggregated {len(master_df)} files → aggregated.csv")


🔍 Found 39 CSV file(s):
  • data/Dataset\s1.csv
  • data/Dataset\s10.csv
  • data/Dataset\s11.csv
  • data/Dataset\s12.csv
  • data/Dataset\s13.csv
  • data/Dataset\s14.csv
  • data/Dataset\s15.csv
  • data/Dataset\s16.csv
  • data/Dataset\s17.csv
  • data/Dataset\s18.csv
  • data/Dataset\s19.csv
  • data/Dataset\s2.csv
  • data/Dataset\s20.csv
  • data/Dataset\s21.csv
  • data/Dataset\s23.csv
  • data/Dataset\s24.csv
  • data/Dataset\s3.csv
  • data/Dataset\s4.csv
  • data/Dataset\s5.csv
  • data/Dataset\s6.csv
  • data/Dataset\s7.csv
  • data/Dataset\s9.csv
  • data/Dataset\session (1).csv
  • data/Dataset\session.csv
  • data/old age\s1.csv
  • data/old age\s10.csv
  • data/old age\s11.csv
  • data/old age\s12.csv
  • data/old age\s15.csv
  • data/old age\s17.csv
  • data/old age\s19.csv
  • data/old age\s2.csv
  • data/old age\s21.csv
  • data/old age\s23.csv
  • data/old age\s24.csv
  • data/old age\s3.csv
  • data/old age\s5.csv
  • data/old age\s7.csv
  • data/old age\s9.csv
✅ A