In [6]:
import sys
import os

# Make modules under ../src importable from this notebook. The location is
# resolved against the current working directory, and the guard keeps repeated
# runs of this cell from appending the same entry more than once.
src_path = os.path.abspath(os.path.join(os.pardir, "src"))
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)
    
import pandas as pd


def split_csv_by_rows(input_path, output_dir, rows_per_chunk=30, label="control",
                      keep_remainder=False):
    """
    Split a CSV file into consecutive chunks of `rows_per_chunk` data rows.

    Each chunk is written to `output_dir` as `{label}_sample_{i}.csv`
    (0-based `i`, header included, index not written). Existing files with
    the same names are overwritten; files left over from earlier runs are
    not removed.

    Parameters
    ----------
    input_path : str or path-like
        CSV file to read with `pandas.read_csv`.
    output_dir : str or path-like
        Directory for the chunk files; created if it does not exist.
    rows_per_chunk : int, default 30
        Number of data rows per chunk. Must be >= 1.
    label : str, default "control"
        Prefix used in every output file name.
    keep_remainder : bool, default False
        When the row count is not a multiple of `rows_per_chunk`, the
        trailing rows do not fill a whole chunk. By default they are left
        out (and a notice is printed) so every file has exactly
        `rows_per_chunk` rows; pass True to write them as a final, shorter
        chunk instead.

    Raises
    ------
    ValueError
        If `rows_per_chunk` is smaller than 1.
    """
    # Fail before touching the filesystem: 0 would otherwise surface as a bare
    # ZeroDivisionError and a negative value would silently write nothing.
    if rows_per_chunk < 1:
        raise ValueError(
            f"rows_per_chunk must be a positive integer, got {rows_per_chunk!r}"
        )

    os.makedirs(output_dir, exist_ok=True)
    df = pd.read_csv(input_path)

    num_chunks, leftover = divmod(len(df), rows_per_chunk)
    if leftover and keep_remainder:
        # One extra, shorter chunk holds the trailing rows.
        num_chunks += 1
        leftover = 0

    for i in range(num_chunks):
        start = i * rows_per_chunk
        # iloc slicing clips at the end of the frame, so the last chunk may be
        # shorter when keep_remainder is set.
        chunk = df.iloc[start : start + rows_per_chunk]
        output_path = os.path.join(output_dir, f"{label}_sample_{i}.csv")
        chunk.to_csv(output_path, index=False)

    print(f"✅ Split into {num_chunks} chunks in '{output_dir}'")
    if leftover:
        # Make the data loss visible instead of dropping rows silently.
        print(
            f"⚠️ Dropped {leftover} trailing row(s) that did not fill a chunk; "
            "pass keep_remainder=True to keep them"
        )

In [7]:
# Split both raw sample files into one-row CSVs in the same output folder;
# the label prefix keeps the two sets of file names distinct.
for group in ("control", "mtbi"):
    split_csv_by_rows(
        f"data/raw/{group}_sample.csv",
        "data/raw/split",
        rows_per_chunk=1,
        label=group,
    )

✅ Split into 150 chunks in 'data/raw/split'
✅ Split into 150 chunks in 'data/raw/split'
