In [13]:
import os
import pandas as pd

def process_activities(raw_dir, processed_dir):
    """Downsample every raw accelerometer CSV into 20 ms bins (50 Hz).

    ``raw_dir`` is expected to contain one sub-directory per activity, each
    holding ``.csv`` recordings. For every recording, a file with the same
    name is written to ``processed_dir/<activity>/`` containing the per-bin
    mean of the three acceleration columns, renamed to ``accx``, ``accy``
    and ``accz`` and rounded to 7 decimals.

    Args:
        raw_dir: Directory containing the per-activity folders of raw CSVs.
        processed_dir: Directory under which the mirrored, downsampled
            files are written (created if missing).

    Returns:
        None. Results are written to disk and progress is printed.

    Notes:
        * Entries of ``raw_dir`` that are not directories are ignored.
        * Files lacking the required columns, or without any data row, are
          skipped with a warning.
        * Bins are measured from the first row's timestamp; bins that
          contain no sample produce no output row.
        * Assumes ``time`` is expressed in seconds (it is multiplied by 1000
          to obtain milliseconds) -- TODO confirm against the recorder.
    """
    interval = 20  # bin width in ms -> 50 Hz
    accel_columns = ["ax (m/s^2)", "ay (m/s^2)", "az (m/s^2)"]

    for activity in os.listdir(raw_dir):
        raw_activity_dir = os.path.join(raw_dir, activity)

        # A stray file next to the activity folders (notes, .DS_Store, ...)
        # would make the os.listdir() below raise NotADirectoryError.
        if not os.path.isdir(raw_activity_dir):
            continue

        processed_activity_dir = os.path.join(processed_dir, activity)
        os.makedirs(processed_activity_dir, exist_ok=True)

        for filename in os.listdir(raw_activity_dir):
            if not filename.endswith('.csv'):
                continue

            raw_filepath = os.path.join(raw_activity_dir, filename)

            # Read CSV and clean column names (headers may carry stray spaces)
            data = pd.read_csv(raw_filepath)
            data.columns = data.columns.str.strip()

            # Make sure required columns exist
            if not {"time", "ax (m/s^2)", "ay (m/s^2)", "az (m/s^2)"}.issubset(set(data.columns)):
                print(f"⚠️ Skipping {filename}, unexpected columns: {data.columns}")
                continue

            # A header-only file has no first timestamp to measure from.
            if data.empty:
                print(f"⚠️ Skipping {filename}, no data rows")
                continue

            # Time in ms relative to the first sample, then the index of the
            # [k*interval, (k+1)*interval) bin each sample falls into.
            elapsed_ms = (data['time'] - data['time'].iloc[0]) * 1000
            bin_index = elapsed_ms // interval

            # Drops rows with a missing timestamp (NaN compares False) and
            # rows that precede the first sample.
            valid = bin_index >= 0

            # One pass over the data; groupby sorts the bins chronologically
            # and, unlike a `start < last_elapsed` loop, keeps a final sample
            # that lands exactly on a bin boundary.
            downsampled_df = (
                data.loc[valid, accel_columns]
                .groupby(bin_index[valid])
                .mean()
                .reset_index(drop=True)
            )
            downsampled_df.columns = ['accx', 'accy', 'accz']
            downsampled_df = downsampled_df.round(7)

            # Save downsampled dataframe
            processed_filepath = os.path.join(processed_activity_dir, filename)
            downsampled_df.to_csv(processed_filepath, index=False)

            print(f"✅ Processed and saved: {processed_filepath}")

# Example usage: downsample every recording under "unprocessed" into "processed".
process_activities("unprocessed", "processed")


✅ Processed and saved: processed\LAYING\Subject_1.csv
✅ Processed and saved: processed\LAYING\Subject_2.csv
✅ Processed and saved: processed\LAYING\Subject_3.csv
✅ Processed and saved: processed\SITTING\Subject_1.csv
✅ Processed and saved: processed\SITTING\Subject_2.csv
✅ Processed and saved: processed\SITTING\Subject_3.csv
✅ Processed and saved: processed\STANDING\Subject_1.csv
✅ Processed and saved: processed\STANDING\Subject_2.csv
✅ Processed and saved: processed\STANDING\Subject_3 .csv
✅ Processed and saved: processed\WALKING\Subject_1.csv
✅ Processed and saved: processed\WALKING\Subject_2.csv
✅ Processed and saved: processed\WALKING\Subject_3 .csv
✅ Processed and saved: processed\WALKING_DOWNSTAIRS\Subject_1.csv
✅ Processed and saved: processed\WALKING_DOWNSTAIRS\Subject_2.csv
✅ Processed and saved: processed\WALKING_DOWNSTAIRS\Subject_3.csv
✅ Processed and saved: processed\WALKING_UPSTAIRS\Subject_1.csv
✅ Processed and saved: processed\WALKING_UPSTAIRS\Subject_2.csv
✅ Processed 

In [18]:
# Trim every downsampled recording to a fixed window: discard the first
# SKIP_ROWS rows, then keep at most the next KEEP_ROWS rows. The result is
# mirrored under `output_dir` with the same activity/file layout.
base_dir = 'processed'
output_dir = 'processed_trimmed'

SKIP_ROWS = 200  # leading rows discarded from each file
KEEP_ROWS = 500  # maximum rows retained after the discarded prefix

os.makedirs(output_dir, exist_ok=True)

for activity in os.listdir(base_dir):
    activity_dir = os.path.join(base_dir, activity)

    # Only activity folders are processed; loose files in base_dir are ignored.
    if not os.path.isdir(activity_dir):
        continue

    output_activity_dir = os.path.join(output_dir, activity)
    os.makedirs(output_activity_dir, exist_ok=True)

    for filename in os.listdir(activity_dir):
        if not filename.endswith('.csv'):
            continue

        input_filepath = os.path.join(activity_dir, filename)
        output_filepath = os.path.join(output_activity_dir, filename)

        data = pd.read_csv(input_filepath)

        # Positional slice: rows [SKIP_ROWS, SKIP_ROWS + KEEP_ROWS).
        data_trimmed = data.iloc[SKIP_ROWS:SKIP_ROWS + KEEP_ROWS]

        data_trimmed.to_csv(output_filepath, index=False)

        # Report the number of rows actually written. Files shorter than
        # SKIP_ROWS + KEEP_ROWS yield a truncated (possibly empty) window,
        # which is the same condition the previous `len(data) - 675 < 25`
        # check triggered on, but with a meaningful count in the message.
        remaining_rows = len(data_trimmed)
        if remaining_rows < KEEP_ROWS:
            print(f"Warning: {filename} in {activity} has only {remaining_rows} rows left after processing.")

        print(f"Processed and saved: {output_filepath}")

Processed and saved: processed_trimmed\LAYING\Subject_1.csv
Processed and saved: processed_trimmed\LAYING\Subject_2.csv
Processed and saved: processed_trimmed\LAYING\Subject_3.csv
Processed and saved: processed_trimmed\SITTING\Subject_1.csv
Processed and saved: processed_trimmed\SITTING\Subject_2.csv
Processed and saved: processed_trimmed\SITTING\Subject_3.csv
Processed and saved: processed_trimmed\STANDING\Subject_1.csv
Processed and saved: processed_trimmed\STANDING\Subject_2.csv
Processed and saved: processed_trimmed\STANDING\Subject_3 .csv
Processed and saved: processed_trimmed\WALKING\Subject_1.csv
Processed and saved: processed_trimmed\WALKING\Subject_2.csv
Processed and saved: processed_trimmed\WALKING\Subject_3 .csv
Processed and saved: processed_trimmed\WALKING_DOWNSTAIRS\Subject_1.csv
Processed and saved: processed_trimmed\WALKING_DOWNSTAIRS\Subject_2.csv
Processed and saved: processed_trimmed\WALKING_DOWNSTAIRS\Subject_3.csv
Processed and saved: processed_trimmed\WALKING_UPS