In [3]:
import os
import pandas as pd
from datetime import datetime

# Path setup
raw_dir = 'raw'
processed_dir = 'processed'


def downsample(data, interval_ms=20):
    """Average the gFx/gFy/gFz columns of *data* over fixed-width time windows.

    Windows are ``interval_ms`` milliseconds wide and are anchored at the
    timestamp of the first row: window ``k`` covers
    ``[k * interval_ms, (k + 1) * interval_ms)`` of elapsed time. Windows
    that contain no samples produce no output row.

    Args:
        data: DataFrame with a 'time' column formatted as ``%H:%M:%S:%f``
            and numeric 'gFx', 'gFy', 'gFz' columns. It is not modified.
        interval_ms: Window width in milliseconds.

    Returns:
        DataFrame with columns 'accx', 'accy', 'accz' holding the per-window
        means rounded to 7 decimal places, one row per non-empty window, in
        chronological order.
    """
    columns = ['accx', 'accy', 'accz']

    # A header-only CSV has no first timestamp to anchor the windows on.
    if data.empty:
        return pd.DataFrame(columns=columns)

    timestamps = pd.to_datetime(data['time'], format='%H:%M:%S:%f')
    elapsed = timestamps - timestamps.iloc[0]

    # Integer floor-division of timedeltas gives an exact window index per
    # row; comparing float milliseconds can misplace samples that sit on a
    # window edge because of rounding.
    window = elapsed // pd.Timedelta(milliseconds=interval_ms)

    # Rows earlier than the first row (and rows without a timestamp) are
    # ignored.
    # NOTE(review): the timestamps carry no date, so a recording that runs
    # past midnight would show up here as negative elapsed time - confirm
    # whether that can happen in the raw data.
    in_range = window >= 0

    # One grouped pass instead of re-scanning the whole frame per window.
    # Grouping on the window index also keeps the very last sample, even
    # when it falls exactly on a window boundary.
    averaged = (
        data.loc[in_range, ['gFx', 'gFy', 'gFz']]
        .groupby(window[in_range].to_numpy())
        .mean()
    )
    averaged.columns = columns
    return averaged.reset_index(drop=True).round(7)


# The guard is true when this runs as a notebook cell or a script, so the
# loop below behaves as before there; it only keeps an ``import`` of this
# code from touching the filesystem.
if __name__ == "__main__":
    # List all activity directories in the raw directory
    activities = os.listdir(raw_dir)

    # Iterate through each activity directory
    for activity in activities:
        raw_activity_dir = os.path.join(raw_dir, activity)

        # Skip stray files sitting next to the activity directories.
        if not os.path.isdir(raw_activity_dir):
            continue

        processed_activity_dir = os.path.join(processed_dir, activity)

        # Create corresponding directory in the processed folder if it doesn't exist
        os.makedirs(processed_activity_dir, exist_ok=True)

        # Iterate through each CSV file in the current activity directory
        for filename in os.listdir(raw_activity_dir):
            if not filename.endswith('.csv'):
                continue

            raw_filepath = os.path.join(raw_activity_dir, filename)

            # Load the CSV file and average it over 20 ms windows
            data = pd.read_csv(raw_filepath)
            downsampled_df = downsample(data)

            # Save the processed file to the processed directory
            processed_filepath = os.path.join(processed_activity_dir, filename)
            downsampled_df.to_csv(processed_filepath, index=False)

            print(f"Processed and saved: {processed_filepath}")


Processed and saved: processed\LAYING\Subject_1.csv
Processed and saved: processed\LAYING\Subject_10.csv
Processed and saved: processed\LAYING\Subject_11.csv
Processed and saved: processed\LAYING\Subject_12.csv
Processed and saved: processed\LAYING\Subject_2.csv
Processed and saved: processed\LAYING\Subject_3.csv
Processed and saved: processed\LAYING\Subject_4.csv
Processed and saved: processed\LAYING\Subject_5.csv
Processed and saved: processed\LAYING\Subject_6.csv
Processed and saved: processed\LAYING\Subject_7.csv
Processed and saved: processed\LAYING\Subject_8.csv
Processed and saved: processed\LAYING\Subject_9.csv
Processed and saved: processed\SITTING\Subject_1.csv
Processed and saved: processed\SITTING\Subject_10.csv
Processed and saved: processed\SITTING\Subject_11.csv
Processed and saved: processed\SITTING\Subject_12.csv
Processed and saved: processed\SITTING\Subject_2.csv
Processed and saved: processed\SITTING\Subject_3.csv
Processed and saved: processed\SITTING\Subject_4.csv