In [4]:
import os
import pandas as pd
from datetime import datetime

# Path setup
raw_dir = 'raw'
processed_dir = 'processed'

# List all activity directories in the raw directory
activities = os.listdir(raw_dir)

# Iterate through each activity directory
for activity in activities:
    raw_activity_dir = os.path.join(raw_dir, activity)
    processed_activity_dir = os.path.join(processed_dir, activity)
    
    # Create corresponding directory in the processed folder if it doesn't exist
    os.makedirs(processed_activity_dir, exist_ok=True)
    
    # Iterate through each CSV file in the current activity directory
    for filename in os.listdir(raw_activity_dir):
        if filename.endswith('.csv'):
            raw_filepath = os.path.join(raw_activity_dir, filename)
            
            # Load the CSV file
            data = pd.read_csv(raw_filepath)
            
            # Convert 'time' column to datetime and calculate elapsed time in ms
            data['time'] = pd.to_datetime(data['time'], format='%H:%M:%S:%f')
            data['elapsed_time'] = (data['time'] - data['time'].iloc[0]).dt.total_seconds() * 1000
            
            # Initialize a list to store downsampled data
            downsampled_data = []

            # Define a 20 ms interval
            interval = 20
            start_time = 0
            
            while start_time < data['elapsed_time'].iloc[-1]:
                end_time = start_time + interval
                mask = (data['elapsed_time'] >= start_time) & (data['elapsed_time'] < end_time)
                group = data[mask]

                if not group.empty:
                    # Average the data in this group
                    avg_gFx = group['gFx'].mean()
                    avg_gFy = group['gFy'].mean()
                    avg_gFz = group['gFz'].mean()

                    # Append the averaged data
                    downsampled_data.append([avg_gFx, avg_gFy, avg_gFz])
                
                # Move to the next interval
                start_time += interval

            # Convert to DataFrame and round values to 7 decimal places
            downsampled_df = pd.DataFrame(downsampled_data, columns=['accx', 'accy', 'accz'])
            downsampled_df = downsampled_df.round(7)  # Round to 7 decimal places
            
            # Save the processed file to the processed directory
            processed_filepath = os.path.join(processed_activity_dir, filename)
            downsampled_df.to_csv(processed_filepath, index=False)
            
            print(f"Processed and saved: {processed_filepath}")


Processed and saved: processed\LAYING\Subject_1.csv
Processed and saved: processed\LAYING\Subject_10.csv
Processed and saved: processed\LAYING\Subject_11.csv
Processed and saved: processed\LAYING\Subject_12.csv
Processed and saved: processed\LAYING\Subject_2.csv
Processed and saved: processed\LAYING\Subject_3.csv
Processed and saved: processed\LAYING\Subject_4.csv
Processed and saved: processed\LAYING\Subject_5.csv
Processed and saved: processed\LAYING\Subject_6.csv
Processed and saved: processed\LAYING\Subject_7.csv
Processed and saved: processed\LAYING\Subject_8.csv
Processed and saved: processed\LAYING\Subject_9.csv
Processed and saved: processed\SITTING\Subject_1.csv
Processed and saved: processed\SITTING\Subject_10.csv
Processed and saved: processed\SITTING\Subject_11.csv
Processed and saved: processed\SITTING\Subject_12.csv
Processed and saved: processed\SITTING\Subject_2.csv
Processed and saved: processed\SITTING\Subject_3.csv
Processed and saved: processed\SITTING\Subject_4.csv

# Processing into required duration:
- 10 seconds of data will be taken from the total data. THis will be done by removing the first 4.5 seconds and the last 0.5 seconds. 

In [5]:
import os
import pandas as pd

# Define paths
base_dir = 'processed'
output_dir = 'processed_trimmed'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each activity directory
for activity in os.listdir(base_dir):
    activity_dir = os.path.join(base_dir, activity)
    
    if os.path.isdir(activity_dir):
        # Create the corresponding directory in the output folder
        output_activity_dir = os.path.join(output_dir, activity)
        os.makedirs(output_activity_dir, exist_ok=True)
        
        # Process each CSV file in the activity directory
        for filename in os.listdir(activity_dir):
            if filename.endswith('.csv'):
                input_filepath = os.path.join(activity_dir, filename)
                output_filepath = os.path.join(output_activity_dir, filename)
                
                # Load the CSV file
                data = pd.read_csv(input_filepath)
                
                # Remove the first 175 rows
                data_trimmed = data.iloc[175:]
                
                # Keep only the next 500 rows
                data_trimmed = data_trimmed.iloc[:500]
                
                # Save the processed file
                data_trimmed.to_csv(output_filepath, index=False)
                
                # Check if there are fewer than 25 rows left in the original file after the 675th row
                remaining_rows = len(data) - 675
                if remaining_rows < 25:
                    print(f"Warning: {filename} in {activity} has only {remaining_rows} rows left after processing.")
                
                print(f"Processed and saved: {output_filepath}")


Processed and saved: processed_trimmed\LAYING\Subject_1.csv
Processed and saved: processed_trimmed\LAYING\Subject_10.csv
Processed and saved: processed_trimmed\LAYING\Subject_11.csv
Processed and saved: processed_trimmed\LAYING\Subject_12.csv
Processed and saved: processed_trimmed\LAYING\Subject_2.csv
Processed and saved: processed_trimmed\LAYING\Subject_3.csv
Processed and saved: processed_trimmed\LAYING\Subject_4.csv
Processed and saved: processed_trimmed\LAYING\Subject_5.csv
Processed and saved: processed_trimmed\LAYING\Subject_6.csv
Processed and saved: processed_trimmed\LAYING\Subject_7.csv
Processed and saved: processed_trimmed\LAYING\Subject_8.csv
Processed and saved: processed_trimmed\LAYING\Subject_9.csv
Processed and saved: processed_trimmed\SITTING\Subject_1.csv
Processed and saved: processed_trimmed\SITTING\Subject_10.csv
Processed and saved: processed_trimmed\SITTING\Subject_11.csv
Processed and saved: processed_trimmed\SITTING\Subject_12.csv
Processed and saved: processed

In [None]:
# Dividing into train and test data

# Generating TSFEL Features:

In [12]:
import os
import pandas as pd
import tsfel
from pathlib import Path

base_dir = 'processed_dataset/Train'
output_base_dir = 'TSFEL_dataset/Train'

activities = ['LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS']

for activity in activities:
    activity_dir = os.path.join(base_dir, activity)
    output_activity_dir = os.path.join(output_base_dir, activity)
    Path(output_activity_dir).mkdir(parents=True, exist_ok=True)
    subject_files = [f for f in os.listdir(activity_dir) if f.endswith('.csv')]
    for file in subject_files:
        file_path = os.path.join(activity_dir, file)
        df = pd.read_csv(file_path).iloc[:, :]
        cfg = tsfel.get_features_by_domain() 
        # print(cfg)
        for domain in cfg:
            for feature in cfg[domain]:
                cfg[domain][feature]['use'] = 'yes' # use all features, even ones disabled by default

        features = tsfel.time_series_features_extractor(cfg, df, fs=50) # sampling rate 50 Hz
        subject_id = file.split('.')[0]
        output_file = os.path.join(output_activity_dir, f'{subject_id}.csv')
        features.to_csv(output_file, index=False)


*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
*** Feature extraction started ***



*** Feature extraction finished ***
