### Data Preparation Segmentation


Orestis Antonis Makris
1084516

Import necessary libraries and utility functions.

In [1]:
import os
import yaml
import pandas as pd
import matplotlib.pyplot as plt
from utils import align_and_combine_data
from utils import segment_data


Load the configuration settings from the config.yml file.


In [2]:
# config.yml is looked up in the current working directory, so the notebook
# has to be run from the folder that contains it.
config_path = os.path.join(os.getcwd(), "config.yml")

# Pass the encoding explicitly: without it open() falls back to the platform
# default (a legacy code page on many Windows setups), which can garble
# non-ASCII characters in the configured paths.
with open(config_path, encoding="utf-8") as file:
    # safe_load only builds plain Python types (dict, list, str, numbers, ...),
    # which is all a settings file needs, and never constructs arbitrary objects.
    config = yaml.safe_load(file)

Define the base directory and create a directory to store merged data if it doesn't exist.

In [3]:
# Input root: read from the "data_path" entry of the loaded configuration
base_dir = config["data_path"]

# Output root: read from the "single_instance_path" entry of the configuration
merged_dir = config["single_instance_path"]

# Ensure the output root exists; an already existing directory is not an error
os.makedirs(name=merged_dir, exist_ok=True)

Traverse the directory structure to process the gyroscope and accelerometer files.
```
.
└── data/
    ├── class_A/
    │   ├── (1) - class_A_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
    │   │       - class_A_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Accelerometer_100.000Hz_1.7.3.csv
    │   ├── (2) - class_A_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
    │   │       - class_A_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Accelerometer_100.000Hz_1.7.3.csv
    │   ├── (3)
    │   └── ..
    ├── class_B/
    │   ├── (1) - class_B_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
    │   │       - class_B_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Accelerometer_100.000Hz_1.7.3.csv
    │   ├── (2) - class_B_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
    │   │       - class_B_100000_MetaWear_2024-05-24T14.04.31.577_F2E51B55F6EA_Accelerometer_100.000Hz_1.7.3.csv
    │   ├── (3)
    │   └── ..
    └── class ...
```
This cell processes each class and instance directory to align and combine data, segment the data, and save the segmented data to CSV files.

In [4]:
# Number of segments each combined recording is split into.
# The value never changes between iterations, so it is set once up front.
num_segments = 15

# Traverse the directory structure: base_dir/<class>/<instance>/<sensor files>.
# os.listdir() returns entries in arbitrary order, so every listing is sorted
# to make the processing order (and the printed log) reproducible across runs.
for class_dir in sorted(os.listdir(base_dir)):
    class_path = os.path.join(base_dir, class_dir)
    if not os.path.isdir(class_path):
        # Only directories are treated as classes; loose files are ignored
        continue

    # Create a class-specific directory inside merged_dir
    class_merged_dir = os.path.join(merged_dir, class_dir)
    os.makedirs(class_merged_dir, exist_ok=True)

    for instance_dir in sorted(os.listdir(class_path)):
        instance_path = os.path.join(class_path, instance_dir)
        if not os.path.isdir(instance_path):
            # Only directories are treated as recording instances
            continue

        gyro_file = None
        accel_file = None

        # Find the gyroscope and accelerometer files in the instance directory.
        # If several names match, the last one in sorted order is used.
        for file in sorted(os.listdir(instance_path)):
            if 'Gyroscope' in file:
                gyro_file = os.path.join(instance_path, file)
            elif 'Accelerometer' in file:
                accel_file = os.path.join(instance_path, file)

        # Both sensor files are required; report incomplete instances instead
        # of skipping them silently so missing data is noticed.
        if not (gyro_file and accel_file):
            print(f"Skipping {class_dir}/{instance_dir}: missing Gyroscope and/or Accelerometer file")
            continue

        print(f"Processing {class_dir}/{instance_dir}")

        # Load the gyroscope and accelerometer data into DataFrames
        gyro_df = pd.read_csv(gyro_file)
        accel_df = pd.read_csv(accel_file)

        # Align and combine the two sensor streams (helper imported from utils)
        combined_df = align_and_combine_data(gyro_df, accel_df)

        # Split the combined data into num_segments pieces (helper from utils)
        segments = segment_data(combined_df, num_segments)

        # The gyroscope file name (without extension) is reused in the output names.
        # NOTE(review): instances of one class whose gyroscope files share a name
        # would write to the same output files - confirm names are unique per class.
        base_filename = os.path.splitext(os.path.basename(gyro_file))[0]

        # Save each segment as a separate CSV file in the class-specific directory,
        # numbering the segments from 1
        for idx, segment in enumerate(segments):
            segment_filename = os.path.join(class_merged_dir, f'sample_number_{idx + 1}_{base_filename}.csv')
            segment.to_csv(segment_filename, index=False)
            print(f"Saved {segment_filename}")


Processing Anti_Clock_Wise/(10)
Saved ./merged_data/Anti_Clock_Wise\sample_number_1_Anti Clock Wise 15 Sofia 100000 (5) _MetaWear_2024-06-02T18.49.17.607_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
Saved ./merged_data/Anti_Clock_Wise\sample_number_2_Anti Clock Wise 15 Sofia 100000 (5) _MetaWear_2024-06-02T18.49.17.607_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
Saved ./merged_data/Anti_Clock_Wise\sample_number_3_Anti Clock Wise 15 Sofia 100000 (5) _MetaWear_2024-06-02T18.49.17.607_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
Saved ./merged_data/Anti_Clock_Wise\sample_number_4_Anti Clock Wise 15 Sofia 100000 (5) _MetaWear_2024-06-02T18.49.17.607_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
Saved ./merged_data/Anti_Clock_Wise\sample_number_5_Anti Clock Wise 15 Sofia 100000 (5) _MetaWear_2024-06-02T18.49.17.607_F2E51B55F6EA_Gyroscope_100.000Hz_1.7.3.csv
Saved ./merged_data/Anti_Clock_Wise\sample_number_6_Anti Clock Wise 15 Sofia 100000 (5) _MetaWear_2024-06-02T18.49.17.607_F2E51B55F6EA_Gyroscop