In [1]:
import pandas as pd
import os

# Base directory (defaults to the notebook's working directory)
base_dir = os.getcwd()

activities = ["being_still", "jumping", "standing", "walking"]
data_frames = []

for activity in activities:
    activity_path = os.path.join(base_dir, activity)
    if not os.path.isdir(activity_path):
        continue  # Skip activities that are not available in the current workspace
    
    # Loop through each recording folder
    for folder in os.listdir(activity_path):
        folder_path = os.path.join(activity_path, folder)
        if not os.path.isdir(folder_path):
            continue

        acc_path = os.path.join(folder_path, "Accelerometer.csv")
        gyro_path = os.path.join(folder_path, "Gyroscope.csv")
        
        if os.path.exists(acc_path) and os.path.exists(gyro_path):
            # Read accelerometer and rename columns
            acc = pd.read_csv(acc_path)
            acc = acc.rename(columns={"seconds_elapsed": "time_acc", "x": "acc_x", "y": "acc_y", "z": "acc_z"})
            
            # Read gyroscope and rename columns
            gyro = pd.read_csv(gyro_path)
            gyro = gyro.rename(columns={"seconds_elapsed": "time_gyro", "x": "gyro_x", "y": "gyro_y", "z": "gyro_z"})
            
            # Merge using nearest time match to align sensors
            merged = pd.merge_asof(
                acc.sort_values("time_acc"),
                gyro.sort_values("time_gyro"),
                left_on="time_acc",
                right_on="time_gyro",
                direction="nearest"
            )
            
            merged["activity"] = activity
            data_frames.append(merged)

# Combine all data
all_data = pd.concat(data_frames, ignore_index=True)

# Keep only relevant columns
all_data = all_data[["time_acc", "acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z", "activity"]]

# Save clean data
output_path = os.path.join(base_dir, "cleaned_data.csv")
all_data.to_csv(output_path, index=False)

print(f"✅ Cleaned dataset saved at: {output_path}")
print(all_data.head())

✅ Cleaned dataset saved at: e:\school\Hidden-Markov-Models\cleaned_data.csv
   time_acc     acc_x     acc_y     acc_z    gyro_x    gyro_y    gyro_z  \
0  0.246739 -0.001404 -0.129269 -0.349002  0.074371 -0.270147 -0.285266   
1  0.266505  0.087285 -0.215736 -0.538307  0.049937 -0.317641 -0.270605   
2  0.286270 -0.048985 -0.261851 -0.547884 -0.014355 -0.254265 -0.251516   
3  0.306035 -0.292397 -0.172817 -0.586691 -0.072080 -0.165998 -0.193180   
4  0.325801  0.010153  0.061252 -0.542238 -0.158668 -0.148589 -0.097888   

      activity  
0  being_still  
1  being_still  
2  being_still  
3  being_still  
4  being_still  


In [4]:
df = pd.read_csv("cleaned_data.csv")
df.head(10)


Unnamed: 0,time_acc,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z,activity
0,0.246739,-0.001404,-0.129269,-0.349002,0.074371,-0.270147,-0.285266,being_still
1,0.266505,0.087285,-0.215736,-0.538307,0.049937,-0.317641,-0.270605,being_still
2,0.28627,-0.048985,-0.261851,-0.547884,-0.014355,-0.254265,-0.251516,being_still
3,0.306035,-0.292397,-0.172817,-0.586691,-0.07208,-0.165998,-0.19318,being_still
4,0.325801,0.010153,0.061252,-0.542238,-0.158668,-0.148589,-0.097888,being_still
5,0.345566,0.259582,-0.083983,-0.024492,-0.215629,-0.11377,-0.02367,being_still
6,0.365235,0.318745,-0.076979,-0.052772,-0.146756,-0.084908,-0.02138,being_still
7,0.384613,0.190015,0.216441,-0.452172,-0.109189,-0.064597,-0.028863,being_still
8,0.40399,0.396254,0.216906,-0.383239,-0.153475,-0.062612,-0.004276,being_still
9,0.423368,0.528011,0.060905,-0.331462,-0.212728,-0.080326,-0.015577,being_still


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22508 entries, 0 to 22507
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   time_acc  22508 non-null  float64
 1   acc_x     22508 non-null  float64
 2   acc_y     22508 non-null  float64
 3   acc_z     22508 non-null  float64
 4   gyro_x    22508 non-null  float64
 5   gyro_y    22508 non-null  float64
 6   gyro_z    22508 non-null  float64
 7   activity  22508 non-null  object 
dtypes: float64(7), object(1)
memory usage: 1.4+ MB


In [6]:
df.describe

<bound method NDFrame.describe of        time_acc     acc_x     acc_y     acc_z    gyro_x    gyro_y    gyro_z  \
0      0.246739 -0.001404 -0.129269 -0.349002  0.074371 -0.270147 -0.285266   
1      0.266505  0.087285 -0.215736 -0.538307  0.049937 -0.317641 -0.270605   
2      0.286270 -0.048985 -0.261851 -0.547884 -0.014355 -0.254265 -0.251516   
3      0.306035 -0.292397 -0.172817 -0.586691 -0.072080 -0.165998 -0.193180   
4      0.325801  0.010153  0.061252 -0.542238 -0.158668 -0.148589 -0.097888   
...         ...       ...       ...       ...       ...       ...       ...   
22503  6.230723 -2.278307 -1.405119  1.247207 -0.627186 -1.316433 -1.057001   
22504  6.250844 -3.222405  0.320520 -0.943161 -1.116393 -0.949310 -1.091990   
22505  6.270967 -3.964756  1.505194 -1.685590 -1.341156 -0.695751 -1.320124   
22506  6.291089 -2.316826 -0.400352 -3.970996 -1.263178 -1.212012 -1.515657   
22507  6.311211 -0.320936 -0.550874 -7.928246 -0.549595 -1.621374 -1.227497   

          activit