In [1]:
import pandas as pd
import glob
import os

# Base directory
base_dir = "/home/chris/Documents/markov_model"

activities = ["being_still", "jumping"]
data_frames = []

for activity in activities:
    activity_path = os.path.join(base_dir, activity)
    
    # Loop through each recording folder
    for folder in os.listdir(activity_path):
        folder_path = os.path.join(activity_path, folder)
        
        acc_path = os.path.join(folder_path, "Accelerometer.csv")
        gyro_path = os.path.join(folder_path, "Gyroscope.csv")
        
        if os.path.exists(acc_path) and os.path.exists(gyro_path):
            # Read accelerometer
            acc = pd.read_csv(acc_path)
            acc = acc.rename(columns={"seconds_elapsed": "time_acc", "x": "acc_x", "y": "acc_y", "z": "acc_z"})
            
            # Read gyroscope
            gyro = pd.read_csv(gyro_path)
            gyro = gyro.rename(columns={"seconds_elapsed": "time_gyro", "x": "gyro_x", "y": "gyro_y", "z": "gyro_z"})
            
            # Merge using nearest time match
            merged = pd.merge_asof(
                acc.sort_values("time_acc"),
                gyro.sort_values("time_gyro"),
                left_on="time_acc",
                right_on="time_gyro",
                direction="nearest"
            )
            
            merged["activity"] = activity
            data_frames.append(merged)

# Combine all data
all_data = pd.concat(data_frames, ignore_index=True)

# Keep only relevant columns
all_data = all_data[["time_acc", "acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z", "activity"]]

# Save clean data
output_path = os.path.join(base_dir, "cleaned_data.csv")
all_data.to_csv(output_path, index=False)

print(f"✅ Cleaned dataset saved at: {output_path}")
print(all_data.head())


✅ Cleaned dataset saved at: /home/chris/Documents/markov_model/cleaned_data.csv
   time_acc     acc_x     acc_y     acc_z    gyro_x    gyro_y    gyro_z  \
0  0.239432  0.077198  0.000430 -0.037683  0.020769 -0.034666  0.036040   
1  0.259198 -0.059019 -0.036766  0.021701  0.008705 -0.046883  0.047341   
2  0.278963  0.231731 -0.056161  0.098673  0.024128 -0.035582  0.024587   
3  0.298728  0.134657 -0.083361  0.000985  0.025961 -0.042607  0.002291   
4  0.318493 -0.033980 -0.061132 -0.020469  0.020769 -0.046730  0.012064   

      activity  
0  being_still  
1  being_still  
2  being_still  
3  being_still  
4  being_still  


In [4]:
df = pd.read_csv("/home/chris/Documents/markov_model/cleaned_data.csv")
df.head(10)


Unnamed: 0,time_acc,acc_x,acc_y,acc_z,gyro_x,gyro_y,gyro_z,activity
0,0.239432,0.077198,0.00043,-0.037683,0.020769,-0.034666,0.03604,being_still
1,0.259198,-0.059019,-0.036766,0.021701,0.008705,-0.046883,0.047341,being_still
2,0.278963,0.231731,-0.056161,0.098673,0.024128,-0.035582,0.024587,being_still
3,0.298728,0.134657,-0.083361,0.000985,0.025961,-0.042607,0.002291,being_still
4,0.318493,-0.03398,-0.061132,-0.020469,0.020769,-0.04673,0.012064,being_still
5,0.338259,0.276229,0.016143,-0.096955,0.020769,-0.04673,0.012064,being_still
6,0.357993,0.243545,-0.034278,0.122898,0.029015,-0.026572,-0.038942,being_still
7,0.377371,-0.030786,-0.032103,0.041919,0.038636,-0.025961,-0.022754,being_still
8,0.396749,-0.044982,-0.008172,0.00644,0.031917,-0.037109,0.004429,being_still
9,0.416127,0.118614,0.029891,0.017731,0.034055,-0.033597,0.00168,being_still


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13283 entries, 0 to 13282
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   time_acc  13283 non-null  float64
 1   acc_x     13283 non-null  float64
 2   acc_y     13283 non-null  float64
 3   acc_z     13283 non-null  float64
 4   gyro_x    13283 non-null  float64
 5   gyro_y    13283 non-null  float64
 6   gyro_z    13283 non-null  float64
 7   activity  13283 non-null  object 
dtypes: float64(7), object(1)
memory usage: 830.3+ KB


In [6]:
df.describe

<bound method NDFrame.describe of         time_acc     acc_x     acc_y     acc_z    gyro_x    gyro_y    gyro_z  \
0       0.239432  0.077198  0.000430 -0.037683  0.020769 -0.034666  0.036040   
1       0.259198 -0.059019 -0.036766  0.021701  0.008705 -0.046883  0.047341   
2       0.278963  0.231731 -0.056161  0.098673  0.024128 -0.035582  0.024587   
3       0.298728  0.134657 -0.083361  0.000985  0.025961 -0.042607  0.002291   
4       0.318493 -0.033980 -0.061132 -0.020469  0.020769 -0.046730  0.012064   
...          ...       ...       ...       ...       ...       ...       ...   
13278  12.065239 -2.033003 -1.008866  1.481536 -0.250600 -0.050548  0.033444   
13279  12.084615 -0.167751 -1.044221  2.095463 -0.001222  0.270758  0.196998   
13280  12.103992  0.739180 -1.114350  1.417560  0.190432  0.267398  0.064597   
13281  12.123369  0.079317 -1.258459  1.212358  0.308172 -0.194249 -0.054060   
13282  12.142746 -1.418104 -1.225571  1.628977  0.308172 -0.194249 -0.054060   

     