# OMA Movement Aggregation

In [2]:
import pandas as pd

In [3]:
# Both CSVs live in the same processed OMA_Score directory, so the directory
# is spelled out once and the two file paths are derived from it.
OMA_SCORE_DIR = '../data/processed/OMA_Score'
INPUT_FILE = f'{OMA_SCORE_DIR}/OMA_Phase_Score.csv'
OUTPUT_FILE = f'{OMA_SCORE_DIR}/OMA_Movement_Score.csv'

In [4]:
# Read the phase-level score table from the configured input path.
df = pd.read_csv(INPUT_FILE)

# Summarise what was loaded before previewing it.
row_count = df.shape[0]
column_names = list(df.columns)
print(f"Loaded {row_count} rows")
print(f"Columns: {column_names}")
print("\nFirst 10 rows:")

# Trailing expression so the notebook renders the preview as a rich table.
df.head(10)

Loaded 30 rows
Columns: ['PersonId', 'RörelseId', 'PhaseId', 'StartTid', 'SlutTid', 'Avg_Gem_Avvikelse']

First 10 rows:


Unnamed: 0,PersonId,RörelseId,PhaseId,StartTid,SlutTid,Avg_Gem_Avvikelse
0,fp1,2a_1,f1,0.0,0.01,0.12
1,fp1,2a_1,f2,0.01,0.01,0.0
2,fp1,2a_1,f3,0.01,0.03,0.2
3,fp2,2a_1,f1,0.06,0.07,1.4
4,fp2,2a_1,f2,0.07,0.07,2.0
5,fp2,2a_1,f3,0.07,0.09,0.9
6,fp3,2a_1,f1,0.07,0.08,0.67
7,fp3,2a_1,f2,0.08,0.08,1.0
8,fp3,2a_1,f3,0.08,0.1,0.8
9,fp4,2a_1,f1,0.05,0.06,0.62


## Group by Movement

We use `groupby()` to group the data by:
- **PersonId** 
- **RörelseId** (2a_1 or 2a_2)

For each group, we calculate:
- **StartTid** = minimum start time, i.e. the start of the first phase (f1)
- **SlutTid** = maximum end time, i.e. the end of the last phase (f3)

  Together, these give the complete time span of the entire movement.
- **Avg_Gem_Avvikelse** = unweighted mean of the phase averages (f1–f3)



In [5]:
# Collapse the phase rows into one row per (person, movement):
#   StartTid          -> earliest phase start
#   SlutTid           -> latest phase end
#   Avg_Gem_Avvikelse -> unweighted mean of the phase averages
movement_df = (
    df.groupby(['PersonId', 'RörelseId'], as_index=False)
    .agg(
        StartTid=pd.NamedAgg(column='StartTid', aggfunc='min'),
        SlutTid=pd.NamedAgg(column='SlutTid', aggfunc='max'),
        Avg_Gem_Avvikelse=pd.NamedAgg(column='Avg_Gem_Avvikelse', aggfunc='mean'),
    )
    # Order by movement first so the persons of each movement are listed together.
    .sort_values(['RörelseId', 'PersonId'], ignore_index=True)
)

print(f"Aggregated from {len(df)} rows to {len(movement_df)} rows")
print("\nAll rows:")
movement_df

Aggregated from 30 rows to 10 rows

All rows:


Unnamed: 0,PersonId,RörelseId,StartTid,SlutTid,Avg_Gem_Avvikelse
0,fp1,2a_1,0.0,0.03,0.106667
1,fp2,2a_1,0.06,0.09,1.433333
2,fp3,2a_1,0.07,0.1,0.823333
3,fp4,2a_1,0.05,0.08,0.84
4,fp5,2a_1,0.06,0.09,0.796667
5,fp1,2a_2,0.04,0.07,0.0
6,fp2,2a_2,0.09,0.12,0.746667
7,fp3,2a_2,0.1,0.14,0.383333
8,fp4,2a_2,0.08,0.11,1.133333
9,fp5,2a_2,0.09,0.12,0.58


**Round the Average to 2 decimal places.**

In [6]:
# Round only the score column to two decimals; columns not named in the
# mapping (ids and times) are left as they are.
movement_df = movement_df.round({'Avg_Gem_Avvikelse': 2})

print("Final columns:", list(movement_df.columns))
movement_df.head(10)

Final columns: ['PersonId', 'RörelseId', 'StartTid', 'SlutTid', 'Avg_Gem_Avvikelse']


Unnamed: 0,PersonId,RörelseId,StartTid,SlutTid,Avg_Gem_Avvikelse
0,fp1,2a_1,0.0,0.03,0.11
1,fp2,2a_1,0.06,0.09,1.43
2,fp3,2a_1,0.07,0.1,0.82
3,fp4,2a_1,0.05,0.08,0.84
4,fp5,2a_1,0.06,0.09,0.8
5,fp1,2a_2,0.04,0.07,0.0
6,fp2,2a_2,0.09,0.12,0.75
7,fp3,2a_2,0.1,0.14,0.38
8,fp4,2a_2,0.08,0.11,1.13
9,fp5,2a_2,0.09,0.12,0.58


In [7]:
# Write the movement-level table without the positional index.
# 'utf-8-sig' prefixes the file with a UTF-8 byte-order mark -- presumably so
# spreadsheet tools decode the non-ASCII header 'RörelseId' correctly (confirm).
csv_options = {'encoding': 'utf-8-sig', 'index': False}
movement_df.to_csv(OUTPUT_FILE, **csv_options)
print(f"Data saved to: {OUTPUT_FILE}")

Data saved to: ../data/processed/OMA_Score/OMA_Movement_Score.csv



### Calculate the **overall average** for each movement across **all persons**.

This gives us 2 rows in total, one per movement:
- 2a_1: Sittande till stående (sitting to standing)
- 2a_2: Stående till sittande (standing to sitting)

In [8]:
# Overall score per movement: the mean, across all persons, of each person's
# movement-level mean.
#
# The per-person means are recomputed here from the phase-level `df` instead of
# being read from `movement_df`, because `movement_df['Avg_Gem_Avvikelse']` has
# already been rounded to two decimals by this point. Averaging rounded values
# and then rounding again would compound the rounding error.
person_means = df.groupby(['PersonId', 'RörelseId'], as_index=False).agg(
    Avg_Gem_Avvikelse=('Avg_Gem_Avvikelse', 'mean')
)

movement_avg = person_means.groupby(['RörelseId']).agg(
    Avg_Gem_Avvikelse=('Avg_Gem_Avvikelse', 'mean')
).reset_index()

# Round exactly once, for presentation only.
movement_avg['Avg_Gem_Avvikelse'] = movement_avg['Avg_Gem_Avvikelse'].round(2)

print(movement_avg)

  RörelseId  Avg_Gem_Avvikelse
0      2a_1               0.80
1      2a_2               0.57


---

## Validation

In [9]:
# Size and key cardinality of the aggregated movement table.
n_rows, n_cols = movement_df.shape
person_count = movement_df['PersonId'].nunique()
movement_count = movement_df['RörelseId'].nunique()

print("Data Overview")
print(f"Shape: {n_rows} rows, {n_cols} columns")
print(f"\nUnique persons: {person_count}")
print(f"Unique movements: {movement_count}")

Data Overview
Shape: 10 rows, 5 columns

Unique persons: 5
Unique movements: 2


In [10]:
# Print the per-person rows of every movement as a plain-text table,
# one section per movement in order of first appearance.
report_columns = ['PersonId', 'StartTid', 'SlutTid', 'Avg_Gem_Avvikelse']
divider = "-" * 50

print("\nData Structure by Movement")
for movement_id in movement_df['RörelseId'].unique():
    is_current = movement_df['RörelseId'] == movement_id
    print(f"\n=== Movement: {movement_id} ===")
    print(movement_df.loc[is_current, report_columns].to_string(index=False))
    print(divider)


Data Structure by Movement

=== Movement: 2a_1 ===
PersonId  StartTid  SlutTid  Avg_Gem_Avvikelse
     fp1      0.00     0.03               0.11
     fp2      0.06     0.09               1.43
     fp3      0.07     0.10               0.82
     fp4      0.05     0.08               0.84
     fp5      0.06     0.09               0.80
--------------------------------------------------

=== Movement: 2a_2 ===
PersonId  StartTid  SlutTid  Avg_Gem_Avvikelse
     fp1      0.04     0.07               0.00
     fp2      0.09     0.12               0.75
     fp3      0.10     0.14               0.38
     fp4      0.08     0.11               1.13
     fp5      0.09     0.12               0.58
--------------------------------------------------
