# OMA Phase Aggregation

In [1]:
import pandas as pd

In [2]:
# Both files live in the same processed-data folder: the per-aspect scores
# are read from INPUT_FILE and the per-phase averages are written to OUTPUT_FILE.
_OMA_SCORE_DIR = '../data/processed/OMA_Score'

INPUT_FILE = f'{_OMA_SCORE_DIR}/OMA_Score.csv'
OUTPUT_FILE = f'{_OMA_SCORE_DIR}/OMA_Phase_Score.csv'

In [3]:
# Read the score table (one row per person / movement / phase / aspect,
# judging by the columns shown in the output below).
df = pd.read_csv(INPUT_FILE)

row_count = len(df)
column_names = df.columns.tolist()

# Quick sanity summary of what was loaded.
print(f"Loaded {row_count} rows")
print(f"Columns: {column_names}")
print("\nFirst 5 rows:")
df.head(5)

Loaded 245 rows
Columns: ['PersonId', 'RörelseId', 'PhaseId', 'StartTid', 'SlutTid', 'AspektId', 'Gem_Avvikelse']

First 5 rows:


Unnamed: 0,PersonId,RörelseId,PhaseId,StartTid,SlutTid,AspektId,Gem_Avvikelse
0,fp1,2a_1,f1,0.0,0.01,1,0.0
1,fp1,2a_1,f1,0.0,0.01,2,0.0
2,fp1,2a_1,f1,0.0,0.01,3,0.0
3,fp1,2a_1,f1,0.0,0.01,4,0.0
4,fp1,2a_1,f1,0.0,0.01,5,


In [4]:
# Force the score column to a numeric dtype; anything that cannot be parsed
# as a number becomes NaN (errors='coerce') instead of raising.
gem_scores = pd.to_numeric(df['Gem_Avvikelse'], errors='coerce')
df['Gem_Avvikelse'] = gem_scores

# Count how many aspect rows actually carry a score versus how many are empty.
valid_count = gem_scores.notna().sum()
missing_count = gem_scores.isna().sum()

print("Gem_Avvikelse statistics:")
print(f"  Valid values: {valid_count}")
print(f"  Missing values (NaN): {missing_count}")

Gem_Avvikelse statistics:
  Valid values: 174
  Missing values (NaN): 71


## Group by Phase and Calculate Average

We use `groupby()` to group the data by:
- **PersonId** 
- **RörelseId** (2a_1 or 2a_2)
- **PhaseId** (f1, f2, f3)

For each group, we calculate:
- **StartTid** = minimum start time (the start of the first aspect in the phase)
- **SlutTid** = maximum end time (the end of the last aspect in the phase)
- **Avg_Gem_Avvikelse** = mean of Gem_Avvikelse

**Note:** The mean is computed only from valid (non-missing) values; missing scores are skipped rather than counted as zero. For example, if f1 has 11 aspects but only 8 have scores, the mean is calculated from those 8 values.

In [5]:
# Collapse the aspect-level rows into one row per (person, movement, phase):
#   - StartTid: earliest start time within the group
#   - SlutTid: latest end time within the group
#   - Avg_Gem_Avvikelse: mean score (pandas skips NaN values when averaging)
# The result is then ordered by movement first, then person, then phase.
phase_df = (
    df.groupby(['PersonId', 'RörelseId', 'PhaseId'])
    .agg(
        StartTid=pd.NamedAgg(column='StartTid', aggfunc='min'),
        SlutTid=pd.NamedAgg(column='SlutTid', aggfunc='max'),
        Avg_Gem_Avvikelse=pd.NamedAgg(column='Gem_Avvikelse', aggfunc='mean'),
    )
    .reset_index()
    .sort_values(['RörelseId', 'PersonId', 'PhaseId'])
    .reset_index(drop=True)
)

rows_before = len(df)
rows_after = len(phase_df)

print(f"Aggregated from {rows_before} rows to {rows_after} rows")
print("\nFirst 15 rows:")
phase_df.head(15)

Aggregated from 245 rows to 30 rows

First 15 rows:


Unnamed: 0,PersonId,RörelseId,PhaseId,StartTid,SlutTid,Avg_Gem_Avvikelse
0,fp1,2a_1,f1,0.0,0.01,0.125
1,fp1,2a_1,f2,0.01,0.01,0.0
2,fp1,2a_1,f3,0.01,0.03,0.2
3,fp2,2a_1,f1,0.06,0.07,1.4
4,fp2,2a_1,f2,0.07,0.07,2.0
5,fp2,2a_1,f3,0.07,0.09,0.9
6,fp3,2a_1,f1,0.07,0.08,0.666667
7,fp3,2a_1,f2,0.08,0.08,1.0
8,fp3,2a_1,f3,0.08,0.1,0.8
9,fp4,2a_1,f1,0.05,0.06,0.625


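**Round `Avg_Gem_Avvikelse` to 2 decimal places.** Values that fall exactly halfway are rounded to the nearest even digit, so 0.125 becomes 0.12 and 0.625 becomes 0.62 in the output below.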

In [6]:
# Round only the averaged score column to two decimals; all other columns
# are left untouched. Exact halves go to the even digit, which is why the
# displayed result shows 0.125 as 0.12 rather than 0.13.
phase_df = phase_df.round({'Avg_Gem_Avvikelse': 2})

final_columns = list(phase_df.columns)
print("Final columns:", final_columns)
phase_df.head(10)

Final columns: ['PersonId', 'RörelseId', 'PhaseId', 'StartTid', 'SlutTid', 'Avg_Gem_Avvikelse']


Unnamed: 0,PersonId,RörelseId,PhaseId,StartTid,SlutTid,Avg_Gem_Avvikelse
0,fp1,2a_1,f1,0.0,0.01,0.12
1,fp1,2a_1,f2,0.01,0.01,0.0
2,fp1,2a_1,f3,0.01,0.03,0.2
3,fp2,2a_1,f1,0.06,0.07,1.4
4,fp2,2a_1,f2,0.07,0.07,2.0
5,fp2,2a_1,f3,0.07,0.09,0.9
6,fp3,2a_1,f1,0.07,0.08,0.67
7,fp3,2a_1,f2,0.08,0.08,1.0
8,fp3,2a_1,f3,0.08,0.1,0.8
9,fp4,2a_1,f1,0.05,0.06,0.62


In [7]:
# Write the phase-level table without the positional index. 'utf-8-sig'
# prepends a BOM, which helps spreadsheet tools detect UTF-8 so that the
# non-ASCII column name 'RörelseId' displays correctly.
phase_df.to_csv(
    path_or_buf=OUTPUT_FILE,
    encoding='utf-8-sig',
    index=False,
)
print(f"Data saved to: {OUTPUT_FILE}")

Data saved to: ../data/processed/OMA_Score/OMA_Phase_Score.csv


---

## Validation

In [8]:
# Summarize the aggregated table: its dimensions and how many distinct
# persons, movements and phases it covers.
n_rows, n_cols = phase_df.shape
person_count = phase_df['PersonId'].nunique()
movement_count = phase_df['RörelseId'].nunique()
phase_count = phase_df['PhaseId'].nunique()

print("Data Overview")
print(f"Shape: {n_rows} rows, {n_cols} columns")
print(f"\nUnique persons: {person_count}")
print(f"Unique movements: {movement_count}")
print(f"Unique phases: {phase_count}")

Data Overview
Shape: 30 rows, 6 columns

Unique persons: 5
Unique movements: 2
Unique phases: 3


In [9]:
# Print one plain-text table per movement, in order of first appearance,
# so each person's phases can be inspected side by side.
report_columns = ['PersonId', 'PhaseId', 'StartTid', 'SlutTid', 'Avg_Gem_Avvikelse']

print("\nData Structure by Movement")
for movement in phase_df['RörelseId'].unique():
    is_current_movement = phase_df['RörelseId'].eq(movement)
    movement_table = phase_df.loc[is_current_movement, report_columns]

    print(f"\n=== Movement: {movement} ===")
    print(movement_table.to_string(index=False))
    print("-" * 60)


Data Structure by Movement

=== Movement: 2a_1 ===
PersonId PhaseId  StartTid  SlutTid  Avg_Gem_Avvikelse
     fp1      f1      0.00     0.01               0.12
     fp1      f2      0.01     0.01               0.00
     fp1      f3      0.01     0.03               0.20
     fp2      f1      0.06     0.07               1.40
     fp2      f2      0.07     0.07               2.00
     fp2      f3      0.07     0.09               0.90
     fp3      f1      0.07     0.08               0.67
     fp3      f2      0.08     0.08               1.00
     fp3      f3      0.08     0.10               0.80
     fp4      f1      0.05     0.06               0.62
     fp4      f2      0.06     0.06               1.00
     fp4      f3      0.06     0.08               0.90
     fp5      f1      0.06     0.07               0.89
     fp5      f2      0.07     0.07               1.00
     fp5      f3      0.07     0.09               0.50
------------------------------------------------------------

=== Mo