In [36]:
import bambi as bmb
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import csv
from collections import defaultdict
from IPython.display import display
from scipy import stats
import pymc as py
import arviz as az
import json

In [37]:
# Participant questionnaire export; only its "Height" and "Weight" columns are
# pulled out here for later use.
soci_data = pd.read_csv(
    "C:/Users/LauflaborVR2/GS-MT_Test/Data/main_presence_questionaire.csv"
)
heights, weights = soci_data["Height"], soci_data["Weight"]

In [55]:
# Read the per-trial step-analysis results. The loops below walk the structure
# as {participant: {condition: {trial: metrics}}}.
with open('combined_step_analysis_results_with_arrays.json', 'r') as json_file:
    data = json.load(json_file)

# One flat record per (participant, condition, trial).
flat_data = []

for participant, conditions in data.items():
    for condition, trials in conditions.items():
        for trial, metrics in trials.items():
            # "mean_velocity" is stored as a scalar; if it arrives as a list it
            # is collapsed to its arithmetic mean (an empty list would raise
            # ZeroDivisionError here).
            velocity = metrics["mean_velocity"]
            if isinstance(velocity, list):
                velocity = sum(velocity) / len(velocity)

            flat_data.append({
                "Participant": participant,
                "Condition": condition,
                "Trial": trial,
                "Velocity": velocity,
                "numStrides_r": metrics["numStrides_r"],
                "numStrides_l": metrics["numStrides_l"],
                "RoM_ankle_l": metrics["footAngle_l"],
                "RoM_knee_l": metrics["kneeAngle_l"],
                "RoM_ankle_r": metrics["footAngle_r"],
                "RoM_knee_r": metrics["kneeAngle_r"],
                "Walking_distance": metrics["walking_distance_r"],
            })

# Build the trial-level DataFrame from the collected records.
df = pd.DataFrame(flat_data)

# The Participant / Condition / Trial labels are strings; keep only the first
# run of digits of each label and store it as an integer.
# The pattern is written as a raw string: in a plain literal "\d" is an invalid
# escape sequence, which newer Python versions report with a warning.
# expand=False makes str.extract return a Series (one capture group) instead of
# a one-column DataFrame.
df['Participant'] = df['Participant'].str.extract(r'(\d+)', expand=False).astype(int)
df['Condition'] = df['Condition'].str.extract(r'(\d+)', expand=False).astype(int)
df['Trial'] = df['Trial'].str.extract(r'(\d+)', expand=False).astype(int)

# Participant and Condition are identifiers, not quantities: store them as
# categoricals. Trial stays an integer.
df['Participant'] = df['Participant'].astype('category')
df['Condition'] = df['Condition'].astype('category')

# Min-max normalisation of the velocity data: the smallest observed velocity
# maps to 0 and the largest to 1.
velocity_min, velocity_max = df['Velocity'].min(), df['Velocity'].max()

df['Velocity_normalized'] = (
    df['Velocity'].sub(velocity_min).div(velocity_max - velocity_min)
)

# Attach each participant's Height and Weight to every one of their trial rows.
# NOTE(review): participants are paired with questionnaire rows purely by
# position (i-th distinct participant in df <-> i-th value of heights/weights),
# and zip() silently stops at the shorter input. Confirm that order and length
# of the questionnaire match the participants in df.
unique_participants = df['Participant'].unique()
participant_height_dict = dict(zip(unique_participants, heights))
participant_weight_dict = dict(zip(unique_participants, weights))

# Look the values up through plain integer IDs rather than the categorical
# column: mapping a categorical Series yields a *categorical* result whenever
# the mapping is one-to-one, which would make the dtype of Height/Weight (and
# how a model formula treats them) depend on whether the values are unique.
participant_ids = df["Participant"].astype(int)
df["Height"] = participant_ids.map(participant_height_dict)
df["Weight"] = participant_ids.map(participant_weight_dict)

# Load the per-participant questionnaire CSVs, grouped by condition code:
# embodi_data[condition code][participant] -> DataFrame with the columns
# "Question" and "Value".
embodi_data = {}
conditions = ["Baseline", "NoAvatar", "Normal", "Small", "Large"]
for c in df['Condition'].unique():
    # Condition code 0 is skipped entirely.
    # NOTE(review): code c selects conditions[c-1], i.e. code 1 -> "Baseline".
    # This assumes 1-based condition codes - confirm, otherwise every label is
    # shifted by one.
    if c != 0:
        cond = conditions[c-1]
        temp = {}
        for p in df['Participant'].unique():
            # File names start with the participant number, zero-padded to two digits.
            file_path = (
                f"C:/Users/LauflaborVR2/GS-MT_Test/Data/0{p}{cond}.csv"
                if p < 10
                else f"C:/Users/LauflaborVR2/GS-MT_Test/Data/{p}{cond}.csv"
            )
            try:
                temp[p] = pd.read_csv(file_path, names=["Question", "Value"])
            except FileNotFoundError:
                # A missing file is reported and that participant is left out
                # for this condition.
                print(f"Datei nicht gefunden: {file_path}")
        embodi_data[c] = temp

# Add the questionnaire scores to the trial rows of df.
question_columns = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11', 'Q12']

for index, row in df.iterrows():
    participant = row['Participant']
    condition = row['Condition']

    # Rows of condition 0, and rows for which no questionnaire was loaded,
    # are left untouched.
    if (
        condition == 0
        or condition not in embodi_data
        or participant not in embodi_data[condition]
    ):
        continue

    survey_data = embodi_data[condition][participant]

    # Sub-scale means over consecutive blocks of four "Value" entries:
    # entries 0-3 -> Ownership, 4-7 -> Agency, 8-11 -> Change.
    ownership_mean = survey_data['Value'].iloc[:4].mean()
    agency_mean = survey_data['Value'].iloc[4:8].mean()
    change_mean = survey_data['Value'].iloc[8:12].mean()

    # Write the three means into this row (the columns are created on first use).
    df.at[index, 'Ownership'] = ownership_mean
    df.at[index, 'Agency'] = agency_mean
    df.at[index, 'Change'] = change_mean

Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/08Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/09Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/10Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/11Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/12Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/13Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/14Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/15Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/16Baseline.csv
Datei nicht gefunden: C:/Users/LauflaborVR2/GS-MT_Test/Data/17Baseline.csv


In [58]:
# Walking distance divided by 1000 (presumably a unit conversion - TODO confirm units).
df["Walking_distance"].div(1000)

0      2.905574
1      2.844447
2      2.741849
3      2.794128
4      2.840076
         ...   
443    2.986257
444    2.937250
445    3.073988
446    3.044719
447    3.010452
Name: Walking_distance, Length: 448, dtype: float64

In [None]:
# Regression of the normalised velocity on Condition and Height.
# In the formula syntax, (1 | x) is a random intercept; a per-participant
# intercept would be added with `+ (1|Participant)` and is not part of this model.
model = bmb.Model("Velocity_normalized ~  Condition + Height ", data=df)
fitted = model.fit(
    draws=1000,
    tune=500,
    # init="adapt_diag",
    random_seed=69,  # fixed seed so a Restart & Run All reproduces the same draws
    cores=8,
    # chains=8,
)

In [None]:
# Trace plots for the intercept, the Condition effect and sigma.
# compact=False: multidimensional variables are not merged into a single panel.
az.plot_trace(fitted, var_names=["Intercept", "Condition", "sigma"], compact=False)



In [None]:
# Draw posterior predictive samples ("pps") for the fitted model.
# inplace=False: the result is returned as a new object instead of being
# written into `fitted`.
ppc = model.predict(idata=fitted, kind="pps", inplace=False)


In [None]:
# Plain-text overview of the object returned by model.predict.
print(ppc)

In [None]:

# Pick out the posterior-predictive group and print its text representation.
ppc_samples = ppc.posterior_predictive
print(ppc_samples)

In [None]:
# Posterior predictive check for the model's response variable.
# The predictive samples are stored under the response name from the model
# formula ("Velocity_normalized"); a variable called "NumStrides" is neither a
# column of df nor part of this model, so requesting it fails.
az.plot_ppc(ppc, var_names=["Velocity_normalized"])