In [7]:
import pandas as pd
import numpy as np
import re
from scipy.stats import linregress

In [3]:
# Load the raw experiment log; the later cells read its "reading", "timestamp",
# "material", "motion" and "trial" columns.
df = pd.read_csv(filepath_or_buffer="experiment_data.csv")

In [6]:
def parse_reading(reading_str):
    """Extract every ``np.float64(<number>)`` value from a stringified reading.

    Parameters
    ----------
    reading_str : str
        Text such as ``"[np.float64(1.5), np.float64(-2.0e-05)]"``.

    Returns
    -------
    list of float
        The numbers in order of appearance; an empty list when none are found.

    Notes
    -----
    Besides plain decimals, the pattern accepts the other forms a float repr
    can take: scientific notation (``1.5e-05``, ``1e+16``), digits without a
    fractional part, and the special values ``nan`` / ``inf``.  Matching only
    ``digits.digits`` would silently drop those entries and leave the row with
    fewer channels than its neighbours.
    """
    # One numeric token: optional sign, then a decimal mantissa with an
    # optional exponent, or one of the non-finite literals.
    number = r'[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|inf|nan)'
    pattern = r'np\.float64\(\s*(' + number + r')\s*\)'
    return [float(token) for token in re.findall(pattern, reading_str)]


In [8]:
# Turn each stringified reading into a list of floats, one list per row.
df["reading_parsed"] = df["reading"].map(parse_reading)

In [13]:
# One group per (material, motion, trial) combination.
grouped = df.groupby(by=["material", "motion", "trial"])

In [18]:
def compute_trial_features(trial_df):
    """Summarise one trial as a flat dict of per-channel statistics.

    The rows are ordered by ``timestamp`` and their ``reading_parsed`` lists
    are stacked into a (samples x channels) array.  For every channel ``k``
    the result holds ``ch{k}_mean``, ``ch{k}_std`` (population std, numpy's
    default), ``ch{k}_min``, ``ch{k}_max``, ``ch{k}_ptp``, ``ch{k}_slope``
    and ``ch{k}_energy``, in that order.

    The slope is fitted against the sample index (0, 1, 2, ...), not against
    the timestamp values themselves.
    """
    ordered = trial_df.sort_values("timestamp")
    # Every row must carry the same number of readings, otherwise stacking fails.
    data = np.stack(ordered["reading_parsed"].tolist(), axis=0)
    n_samples, _ = data.shape  # unpacking also insists on a 2-D array
    x = np.arange(n_samples)   # sample index used as the regression abscissa

    features = {}
    # Each row of the transpose is one channel's time series.
    for ch, series in enumerate(data.T):
        features.update({
            f"ch{ch}_mean": np.mean(series),
            f"ch{ch}_std": np.std(series),
            f"ch{ch}_min": np.min(series),
            f"ch{ch}_max": np.max(series),
            f"ch{ch}_ptp": np.ptp(series),  # max - min
            # First element of the regression result is the fitted slope.
            f"ch{ch}_slope": linregress(x, series)[0],
            # Mean squared amplitude of the channel.
            f"ch{ch}_energy": np.sum(series**2) / n_samples,
        })
    return features

In [19]:
# Build one feature record per trial and tag it with its group keys.
trial_features = []
for keys, group in grouped:
    material, motion, trial = keys
    feats = compute_trial_features(group)
    feats.update(material=material, motion=motion, trial=trial)
    trial_features.append(feats)

In [53]:
# One row per trial, one column per extracted feature or metadata field.
features_df = pd.DataFrame(data=trial_features)

In [54]:
# Persist the feature table without writing the row index.
features_df.to_csv(path_or_buf="trial_features.csv", index=False)

In [55]:
# Sanity check: look at the scalar type stored in a feature column (row label 0).
print(type(features_df["ch0_energy"].loc[0]))

<class 'numpy.float64'>


In [56]:
# Numeric feature columns used for the aggregated statistics.  "trial" is an
# identifier, not a measurement, so it is removed when its dtype is numeric;
# errors="ignore" makes this a no-op when it is absent from the selection.
numeric_cols = (
    features_df.select_dtypes(include=[np.number])
    .columns
    .drop(["trial"], errors="ignore")
)

In [57]:
# Spread (std) of every extracted feature at three grouping levels:
#   mm_stats       - per material-motion combination
#   motion_stats   - per motion, pooling all materials
#   material_stats - per material, pooling all motions
mm_stats, motion_stats, material_stats = (
    features_df.groupby(keys)[numeric_cols].agg(["std"])
    for keys in (["material", "motion"], "motion", "material")
)

# The same statistic across all trials, with no grouping.
overall_stats = features_df[numeric_cols].agg(["std"])

In [58]:
# Print each statistics table under its own banner.
for header, table in (
    ("=== Material-Motion Level Statistics ===", mm_stats),
    ("\n=== Motion Level Statistics (aggregated over materials) ===", motion_stats),
    ("\n=== Material Level Statistics (aggregated over motions) ===", material_stats),
    ("\n=== Overall Statistics (all data) ===", overall_stats),
):
    print(header)
    print(table)

=== Material-Motion Level Statistics ===
                 ch0_mean   ch0_std   ch0_min   ch0_max   ch0_ptp ch0_slope  \
                      std       std       std       std       std       std   
material motion                                                               
fabric   push    0.580390  0.731874  2.120260  1.119488  2.721994  0.034803   
         rub     0.418538  0.675166  1.878297  0.492949  2.048474  0.229793   
         tap     0.251199  0.297297  1.044629  1.185853  2.086561  0.103680   
metal    push    0.776520  0.194940  1.266589  2.007984  2.117602  0.044308   
         rub     0.479619  0.473441  1.606470  2.191745  1.211196  0.099352   
         tap     0.469352  0.840323  1.689081  3.107575  3.656743  0.135231   
wood     push    0.958293  0.536710  2.108020  1.760183  2.535397  0.181640   
         rub     1.066082  0.270630  1.257677  1.279846  0.627495  0.099501   
         tap     0.687775  0.250026  0.730241  1.420651  1.198122  0.206865   

          

In [59]:
import warnings

import pandas as pd
import numpy as np
from statsmodels.multivariate.manova import MANOVA

# features_df is expected to hold one row per trial (the original notes give a
# shape of (45, 108)): feature columns such as "ch0_mean", "ch0_std", ...,
# "ch14_energy", the metadata columns "material", "motion", "trial", and
# possibly pca/tsne columns, which are ignored here.

# 1-2. Keep only the push trials and drop everything that is neither a sensor
# feature nor the independent variable "material".  drop() returns a new frame,
# so features_df is left untouched; errors="ignore" skips absent columns.
cols_to_drop = ['motion', 'trial', 'pca1', 'pca2', 'tsne1', 'tsne2']
df_push = (
    features_df[features_df["motion"] == "push"]
    .drop(columns=cols_to_drop, errors="ignore")
)

# 3. Identify the numeric feature columns (the dependent variables).
# The only remaining non-numeric column should be the categorical "material".
feature_cols = df_push.select_dtypes(include=[np.number]).columns.tolist()

# MANOVA needs at least as many residual degrees of freedom (observations minus
# groups) as dependent variables; otherwise the error covariance matrix is
# singular and the reported statistics cannot be trusted.
n_obs = len(df_push)
n_groups = df_push["material"].nunique()
resid_df = n_obs - n_groups
if len(feature_cols) > resid_df:
    warnings.warn(
        f"MANOVA is being fitted with {len(feature_cols)} dependent variables "
        f"but only {resid_df} residual degrees of freedom "
        f"({n_obs} observations, {n_groups} groups); the error covariance "
        "matrix is singular and the test statistics are unreliable. "
        "Reduce the number of features (e.g. select a subset or project them "
        "onto a few components) before interpreting the result."
    )

# 4. Build the MANOVA formula.
# Dependent variables: all the numeric features (e.g., ch0_mean, ..., ch14_energy)
# Independent variable: material
dep_vars = " + ".join(feature_cols)
formula = f"{dep_vars} ~ material"
print("MANOVA formula:")
print(formula)

# 5. Run MANOVA
manova_results = MANOVA.from_formula(formula, data=df_push)
print(manova_results.mv_test())


MANOVA formula:
ch0_mean + ch0_std + ch0_min + ch0_max + ch0_ptp + ch0_slope + ch0_energy + ch1_mean + ch1_std + ch1_min + ch1_max + ch1_ptp + ch1_slope + ch1_energy + ch2_mean + ch2_std + ch2_min + ch2_max + ch2_ptp + ch2_slope + ch2_energy + ch3_mean + ch3_std + ch3_min + ch3_max + ch3_ptp + ch3_slope + ch3_energy + ch4_mean + ch4_std + ch4_min + ch4_max + ch4_ptp + ch4_slope + ch4_energy + ch5_mean + ch5_std + ch5_min + ch5_max + ch5_ptp + ch5_slope + ch5_energy + ch6_mean + ch6_std + ch6_min + ch6_max + ch6_ptp + ch6_slope + ch6_energy + ch7_mean + ch7_std + ch7_min + ch7_max + ch7_ptp + ch7_slope + ch7_energy + ch8_mean + ch8_std + ch8_min + ch8_max + ch8_ptp + ch8_slope + ch8_energy + ch9_mean + ch9_std + ch9_min + ch9_max + ch9_ptp + ch9_slope + ch9_energy + ch10_mean + ch10_std + ch10_min + ch10_max + ch10_ptp + ch10_slope + ch10_energy + ch11_mean + ch11_std + ch11_min + ch11_max + ch11_ptp + ch11_slope + ch11_energy + ch12_mean + ch12_std + ch12_min + ch12_max + ch12_ptp + ch