In [11]:
import eda as eda
import os 
import pandas as pd

1. how many categories are in each folder
2. how many recordings in each category
3. Min/Max/Avg per category
4. Missing values
5. Find outliers
   1. per category
   2. Visualize
6. Feature Engineering
   1. Amplitude
   2. Energy
   3. Frequency
   4. Peak-to-peak
   5. ARV: Average rectified value
   6. Weighted-ARV: 
   (7. FFT: frequency spectrum, different frequency parts)
7. Clustering

# Merge to df

In [12]:
def merge_geophone_falls(folder_path, n_values=500):
    """
    Merges all 'fall' data from CSV files in the given geophone folder.

    File names must consist of at least five underscore-separated parts; the
    first four are used as activity, fall flag, distance and person, and the
    last one (without the ".csv" suffix) as the floor type. Every row of every
    matching CSV becomes one row of the result, truncated or padded with
    ``None`` to exactly ``n_values`` samples.

    Files are visited in sorted name order so that the row order of the result
    is reproducible (``os.listdir`` returns entries in arbitrary order). Files
    with an unexpected name format, and files without any content, are
    reported on stdout and skipped.

    Parameters:
    - folder_path (str): Path to the geophone folder containing CSV files.
    - n_values (int): Number of sample columns kept per recording (default 500).

    Returns:
    - pd.DataFrame: Merged DataFrame with labeled columns.
    """
    suffix = ".csv"
    all_data = []

    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(suffix):
            continue

        # Strip the extension first so the last part is the bare floor type.
        parts = filename[:-len(suffix)].split("_")
        if len(parts) < 5:
            print(f"Skipping file with unexpected format: {filename}")
            continue  # Skip files that don't match expected format

        # activity, fall flag, distance, person (AW, AD, or 0), floor type
        labels = [parts[0], parts[1], parts[2], parts[3], parts[-1]]

        file_path = os.path.join(folder_path, filename)
        try:
            df = pd.read_csv(file_path, header=None)
        except pd.errors.EmptyDataError:
            # A zero-byte recording must not abort the whole merge.
            print(f"Skipping empty file: {filename}")
            continue

        # One output row per CSV row, forced to exactly n_values samples.
        for values in df.to_numpy().tolist():
            values = values[:n_values]
            values.extend([None] * (n_values - len(values)))
            all_data.append(labels + values)

    # Create DataFrame
    column_names = ["activity", "fall_binary", "distance_m", "person", "floor"] + [
        f"value_{i}" for i in range(1, n_values + 1)
    ]
    merged_df = pd.DataFrame(all_data, columns=column_names)

    return merged_df

# Example usage: merge every recording found under ../geophone and save the
# combined table as a CSV (without the index column).
df_geophone = merge_geophone_falls(folder_path="../geophone")
df_geophone.to_csv(path_or_buf="merged_geophone_falls.csv", index=False)


In [None]:
# Add the column 'weight' (kg).
# Persons: 60 kg for Anna (AW), 75 kg for Adrian/David (AD), 0 for things ('0').
# Objects override the person value: 6 for FOB, 0.2 for FOL (bag and Blackroll).
person_weight = df_geophone['person'].map({'AW': 60, 'AD': 75, '0': 0})
object_weight = df_geophone['activity'].map({'FOB': 6, 'FOL': 0.2})

# Use the object weight where the activity defines one, else the person weight.
weight = object_weight.fillna(person_weight)

# DataFrame.insert raises if the column already exists, so overwrite in place
# when this cell is re-run instead of failing.
if 'weight' in df_geophone.columns:
    df_geophone['weight'] = weight
else:
    df_geophone.insert(4, 'weight', weight)

In [16]:
# Write the current state of df_geophone to disk, omitting the index column.
df_geophone.to_csv(path_or_buf="merged_geophone_falls.csv", index=False)