In [2]:
import os 
import pandas as pd

1. how many categories are in each folder
2. how many recordings in each category
3. Min/Max/Avg per category
4. Missing values
5. Find outliers
   1. per category
   2. visualize

# Merge to df

In [3]:
def merge_geophone_falls(folder_path, n_values=500):
    """
    Merges the recordings of every CSV file in a geophone folder into one DataFrame.

    Each file name is split on "_" and must yield at least five parts:
    ``<activity>_<fall_binary>_<distance>_<person>_..._<floor>.csv``.
    The first four parts and the last part (without its extension) become
    label columns. Every row of a file becomes one row of the result, holding
    the labels followed by the first ``n_values`` entries of that row. Shorter
    rows are padded with missing values.

    Files are processed in sorted name order so the row order of the result is
    reproducible. Files that are not CSVs are ignored; CSVs with an unexpected
    name or without any content are skipped with a printed message.

    Parameters:
    - folder_path (str): Path to the geophone folder containing CSV files.
    - n_values (int): Number of sample columns kept per row (default 500).

    Returns:
    - pd.DataFrame: Merged DataFrame with the columns "activity",
      "fall_binary", "distance_m", "person_binary", "floor" and
      "value_1" ... "value_<n_values>". The label columns hold the raw
      strings taken from the file name.
    """
    label_columns = ["activity", "fall_binary", "distance_m", "person_binary", "floor"]
    all_data = []

    # os.listdir returns entries in arbitrary order; sorting keeps the merged
    # frame identical from run to run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".csv"):
            continue

        parts = filename.split("_")
        if len(parts) < 5:
            print(f"Skipping file with unexpected format: {filename}")
            continue  # Skip files that don't match expected format

        fall_type, fall_binary, distance, person = parts[:4]
        # The floor type is always the last part, whatever sits in between.
        floor_type = os.path.splitext(parts[-1])[0]
        labels = [fall_type, fall_binary, distance, person, floor_type]

        file_path = os.path.join(folder_path, filename)
        try:
            df = pd.read_csv(file_path, header=None)
        except pd.errors.EmptyDataError:
            # One empty recording should not abort the whole merge.
            print(f"Skipping empty file: {filename}")
            continue

        # to_numpy().tolist() yields the same per-row values as iterrows()
        # without building one Series per row.
        for record in df.to_numpy().tolist():
            values = record[:n_values]
            # A non-positive multiplier gives [], so full-length rows stay as is.
            values += [None] * (n_values - len(values))
            all_data.append(labels + values)

    column_names = label_columns + [f"value_{i}" for i in range(1, n_values + 1)]
    merged_df = pd.DataFrame(all_data, columns=column_names)

    return merged_df

# Build the merged frame from the geophone folder one level above the notebook
df_geophone = merge_geophone_falls(folder_path="../geophone")

# Add Weight Column

In [4]:
def add_weight_column(df):
    """
    Returns a copy of the dataframe with a 'weight' column (in kg) added.

    The weight is first looked up from the raw person code and then replaced
    for the two object activities, where the object's weight applies:

    - 'AW' (Anna) -> 60kg
    - 'AD' (Adrian/David) -> 75kg
    - '0' -> 0kg
    - 'FOB' (Bag) -> 6kg
    - 'FOL' (Blackroll) -> 0.2kg

    Person codes outside this mapping give a missing weight unless one of the
    activity overrides applies. The lookup needs the raw string codes, so call
    this before 'person_binary' is converted to 0/1.

    The input frame is not modified. If it already has a 'weight' column, that
    column is recomputed, so re-running the cell is safe.

    Parameters:
    df (pd.DataFrame): Input dataframe containing 'person_binary' and 'activity' columns.

    Returns:
    pd.DataFrame: New dataframe with the 'weight' column at position 4
    (directly after 'person_binary' in the merged geophone layout).
    """
    person_weight_map = {'AW': 60, 'AD': 75, '0': 0}
    activity_weight_map = {'FOB': 6, 'FOL': 0.2}

    # drop() returns a new frame, which leaves the caller's data untouched and
    # removes a 'weight' column left behind by an earlier run.
    result = df.drop(columns=['weight'], errors='ignore')

    weight = result['person_binary'].map(person_weight_map)

    # Object activities override the person-based weight.
    for activity, kilograms in activity_weight_map.items():
        weight = weight.mask(result['activity'] == activity, kilograms)

    # Clamp the position so narrow frames get the column appended instead of
    # failing on an out-of-range insert.
    result.insert(min(4, result.shape[1]), 'weight', weight)

    return result

# Attach the weight column while 'person_binary' still holds the raw codes
df_geophone = add_weight_column(df=df_geophone)

# Map Person Column

In [5]:
def map_person_column(df, column_name='person_binary'):
    """
    Returns a copy of the dataframe with the person column converted to 0/1.

    The codes 'AW' and 'AD' become 1; every other value (including missing
    values) becomes 0. A value that is already 1 stays 1, so running the
    function again on its own output does not reset the flags to 0.

    The input frame is not modified.

    Parameters:
    - df (pd.DataFrame): Input dataframe containing the person column.
    - column_name (str): Name of the column to be mapped.

    Returns:
    - pd.DataFrame: New dataframe whose person column holds integer 0/1.
    """
    # Including 1 keeps already-converted rows intact on a second run.
    person_codes = ['AW', 'AD', 1]
    is_person = df[column_name].isin(person_codes).astype(int)

    # assign() replaces the column in place positionally but on a new frame.
    return df.assign(**{column_name: is_person})

# Convert the person codes to a 0/1 flag
df_geophone = map_person_column(df_geophone, column_name='person_binary')

In [6]:
# Preview the first five rows of the merged frame
df_geophone.head(n=5)

Unnamed: 0,activity,fall_binary,distance_m,person_binary,weight,floor,value_1,value_2,value_3,value_4,...,value_491,value_492,value_493,value_494,value_495,value_496,value_497,value_498,value_499,value_500
0,FCS,1,1,1,75.0,H,59,-38,10,43,...,10,-8,-6,3,6,-5,10,10,-12,-6
1,FCS,1,1,1,75.0,H,104,-49,137,-46,...,-1,-15,-9,6,13,6,5,6,-4,-17
2,FCS,1,1,1,75.0,H,-11,48,-13,8,...,170,-107,-246,70,158,110,-52,-100,63,76
3,FCS,1,1,1,75.0,H,16,-11,22,4,...,2,-8,-13,0,13,1,-1,4,4,-9
4,FCS,1,1,1,75.0,H,5,-11,-4,2,...,36,12,2,13,8,7,9,-33,-11,-11


In [7]:
# Write the merged frame next to the notebook, without the row index
df_geophone.to_csv(path_or_buf="merged_geophone_falls.csv", index=False)