In [1]:
import pandas as pd
import numpy as np
import random
import uuid
import os

def augment_data(num_new_rows, input_dir=".", output_dir="augmented_data", seed=None):
    """Append synthetic rows to the four dataset CSVs and save augmented copies.

    Reads ``labels.csv``, ``helmet.csv``, ``environment.csv`` and
    ``calendar.csv`` from ``input_dir``, generates ``num_new_rows`` synthetic
    records that share one fresh identifier across all four tables, appends
    them to the original rows, and writes the results to ``output_dir`` as
    ``augmented_labels.csv``, ``augmented_helmet.csv``,
    ``augmented_environment.csv`` and ``augmented_calendar.csv``.

    Synthetic values are drawn as follows:

    * ``LABEL``, ``VAR2`` and ``VAR1`` are picked uniformly from fixed lists.
    * Every numeric helmet column is sampled uniformly between the minimum
      and maximum observed in ``helmet.csv``. Columns that are non-numeric,
      or whose observed range is not finite (empty or all-NaN column, or
      infinite values), cannot be sampled this way and are left as NaN in
      the new rows.

    Parameters
    ----------
    num_new_rows : int
        Number of synthetic records to add. Values <= 0 add nothing; the
        original data is still copied to ``output_dir``.
    input_dir : str, optional
        Directory containing the four source CSV files. Defaults to the
        current working directory.
    output_dir : str, optional
        Directory that receives the augmented CSV files. It is created if it
        does not exist. Defaults to ``"augmented_data"``.
    seed : int or None, optional
        Seed for the random generators. With the same seed and the same input
        files the output is identical between runs. ``None`` (the default)
        seeds from OS entropy, so every run differs.

    Returns
    -------
    None
        Results are written to disk and a completion message is printed.
    """
    table_names = ("labels", "helmet", "environment", "calendar")
    originals = {
        name: pd.read_csv(os.path.join(input_dir, f"{name}.csv"))
        for name in table_names
    }
    helmet_df = originals["helmet"]

    # Local generators keep the run reproducible without touching the global
    # `random` / `np.random` state that other notebook cells may rely on.
    py_rng = random.Random(seed)
    np_rng = np.random.default_rng(seed)

    # The new identifier is written to every table, so it must not collide
    # with an identifier that already exists in any of them.
    taken_uuids = set()
    for frame in originals.values():
        if "UUID" in frame.columns:
            taken_uuids.update(frame["UUID"])

    # Sampling bounds for each helmet column that can be drawn from a uniform
    # distribution. Non-numeric columns and non-finite ranges are skipped
    # because a uniform draw would raise for them.
    column_bounds = {}
    for column in helmet_df.columns:
        if column == "UUID" or not pd.api.types.is_numeric_dtype(helmet_df[column]):
            continue
        low = float(helmet_df[column].min())
        high = float(helmet_df[column].max())
        if np.isfinite(low) and np.isfinite(high):
            column_bounds[column] = (low, high)

    # Possible values for the categorical columns.
    label_choices = ["move", "turnRight", "turnLeft"]
    var2_choices = ["slippery", "plain", "slope", "house", "track"]
    var1_choices = ["shopping", "sport", "cooking", "relax", "gaming"]

    row_count = max(num_new_rows, 0)

    # Identifiers are characters 4..22 of a random version-4 UUID string
    # (19 characters). NOTE(review): presumably this matches the format of the
    # identifiers already in the CSV files -- confirm against the data.
    new_uuids = []
    while len(new_uuids) < row_count:
        candidate = str(uuid.UUID(int=py_rng.getrandbits(128), version=4))[4:23]
        if candidate not in taken_uuids:
            taken_uuids.add(candidate)
            new_uuids.append(candidate)

    # One vectorised draw per helmet column instead of one call per cell.
    helmet_columns = {"UUID": new_uuids}
    for column, (low, high) in column_bounds.items():
        helmet_columns[column] = np_rng.uniform(low, high, size=row_count)

    additions = {
        "labels": pd.DataFrame({
            "UUID": new_uuids,
            "LABEL": [py_rng.choice(label_choices) for _ in new_uuids],
        }),
        "helmet": pd.DataFrame(helmet_columns),
        "environment": pd.DataFrame({
            "UUID": new_uuids,
            "VAR2": [py_rng.choice(var2_choices) for _ in new_uuids],
        }),
        "calendar": pd.DataFrame({
            "UUID": new_uuids,
            "VAR1": [py_rng.choice(var1_choices) for _ in new_uuids],
        }),
    }

    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(output_dir, exist_ok=True)

    for name in table_names:
        augmented = originals[name]
        if row_count:
            # Skip the concat for an empty addition so the original dtypes
            # are written back unchanged.
            augmented = pd.concat([augmented, additions[name]], ignore_index=True)
        augmented.to_csv(os.path.join(output_dir, f"augmented_{name}.csv"), index=False)

    print("Data augmentation completed. The augmented files are saved.")

# Example usage: request 30,000 synthetic rows for each of the four datasets.
ROWS_TO_GENERATE = 30_000
augment_data(ROWS_TO_GENERATE)


Data augmentation completed. The augmented files are saved.
