In [20]:
import os
import random
import sys

import numpy as np
import pandas as pd

# Add the sibling ../lib directory to the import path, then import the project's helper modules
sys.path.append(os.path.join("..", "lib"))
import neural_network_functions as neural_net_fun
import general_functions as gf
import files_paths as fp

In [21]:
# Relative path of the sequences CSV that is read in the next cell.
dataset_sequences_path = os.path.join(
    "result_sequences",
    "DATASET_SEQUENCES_5_30_filtered.CSV",
)

In [22]:
# Load the sequences CSV into a DataFrame; a later cell passes it to
# organize_data_by_emotion, which groups it by 'sample_id'.
dataset_sequences_df = pd.read_csv(dataset_sequences_path)

In [None]:
# Organize data by emotions and measures
def organize_data_by_emotion(dataset_sequences_df, labels=None):
    """
    Group every sample's measure series under its emotion label.

    Parameters
    ----------
    dataset_sequences_df : pandas.DataFrame
        Must contain the columns 'sample_id' and 'label', plus the contiguous
        column range from 'm1' to 'b3' (selected with a label slice).
    labels : dict, optional
        Container of the form ``emotion -> measure -> list`` that is filled
        in place. When omitted, the module-level ``labels`` dictionary is used,
        which is what this function always did before the parameter existed.

    Returns
    -------
    dict
        The same container that was filled: for each sample, one array per
        measure column is appended to ``labels[emotion][measure]``.

    Raises
    ------
    NameError
        If ``labels`` is omitted and no module-level ``labels`` exists.
    """
    if labels is None:
        # Backward-compatible fallback: the parameter shadows the global name,
        # so the module-level dictionary has to be looked up explicitly.
        if 'labels' not in globals():
            raise NameError(
                "no 'labels' container was passed and no module-level "
                "'labels' dictionary is defined"
            )
        labels = globals()['labels']

    for _, group_data in dataset_sequences_df.groupby('sample_id'):
        # The label of the first row is taken as the label of the whole sample.
        current_label = group_data['label'].iloc[0]

        # Extract columns from 'm1' to 'b3' for the current group
        selected_features = group_data.loc[:, 'm1':'b3']

        # setdefault keeps pre-populated templates untouched while tolerating
        # emotions/measures that the template does not list (previously KeyError).
        per_measure = labels.setdefault(current_label, {})
        for measure in selected_features.columns:
            per_measure.setdefault(measure, []).append(selected_features[measure].values)

    return labels

def compare_original_and_augmented_data(original_labels, augmented_labels):
    """
    Print a per-emotion comparison of the original and augmented data.

    For every emotion in ``original_labels`` only the first measure (in the
    dictionary's iteration order) is reported: the number of series in each
    data set and the lengths of the first five series.
    """
    for emotion in original_labels:
        # The first measure stands in for the whole emotion.
        shown_measure = next(iter(original_labels[emotion]))

        before = original_labels[emotion][shown_measure]
        after = augmented_labels[emotion][shown_measure]
        before_count = len(before)
        after_count = len(after)

        print(f"Emotion: {emotion}")
        print(f"  Measure: {shown_measure}")
        print(f"    Original Series Count: {before_count}")
        print(f"    Augmented Series Count: {after_count}")

        # Per-series lengths; only the first five are shown, with an ellipsis
        # marker when more exist.
        before_lengths = list(map(len, before))
        after_lengths = list(map(len, after))
        before_more = '...' if len(before_lengths) > 5 else ''
        after_more = '...' if len(after_lengths) > 5 else ''

        print(f"    Original Series Lengths: {before_lengths[:5]}{before_more}")
        print(f"    Augmented Series Lengths: {after_lengths[:5]}{after_more}")
        print("\n")

def generate_augmented_data(labels, emotions_to_augment=None, augmentation_factors=None):
    """
    Generate augmented data for specified emotions with respective augmentation factors.

    Parameters
    ----------
    labels : dict
        Mapping ``emotion -> measure -> list of series``. Each selected series
        is converted with ``np.asarray`` before it is transformed.
    emotions_to_augment : collection of str, optional
        Emotions to augment. When falsy (``None`` or empty) every emotion is
        processed; otherwise emotions not listed are copied through unchanged.
    augmentation_factors : dict, optional
        Mapping ``emotion -> factor``. An emotion with ``n`` series receives
        ``floor(n * (factor - 1))`` extra series. Emotions without an entry
        (or an omitted dictionary) use 1.0, i.e. no extra series.

    Returns
    -------
    dict
        A new ``emotion -> measure -> list`` mapping. Every list starts with
        the original series objects, followed by the generated ones. The lists
        and inner dictionaries are new objects, so changing them does not
        affect ``labels``.
    """
    # Previously `.get` was called on None when no factors were supplied.
    factors = augmentation_factors or {}

    augmented_labels = {}

    for emotion, measures in labels.items():
        # Skip emotions not in the specified list (if provided)
        if emotions_to_augment and emotion not in emotions_to_augment:
            # Copy the lists instead of aliasing the caller's dictionary.
            augmented_labels[emotion] = {
                measure: list(series_list) for measure, series_list in measures.items()
            }
            continue

        # Get the augmentation factor for the current emotion (default: 1.0)
        augmentation_factor = factors.get(emotion, 1.0)
        augmented_labels[emotion] = {}

        # NOTE(review): the source series and the transform are drawn
        # independently for every measure, so the i-th generated series of one
        # measure does not come from the same sample as the i-th generated
        # series of another. Confirm downstream code does not rely on that.
        for measure, series_list in measures.items():
            # Start with the original series
            combined = list(series_list)

            # Number of series to generate. Rounding to 9 decimals before
            # truncating absorbs float error: 1.4 - 1 is 0.3999999999999999,
            # which would otherwise turn 10 * 0.4 into 3 instead of 4.
            num_augmented = int(round(len(series_list) * (augmentation_factor - 1), 9))

            for _ in range(num_augmented):
                # Randomly select a series to augment
                series = np.asarray(random.choice(series_list))

                # Augmentation technique 1: Add Gaussian noise
                noise = np.random.normal(0, 0.01, size=series.shape)
                augmented_series = series + noise

                # Augmentation technique 2: Scale the data
                scale_factor = np.random.uniform(0.8, 1.2)
                scaled_series = series * scale_factor

                # Augmentation technique 3: Mirror the data
                mirrored_series = np.flip(series)

                # Randomly choose one augmentation to add
                combined.append(random.choice([augmented_series, scaled_series, mirrored_series]))

            augmented_labels[emotion][measure] = combined

    print("Augmented data generated.")
    return augmented_labels

def augmented_data_to_dataframe(augmented_labels):
    """
    Flatten the nested augmented-data mapping into a DataFrame.

    One row is produced per series, holding the keys 'emotion', 'measure'
    and 'series'; rows follow the iteration order of the nested dictionaries.
    """
    records = [
        {'emotion': emotion_name, 'measure': measure_name, 'series': values}
        for emotion_name, per_measure in augmented_labels.items()
        for measure_name, series_group in per_measure.items()
        for values in series_group
    ]
    return pd.DataFrame(records)

In [None]:
# Seed both random sources used by generate_augmented_data (`random` and
# `np.random`) so that re-running the notebook writes the same CSV.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Template filled in place by organize_data_by_emotion: emotion -> measure -> list of series.
# NOTE(review): 'm2' is absent from the measure list — confirm the dataset has
# no 'm2' column between 'm1' and 'b3'.
MEASURES = ['m1', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9', 'm10', 'm11', 'm12',
            'm13', 'm14', 'm15', 'm16', 'm17', 'e1', 'e2', 'e3', 'b1', 'b2', 'b3']
EMOTIONS = ['anger', 'contempt', 'disgust', 'fear', 'happy', 'sadness', 'surprise']
labels = {emotion: {measure: [] for measure in MEASURES} for emotion in EMOTIONS}

# Group the loaded sequences by emotion and measure
organized_labels = organize_data_by_emotion(dataset_sequences_df)

# Define emotions to augment and their respective augmentation factors
emotions_to_augment = ['anger', 'contempt', 'sadness', 'fear']
augmentation_factors = {'anger': 1.4, 'contempt': 1.4, 'sadness': 1.4, 'fear': 1.4}

# Generate augmented data selectively for specified emotions
augmented_data = generate_augmented_data(
    organized_labels,
    emotions_to_augment=emotions_to_augment,
    augmentation_factors=augmentation_factors,
)

# Convert augmented data to DataFrame
augmented_df = augmented_data_to_dataframe(augmented_data)

# Save to CSV
augmented_df.to_csv('augmented_data.csv', index=False)
print("Augmented data saved to 'augmented_data.csv'.")


Augmented data generated.
Augmented data saved to 'augmented_data.csv'.
