In [1]:
from api import *
import pandas as pd
import numpy as np
import pickle
import os
from sklearn.preprocessing import StandardScaler

In [2]:
# Label index -> emotion name lookup, in label order.
emotion_dict = dict(enumerate(("valence", "arousal", "dominance", "liking")))

# Index (key of emotion_dict) of the emotion selected for this notebook.
emotion_index = 0

In [3]:
def normalize_bin_func(df, i, n_cols=70, step=5):
    """Z-score a strided group of columns using one shared mean and std.

    The columns at positions ``i, i + step, i + 2*step, ...`` (all strictly
    below ``n_cols``) are treated as a single pool: one mean and one
    population standard deviation (numpy default, ``ddof=0``) are computed
    over every value in those columns together, and each value is replaced
    by ``(x - mean) / std``. All other columns are returned unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding at least the selected column positions.
    i : int
        Positional offset of the first column of the group.
    n_cols : int, default 70
        Exclusive upper bound on the column positions that are considered.
    step : int, default 5
        Distance between consecutive columns of the group.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the selected columns normalized. The input
        frame is never modified.

    Notes
    -----
    If every selected value is identical the standard deviation is 0; the
    group is then only centered (all values become 0) instead of being
    divided by zero, which would fill the group with NaN.
    """
    # Positional indices of the group: i, i + step, ... (< n_cols).
    selected_columns = list(range(i, n_cols, step))

    # Work on a copy so the caller's frame is left untouched.
    result = df.copy()

    # Nothing to normalize: no column selected or no rows to take statistics of.
    if not selected_columns or len(df) == 0:
        return result

    data_to_normalize = df.iloc[:, selected_columns]

    # One mean / std over all selected columns pooled together (not per column).
    pooled_values = data_to_normalize.values
    combined_mean = pooled_values.mean()
    combined_std = pooled_values.std()

    # A constant group has zero spread; scale by 1 so the result is 0, not NaN.
    if combined_std == 0:
        combined_std = 1.0

    # Apply z-score normalization: (x - mean) / std
    normalized_data = (data_to_normalize - combined_mean) / combined_std

    # iloc assigns by position, writing the normalized group back in place.
    result.iloc[:, selected_columns] = normalized_data

    return result
    

In [4]:
# Per-person input files are named <base_path><NN><end_path>, with NN = 01 .. 32.
base_path = "/home/nerlnet/data/persons_csvs/persons_valence/person_nerlnet"
end_path = "_valence.csv"

# Folder that receives one normalized CSV per person.
directory = '/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence'
os.makedirs(directory, exist_ok=True)

for i in range(1, 33):
    file_index = f"{i:02}"
    file_path_1 = base_path + file_index + end_path

    # Load the person's table and work in float32 throughout.
    person_valence_df = pd.read_csv(file_path_1).astype('float32')

    # The first 70 columns are normalized; the remaining ones are carried over as-is.
    columns_to_normalize = person_valence_df.columns[:70]
    columns_to_keep = person_valence_df.columns[70:]
    carried_over_df = person_valence_df[columns_to_keep].reset_index(drop=True)

    # Normalize each of the five strided column groups (offsets 0..4) in turn.
    df_normalized = person_valence_df[columns_to_normalize].copy()
    for j in range(5):
        df_normalized = normalize_bin_func(df_normalized, j)

    # Re-attach the untouched columns to the right of the normalized ones.
    df_normalized = pd.concat([df_normalized, carried_over_df], axis=1)

    # Written without index or header row.
    filename = 'person' + file_index + '_normalize_bins_valence.csv'
    filepath = os.path.join(directory, filename)
    df_normalized.to_csv(filepath, index=False, header=False)
    print(f"done normalize person number : {i}")

done normalize person number : 1
done normalize person number : 2
done normalize person number : 3
done normalize person number : 4
done normalize person number : 5
done normalize person number : 6
done normalize person number : 7
done normalize person number : 8
done normalize person number : 9
done normalize person number : 10
done normalize person number : 11
done normalize person number : 12
done normalize person number : 13
done normalize person number : 14
done normalize person number : 15
done normalize person number : 16
done normalize person number : 17
done normalize person number : 18
done normalize person number : 19
done normalize person number : 20
done normalize person number : 21
done normalize person number : 22
done normalize person number : 23
done normalize person number : 24
done normalize person number : 25
done normalize person number : 26
done normalize person number : 27
done normalize person number : 28
done normalize person number : 29
done normalize person n

In [5]:
# Stack the normalized CSVs of persons 01-03 into one table and save it.
# The per-person frames are collected first and concatenated once, instead of
# growing a DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time and starts from an empty frame).
person_frames = []
for i in range(1,4):
    file_index = f"{i:02}"

    file_path_1 = f'/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence/person{file_index}_normalize_bins_valence.csv'
    # The per-person files are read without a header row.
    person_val_df = pd.read_csv(file_path_1, header = None)
    person_frames.append(person_val_df)

# Rows are stacked in person order with a fresh 0..N-1 index.
person3_val_norm_df = pd.concat(person_frames, axis = 0, ignore_index = True)

directory = '/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence'
os.makedirs(directory, exist_ok=True)
filename = '1_3_persons_normalize_bins_valence.csv'
filepath = os.path.join(directory, filename)
# Written without index or header row.
person3_val_norm_df.to_csv(filepath, index = False, header = False)

In [6]:
# Stack the normalized CSVs of persons 01-12 into one table and save it.
# The per-person frames are collected first and concatenated once, instead of
# growing a DataFrame with pd.concat on every iteration (which re-copies all
# previous rows each time and starts from an empty frame).
person_frames = []
for i in range(1,13):
    file_index = f"{i:02}"
    file_path_1 = f'/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence/person{file_index}_normalize_bins_valence.csv'
    # The per-person files are read without a header row.
    person_val_df = pd.read_csv(file_path_1, header = None)
    person_frames.append(person_val_df)

# Rows are stacked in person order with a fresh 0..N-1 index.
persons12_val_norm_df = pd.concat(person_frames, axis = 0, ignore_index = True)

directory = '/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence'
os.makedirs(directory, exist_ok=True)
filename = '12_persons_normalize_bins_valence.csv'
filepath = os.path.join(directory, filename)
# Written without index or header row.
persons12_val_norm_df.to_csv(filepath, index = False, header = False)

In [7]:
# Extend the 12-person table (persons12_val_norm_df, built in the previous
# cell) with persons 13-23 and save the result.
# The new frames are collected first and concatenated once, instead of calling
# pd.concat on every iteration (which re-copies all previous rows each time).
person_frames = [persons12_val_norm_df]
for i in range(13,24):
    file_index = f"{i:02}"
    file_path_1 = f'/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence/person{file_index}_normalize_bins_valence.csv'
    # The per-person files are read without a header row.
    person_val_df = pd.read_csv(file_path_1, header = None)
    person_frames.append(person_val_df)

# pd.concat builds a new frame, so persons12_val_norm_df itself is not modified.
persons_val_norm_df = pd.concat(person_frames, axis = 0, ignore_index = True)

directory = '/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence'
os.makedirs(directory, exist_ok=True)
# NOTE(review): at this point the table only covers the persons loaded so far
# (through person 23), although the file is named "full"; a later cell writes
# to this same path again.
filename = 'full_persons_normalize_bins_valence.csv'
filepath = os.path.join(directory, filename)
# Written without index or header row.
persons_val_norm_df.to_csv(filepath, index = False, header = False)

In [9]:
# Build the full table (persons 01-32) and save it.
# The table is rebuilt from the per-person files rather than appended onto the
# existing persons_val_norm_df: appending made this cell non-idempotent, so
# running it twice duplicated persons 24-32 in the saved file, and it only
# worked if the earlier merge cells had been run exactly once beforehand.
# The resulting row order (person 01, 02, ..., 32) is the same as a single
# in-order run of the incremental version.
person_frames = []
for i in range(1,33):
    file_index = f"{i:02}"
    file_path_1 = f'/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence/person{file_index}_normalize_bins_valence.csv'
    # The per-person files are read without a header row.
    person_val_df = pd.read_csv(file_path_1, header = None)
    person_frames.append(person_val_df)

# Single concatenation; rows get a fresh 0..N-1 index.
persons_val_norm_df = pd.concat(person_frames, axis = 0, ignore_index = True)

directory = '/home/nerlnet/data/persons_csvs/persons_normalize_bins_valence'
os.makedirs(directory, exist_ok=True)
filename = 'full_persons_normalize_bins_valence.csv'
filepath = os.path.join(directory, filename)
# Written without index or header row.
persons_val_norm_df.to_csv(filepath, index = False, header = False)