In [11]:
import pandas as pd
import numpy as np
import os

Preprocess the CSV training files

X

In [74]:
def _read_chord_intervals(lab_file_path):
    """Parse a ``.lab`` annotation file into ``(start, end, chord)`` tuples.

    Only lines that split into exactly three whitespace-separated fields are
    used; any other line (blank or malformed) is skipped. The first two fields
    are converted with ``float`` and the third is kept as the chord string.
    """
    intervals = []
    with open(lab_file_path, 'r') as file:
        for line in file:
            # Split the line into columns
            columns = line.strip().split()
            if len(columns) == 3:
                intervals.append((float(columns[0]), float(columns[1]), columns[2]))
    return intervals


def _label_frames(frame_times, intervals, default='N'):
    """Return an object array holding one chord label per entry of ``frame_times``.

    A frame receives the chord of an interval when ``start <= time < end``.
    Frames that fall inside no interval keep ``default``. If intervals overlap,
    the one listed last in ``intervals`` wins.
    """
    labels = np.full(len(frame_times), default, dtype=object)
    for start_time, end_time, chord in intervals:
        # Each interval is matched against every frame independently, so a gap
        # before or between annotated intervals cannot block later frames from
        # being labelled.
        in_interval = (frame_times >= start_time) & (frame_times < end_time)
        labels[in_interval] = chord
    return labels


def load_and_concatenate_chroma_files(csv_base_path, lab_base_path):
    """Load every ``bothchroma.csv`` under ``csv_base_path`` and attach chord labels.

    Sub-folders of ``csv_base_path`` whose names consist only of digits are
    visited in ascending (string-sorted) order. For each one,
    ``<folder>/bothchroma.csv`` is read without a header row and a label column
    ``'Y'`` is appended. The label of a row is the chord from
    ``<lab_base_path>/<folder>/majmin.lab`` whose ``[start, end)`` interval
    contains the value in the row's second column (used as the frame
    timestamp). Rows covered by no interval, and all rows of a folder without a
    ``majmin.lab`` file, are labelled ``'N'``.

    Parameters
    ----------
    csv_base_path : str
        Directory containing the numbered folders with ``bothchroma.csv``.
    lab_base_path : str
        Directory containing the numbered folders with ``majmin.lab``.

    Returns
    -------
    pandas.DataFrame
        All per-folder frames concatenated row-wise with a fresh integer index.

    Raises
    ------
    ValueError
        If no ``bothchroma.csv`` file could be loaded at all.
    """
    # List to store all dataframes
    all_dfs = []

    # Only numbered folders are considered, in ascending order by name
    subfolders = sorted(f for f in os.listdir(csv_base_path) if f.isdigit())

    for folder in subfolders:
        csv_path = os.path.join(csv_base_path, folder, 'bothchroma.csv')
        lab_file_path = os.path.join(lab_base_path, folder, 'majmin.lab')

        if not os.path.exists(csv_path):
            print(f"File {csv_path} does not exist!")
            continue

        print(f"Loading {csv_path}")
        df = pd.read_csv(csv_path, header=None)  # Load the CSV without headers
        print(df.shape)

        if os.path.exists(lab_file_path):
            # The second column is the timestamp that is compared against the
            # annotation intervals.
            frame_times = df.iloc[:, 1].to_numpy(dtype=float)
            df['Y'] = _label_frames(frame_times, _read_chord_intervals(lab_file_path))
        else:
            # No annotation available: every frame keeps the default label
            df['Y'] = 'N'

        all_dfs.append(df)

    if not all_dfs:
        raise ValueError(
            f"No 'bothchroma.csv' files found in numbered folders under {csv_base_path!r}"
        )

    # Concatenate all DataFrames into one large DataFrame
    return pd.concat(all_dfs, ignore_index=True)

In [75]:
# Root directories that hold the numbered per-song folders
csv_base_dir = os.path.join('training_data', 'metadata', 'metadata')
lab_base_dir = os.path.join('training_data', 'annotations', 'annotations')

# Read every chroma CSV, attach its chord labels and stack the results
big_dataframe = load_and_concatenate_chroma_files(csv_base_dir, lab_base_dir)

# Remove the leading file-name column
big_dataframe = big_dataframe.drop(columns=big_dataframe.columns[[0]])

# Remove the extra feature columns (positions 13-24 of what remains)
big_dataframe = big_dataframe.drop(columns=big_dataframe.columns[13:25])


Loading training_data/metadata/metadata/0003/bothchroma.csv
(3250, 26)
Loading training_data/metadata/metadata/0004/bothchroma.csv
(4442, 26)
Loading training_data/metadata/metadata/0006/bothchroma.csv
(4746, 26)
Loading training_data/metadata/metadata/0010/bothchroma.csv
(6842, 26)
Loading training_data/metadata/metadata/0012/bothchroma.csv
(4562, 26)
Loading training_data/metadata/metadata/0015/bothchroma.csv
(4442, 26)
Loading training_data/metadata/metadata/0016/bothchroma.csv
(4922, 26)
Loading training_data/metadata/metadata/0018/bothchroma.csv
(4978, 26)
Loading training_data/metadata/metadata/0019/bothchroma.csv
(3826, 26)
Loading training_data/metadata/metadata/0021/bothchroma.csv
(4474, 26)
Loading training_data/metadata/metadata/0022/bothchroma.csv
(5122, 26)
Loading training_data/metadata/metadata/0023/bothchroma.csv
(4722, 26)
Loading training_data/metadata/metadata/0025/bothchroma.csv
(5186, 26)
Loading training_data/metadata/metadata/0026/bothchroma.csv
(3194, 26)
Loadin

In [76]:
# Preview the first 50 rows of the pruned frame, including the chord label column `Y`
big_dataframe.head(n=50)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,Y
0,0.0,0.198482,0.0,0.0,0.635556,0.741292,1.0043,0.81444,0.029282,0.141189,0.80793,0.91015,0.823603,N
1,0.04644,0.310882,0.0,0.0,0.693876,0.628553,1.08004,0.676368,0.017598,0.140079,0.927873,0.663851,0.460197,N
2,0.09288,0.404969,0.0,0.037238,0.68277,0.59114,1.14683,0.575229,0.014624,0.128743,0.968012,0.461634,0.260265,N
3,0.13932,0.480218,0.0,0.005002,0.435639,0.450297,1.21112,0.458671,0.006372,0.102629,0.962884,0.301896,0.257244,N
4,0.18576,0.539064,0.146614,0.010891,0.444361,0.196939,1.29815,0.239054,0.023305,0.20809,0.948395,0.025802,0.341731,N
5,0.2322,0.165807,0.0,0.455848,0.965423,0.0,1.23244,0.0,0.05483,0.034234,0.866962,0.0,0.384071,N
6,0.278639,0.0,0.094263,0.576515,1.96129,0.0,0.766963,0.0,0.046361,0.397669,0.68296,0.0,0.770187,N
7,0.325079,0.00174,0.021126,0.335464,2.5958,0.0,0.0,0.0,0.340361,0.089094,0.528319,0.383412,0.871471,N
8,0.371519,0.0,0.0,0.0,2.82216,0.0,0.0,0.0,0.601873,0.221148,0.532542,0.387659,0.834525,N
9,0.417959,0.494383,0.016506,0.102202,2.83751,0.0,0.0,0.0,0.526568,0.182221,0.774787,0.068274,0.367045,N
