In [39]:
import pandas as pd
import os

In [40]:
processed_dataset = './DatasetPro/0004'
annotations_folder = './Dataset/annotations/annotations/0004'  # Adjust path as needed
metadata_folder = './Dataset/metadata/metadata/0004'  # Adjust path as needed

In [41]:

# Path to the sample file
bothchroma_file = os.path.join(metadata_folder, 'bothchroma.csv')

# Read the file, ignoring the first column, and only selecting relevant columns
bothchroma_df = pd.read_csv(bothchroma_file, usecols=range(1, 26))

bin_names = ["A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"];
print(bin_names)

# Rename columns for clarity
bothchroma_df.columns = (
    ['timestamp'] +
    [f'{bin_names[i]}_B' for i in range(len(bin_names))] +
    [f'{bin_names[i]}' for i in range(len(bin_names))]
)

# Display the extracted table
print(bothchroma_df.head())

# Save the structured table if needed
os.makedirs(processed_dataset, exist_ok=True)
output_file = os.path.join(processed_dataset, 'structured_bothchroma.csv')
bothchroma_df.to_csv(output_file, index=False)
print(f"Structured table saved to: {output_file}")


['A', 'Bb', 'B', 'C', 'C#', 'D', 'Eb', 'E', 'F', 'F#', 'G', 'Ab']
   timestamp       A_B      Bb_B       B_B       C_B      C#_B       D_B  \
0    0.04644  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1    0.09288  0.422345  0.006912  0.015634  0.286831  1.356600  0.605075   
2    0.13932  0.259532  0.000000  0.342341  0.877206  0.646397  0.073171   
3    0.18576  0.000933  0.000000  0.407497  1.324630  0.071384  0.000000   
4    0.23220  0.001146  0.000000  0.000000  1.535740  0.000000  0.000000   

       Eb_B       E_B       F_B  ...         B         C       C#         D  \
0  0.000000  0.000000  0.000000  ...  0.000000  0.000000  0.00000  0.000000   
1  0.165295  0.000000  0.414818  ...  0.003125  0.644012  2.42598  0.209733   
2  0.305260  0.000000  1.077640  ...  0.106856  0.876610  2.61391  0.155448   
3  0.000000  0.436044  2.136770  ...  0.126511  0.910977  2.32499  0.000000   
4  0.000000  0.411466  2.291890  ...  0.003299  1.048250  2.32230  0.000000   

  

In [42]:
# Path to the sample file
lab_file = os.path.join(annotations_folder, 'majmin7.lab')
labinv_file = os.path.join(annotations_folder, 'majmin7inv.lab')

# Map chords to match your bin naming convention
chord_naming_map = {
    "Db": "C#", "D#": "Eb", "Gb": "F#", "G#": "Ab", "A#": "Bb"
}


def standardize_chord(chord):
    """Convert enharmonic equivalents to match the bin naming convention."""
    for alt, standard in chord_naming_map.items():
        chord = chord.replace(alt, standard)  # Replace with the preferred notation
    return chord


# Read the LAB files
lab_df = pd.read_csv(lab_file, sep="\t", header=None, names=["Start", "End", "Chord"])
labinv_df = pd.read_csv(labinv_file, sep="\t", header=None, names=["Start", "End", "Inversion"])

lab_df["Chord"] = lab_df["Chord"].apply(standardize_chord)
labinv_df["Inversion"] = labinv_df["Inversion"].apply(standardize_chord)


# Merge the dataframes on Start & End times
merged_lab_df = pd.merge(lab_df, labinv_df, on=["Start", "End"], how="inner")

# Save the structured table if needed
output_file = os.path.join(processed_dataset, 'structured_lab.csv')
merged_lab_df.to_csv(output_file, index=False)
print(f"Structured table saved to: {output_file}")

# Display the merged data
print(merged_lab_df.head())



Structured table saved to: ./DatasetPro/0004/structured_lab.csv
      Start       End   Chord Inversion
0  0.000000  0.255420       N         N
1  0.255420  1.742781       N         N
2  1.742781  2.114622       N         N
3  2.114622  5.089345  Ab:maj    Ab:maj
4  5.089345  8.064068  C#:maj  C#:maj/5


In [43]:
# Load both CSV files
chroma_file = os.path.join(processed_dataset, 'structured_bothchroma.csv')
lab_file = os.path.join(processed_dataset, 'structured_lab.csv')

chroma_df = pd.read_csv(chroma_file)
lab_df = pd.read_csv(lab_file)

# Convert timestamps to numeric for proper merging
chroma_df["timestamp"] = pd.to_numeric(chroma_df["timestamp"])
lab_df["Start"] = pd.to_numeric(lab_df["Start"])
lab_df["End"] = pd.to_numeric(lab_df["End"])

# Assign chord labels to chroma timestamps
def get_chord_label(timestamp):
    match = lab_df[(lab_df["Start"] <= timestamp) & (lab_df["End"] > timestamp)]
    if not match.empty:
            return match["Chord"].values[0], match["Inversion"].values[0]  # Return both values
    return "N", "N"  # Default to "N" if no match


chroma_df[["Chord", "Inversion"]] = chroma_df["timestamp"].apply(lambda t: pd.Series(get_chord_label(t)))

# Save the merged dataset
merged_file = os.path.join(processed_dataset, 'merged_chroma_lab.csv')
chroma_df.to_csv(merged_file, index=False)
print(f"Merged dataset saved to: {merged_file}")

Merged dataset saved to: ./DatasetPro/0004/merged_chroma_lab.csv
