# WAV to Chromagram Converter

This notebook converts each `.wav` file in the dataset into one or more chromagram images and saves them to disk. The images are later used to train several neural networks, which are combined into an ensemble to build a more accurate genre classifier.

## 1 - Imports

In [41]:
import os
import numpy as np # For numerical operations
import matplotlib.pyplot as plt # For plotting
import gc # For garbage collection
import librosa # For audio processing

!{sys.executable} -m pip install tqdm

from tqdm import tqdm # For progress bars

# Had to force install librosa for audio processing as it was not installed in the environment
import sys

# clear the output of the previous cell
from IPython.display import clear_output
clear_output()

print(sys.executable)

/Library/Developer/CommandLineTools/usr/bin/python3


## 2 - Grab the file paths for all the music files, along with the associated genres

In [42]:
DATASET_PATH = "../Data/genres_original"
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
file_paths = []   # Path of every .wav file found under DATASET_PATH/<genre>
genre_count = []  # Integer label for each entry of file_paths (index into `genres`)

# Walk each genre folder and record every .wav file together with its label
for genre_index, genre in enumerate(genres):
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        # Make a wrong DATASET_PATH visible instead of silently loading nothing
        print(f"Warning: genre directory not found, skipping: {genre_path}")
        continue
    for file in os.listdir(genre_path):
        if file.endswith(".wav"):
            # Keep the two lists index-aligned: file_paths[k] has label genre_count[k]
            file_paths.append(os.path.join(genre_path, file))
            genre_count.append(genre_index)

# Print the number of loaded files and labels
print("Files loaded:", len(file_paths))
print("Labels loaded:", len(genre_count))
print("Genres loaded:", len(genres))

# Spot-check one (label, path) pair; skipped when fewer than 401 files were
# found so a missing dataset does not end in an unrelated IndexError
(genre_count[400], file_paths[400]) if len(file_paths) > 400 else None

Files loaded: 1000
Labels loaded: 1000
Genres loaded: 10


(4, '../Data/genres_original/hiphop/hiphop.00022.wav')

## 3 - Functions for creating 3-second or 30-second chromagram images from .wav files

In [43]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
from IPython.display import clear_output

def _save_single_chromagram(chroma_normalized, sr, hop_length, cmap, file_path, genre, duration):
    """Render one chromagram as a borderless PNG and return the path it was saved to.

    Parameters
    ----------
    chroma_normalized : array passed straight to ``librosa.display.specshow``.
    sr, hop_length : forwarded to ``specshow`` so the time axis is scaled correctly.
    cmap : matplotlib colormap name forwarded to ``specshow``.
    file_path : source audio path; only its base name is used to name the image.
    genre : name of the sub-folder under ``../Images/Chromagrams``.
    duration : upper x-axis limit (seconds) so the whole clip is shown.

    Returns
    -------
    str
        ``../Images/Chromagrams/<genre>/<audio base name>.png``
    """
    # Swap only the extension. A plain str.replace("wav", "png") would also
    # rewrite "wav" occurring elsewhere in the name (e.g. "wave.wav").
    stem = os.path.splitext(os.path.basename(file_path))[0]
    saving_file_path = os.path.join("../Images", "Chromagrams", genre, f"{stem}.png")

    # Ensure directory exists
    os.makedirs(os.path.dirname(saving_file_path), exist_ok=True)

    fig = plt.figure(figsize=(12, 4))
    try:
        librosa.display.specshow(
            chroma_normalized,
            y_axis='chroma',
            x_axis='time',
            sr=sr,
            hop_length=hop_length,
            cmap=cmap
        )

        # Show the full duration, with no axes and a transparent background
        plt.xlim(0, duration)
        plt.gca().set_axis_off()
        fig.set_facecolor('none')

        # Save the plot
        fig.savefig(saving_file_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # otherwise figures pile up across the many files being processed.
        plt.close(fig)

    return saving_file_path

def _save_segmented_chromagrams(chroma_normalized, sr, hop_length, cmap, file_path, genre, num_segments=10):
    """Split a chromagram along the time axis and save each slice as its own PNG.

    Parameters
    ----------
    chroma_normalized : 2-D array; it is split along axis 1 with ``np.array_split``.
    sr, hop_length : forwarded to ``specshow`` and used to compute each slice's duration.
    cmap : matplotlib colormap name forwarded to ``specshow``.
    file_path : source audio path; its base name (without extension) prefixes the images.
    genre : name of the sub-folder under ``../Images/Chromagrams (3 seconds)``.
    num_segments : number of slices to produce (default 10).

    Returns
    -------
    list[str]
        Paths of the saved images, named ``<base>_segment_<k>.png`` with k starting at 1.
    """
    segments = np.array_split(chroma_normalized, num_segments, axis=1)
    base_filename = os.path.splitext(os.path.basename(file_path))[0]

    # All slices of one file share a directory, so create it once up front
    output_dir = os.path.join("../Images", "Chromagrams (3 seconds)", genre)
    os.makedirs(output_dir, exist_ok=True)

    saved_paths = []

    # total= gives tqdm a real bar (enumerate has no length); leave=False removes
    # the bar when done so it does not add one finished line per audio file.
    segment_progress = tqdm(enumerate(segments, start=1), total=len(segments), leave=False)
    for segment_idx, chroma_segment in segment_progress:
        # Width is scaled down so each slice keeps the aspect of the full 12x4 image
        fig = plt.figure(figsize=(12/num_segments, 4))
        try:
            librosa.display.specshow(
                chroma_segment,
                y_axis='chroma',
                x_axis='time',
                sr=sr,
                hop_length=hop_length,
                cmap=cmap
            )

            # Calculate actual segment duration (frames * hop / sample rate)
            segment_duration = chroma_segment.shape[1] * hop_length / sr
            plt.xlim(0, segment_duration)
            plt.gca().set_axis_off()
            fig.set_facecolor('none')

            saving_file_path = os.path.join(
                output_dir, f"{base_filename}_segment_{segment_idx}.png"
            )

            # Save the plot
            fig.savefig(saving_file_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even when drawing or saving fails
            plt.close(fig)

        saved_paths.append(saving_file_path)

    return saved_paths

def createChromagramImages(inputDuration=30, inputSampleRate=22050, inputNFFT=2048, inputhop_length=512, inputCMAP='coolwarm'):
    """Create chromagram images for every file listed in the global ``file_paths``.

    Relies on the notebook globals ``file_paths``, ``genres`` and ``genre_count``
    (``genre_count[i]`` is the index into ``genres`` for ``file_paths[i]``).

    Parameters
    ----------
    inputDuration : 30 saves one image per file covering the whole 30-second
        clip; 3 saves ten images per file, one per 3-second slice of that clip.
    inputSampleRate : sample rate the audio is loaded at.
    inputNFFT : FFT window size passed to ``librosa.feature.chroma_stft``.
    inputhop_length : hop length used for the chromagram and for the time axis.
    inputCMAP : matplotlib colormap name used for the images.

    Raises
    ------
    ValueError
        If ``inputDuration`` is neither 3 nor 30. Only those two modes save
        anything, so any other value would do all the work and write no files.

    Returns
    -------
    None. Per-file errors are caught, reported, and do not stop the run.
    """
    clip_seconds = 30     # length of audio analysed per file in both modes
    segment_seconds = 3   # length of each slice in segmented mode

    if inputDuration not in (clip_seconds, segment_seconds):
        raise ValueError(
            f"inputDuration must be {segment_seconds} or {clip_seconds}, got {inputDuration!r}"
        )

    print("Creating Chromagram Images...")

    # Validate required global variables exist
    try:
        file_paths, genres, genre_count
    except NameError as e:
        print(f"Error: Required global variable not found: {e}")
        return

    # Segmented mode slices the full clip. Loading only `inputDuration` seconds
    # would instead cut the first 3 seconds into ten ~0.3-second slivers.
    num_segments = clip_seconds // segment_seconds
    failures = []  # (path, message) for every file that could not be processed

    for i, audio_path in enumerate(tqdm(file_paths)):
        try:
            # Load and process audio
            y, sr = librosa.load(
                audio_path,
                sr=inputSampleRate,
                duration=clip_seconds
            )

            # Ensure consistent length (pads short clips, trims long ones)
            y = librosa.util.fix_length(y, size=inputSampleRate * clip_seconds)

            # Generate chromagram
            chroma = librosa.feature.chroma_stft(
                y=y,
                sr=sr,
                n_fft=inputNFFT,
                hop_length=inputhop_length,
                norm=2
            )

            # Convert to dB scale and normalize to zero mean / unit variance
            chroma_db = librosa.power_to_db(chroma, ref=np.max)
            spread = np.std(chroma_db)
            if spread == 0:
                # Constant input (e.g. silence): avoid 0/0 turning the image into NaNs
                spread = 1.0
            chroma_normalized = (chroma_db - np.mean(chroma_db)) / spread

            genre = genres[genre_count[i]]

            # Save chromagram(s) based on the requested mode
            if inputDuration == clip_seconds:
                saved_path = _save_single_chromagram(
                    chroma_normalized, sr, inputhop_length, inputCMAP,
                    audio_path, genre, clip_seconds
                )
                if i % 25 == 0:
                    print(f"Processed {i} files, last saved to {saved_path}")
            else:
                saved_paths = _save_segmented_chromagrams(
                    chroma_normalized, sr, inputhop_length, inputCMAP,
                    audio_path, genre, num_segments=num_segments
                )
                if i % 25 == 0:
                    print(f"Processed {i} files, saved {len(saved_paths)} segments")

            # Memory management
            if i % 25 == 0:
                gc.collect()

        except Exception as e:
            # Report immediately and remember it for the final summary
            print(f"Error processing file {audio_path}: {str(e)}")
            failures.append((audio_path, str(e)))
            continue

    # Clear the progress noise first, then print the summary so that the
    # outcome and any failures remain visible in the notebook.
    clear_output()
    if not failures:
        print("Chromagram images created successfully!")
    else:
        succeeded = len(file_paths) - len(failures)
        print(f"Chromagram images created for {succeeded} of {len(file_paths)} files.")
        for failed_path, reason in failures:
            print(f"Error processing file {failed_path}: {reason}")

## 4 - Generate the chromagrams (set `inputDuration` to 3 or 30)

In [45]:
# Generate the chromagram images; inputDuration selects the 30-second or 3-second mode
createChromagramImages(
    inputDuration=30,
    inputSampleRate=22050,
    inputNFFT=2048,
    inputhop_length=512,
    inputCMAP='coolwarm',
)