# WAV to Tempograms

## 1 - Imports

In [5]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras import layers
from keras.optimizers import Adam

## 2 - Grab the file paths for all the music files

In [6]:
DATASET_PATH = "Data/genres_original"
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
file_paths = []   # path of every .wav file found, in genre order
genre_count = []  # label for the file at the same position: its genre's index in `genres`

# Collect the audio file paths genre by genre. enumerate() yields the label
# directly, so the genre list is not re-searched for every file.
for genre_index, genre in enumerate(genres):
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        # Report the gap instead of silently ending up with fewer files.
        print(f"Warning: genre folder not found: {genre_path}")
        continue

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # positions in file_paths / genre_count reproducible between runs.
    for file in sorted(os.listdir(genre_path)):
        if file.endswith(".wav"):
            file_path = os.path.join(genre_path, file)
            file_paths.append(file_path)
            genre_count.append(genre_index)

# Print the number of loaded files and labels
print("Files loaded:", len(file_paths))
print("Labels loaded:", len(genre_count))
print("Genres loaded:", len(genres))

# Spot-check one (label, path) pair; raises IndexError if fewer than 401 files were found.
genre_count[400], file_paths[400]

Files loaded: 1000
Labels loaded: 1000
Genres loaded: 10


(4, 'Data/genres_original\\hiphop\\hiphop.00000.wav')

## 3 - Show an example of a tempogram on the console

## 4 - Create the folders and create the tempograms

In [3]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import gc

def create_tempogram_full(genre, file_path):
    """Render the tempogram of a whole audio file and save it as a borderless PNG.

    The image is written to ``Data/tempograms (30 secs)/<genre>/<stem>_full.png``,
    where ``<stem>`` is the base name of ``file_path`` without its extension.
    Missing output directories are created.

    Args:
        genre: Name of the sub-folder the image is saved under.
        file_path: Path of the audio file to load.
    """
    # Load the audio at a fixed sample rate
    y, sr = librosa.load(file_path, sr=22050)

    # Compute onset strength envelope
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)

    # Compute tempogram: this captures rhythmic patterns
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)

    # Prepare the saving path. splitext() strips the extension whatever its
    # length, instead of assuming it is exactly four characters long.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    savingFilePath = os.path.join("Data", "tempograms (30 secs)", genre, stem + "_full.png")

    # Create any missing directories
    os.makedirs(os.path.dirname(savingFilePath), exist_ok=True)

    # Work on an explicit figure so that cleanup only touches this figure
    fig = plt.figure(figsize=(6, 4), frameon=False)
    try:
        ax = fig.add_subplot(111)

        # Plot tempogram
        librosa.display.specshow(tempogram, sr=sr, ax=ax, cmap='viridis')

        # Remove axes, ticks, and labels; keep the background transparent
        ax.set_axis_off()
        ax.set_facecolor('none')

        # Remove all margins and padding
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)

        # Save with tight bounding box and explicitly set zero padding
        fig.savefig(savingFilePath, bbox_inches='tight', pad_inches=0, transparent=True,
                    dpi=300, format='png')
    finally:
        # Always release the figure, even if plotting or saving failed,
        # so a long batch run does not accumulate open figures.
        plt.close(fig)
        gc.collect()

# Root folder that holds the 30-second tempogram images
full_tempogram_root = os.path.join("Data", "tempograms (30 secs)")
os.makedirs(full_tempogram_root, exist_ok=True)

# One sub-folder per genre
for genre in genres:
    os.makedirs(os.path.join(full_tempogram_root, genre), exist_ok=True)

# Render one full-length tempogram per song
for song_index, file_path in enumerate(file_paths):
    # The label stored at the same position selects the genre name
    genre = genres[genre_count[song_index]]
    print(f"Processing tempogram for {file_path}")

    create_tempogram_full(genre, file_path)


Processing tempogram for Data/genres_original\blues\blues.00000.wav
Processing tempogram for Data/genres_original\blues\blues.00001.wav
Processing tempogram for Data/genres_original\blues\blues.00002.wav
Processing tempogram for Data/genres_original\blues\blues.00003.wav
Processing tempogram for Data/genres_original\blues\blues.00004.wav
Processing tempogram for Data/genres_original\blues\blues.00005.wav
Processing tempogram for Data/genres_original\blues\blues.00006.wav
Processing tempogram for Data/genres_original\blues\blues.00007.wav
Processing tempogram for Data/genres_original\blues\blues.00008.wav
Processing tempogram for Data/genres_original\blues\blues.00009.wav
Processing tempogram for Data/genres_original\blues\blues.00010.wav
Processing tempogram for Data/genres_original\blues\blues.00011.wav
Processing tempogram for Data/genres_original\blues\blues.00012.wav
Processing tempogram for Data/genres_original\blues\blues.00013.wav
Processing tempogram for Data/genres_original\bl

## 5 - Create the same tempograms, but for 3-second clips

In [7]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import gc

def create_tempogram_clip(genre, file_path, clip_number, total_clips=10):
    """Render the tempogram of one 3-second clip of an audio file as a borderless PNG.

    Clip ``clip_number`` covers seconds ``(clip_number - 1) * 3`` to
    ``clip_number * 3`` of the file. The image is written to
    ``Data/tempograms (3 secs)/<genre>/<stem>_clip_<clip_number>_of_<total_clips>.png``,
    where ``<stem>`` is the base name of ``file_path`` without its extension.
    Missing output directories are created.

    Args:
        genre: Name of the sub-folder the image is saved under.
        file_path: Path of the audio file to load.
        clip_number: 1-based index of the clip within the file.
        total_clips: Number of clips per file; used in the file name and to
            decide when to run garbage collection.

    Raises:
        ValueError: If ``clip_number`` is less than 1.
    """
    if clip_number < 1:
        # A non-positive clip number would give a negative start time.
        raise ValueError(f"clip_number must be >= 1, got {clip_number}")

    clip_seconds = 3
    start_time = (clip_number - 1) * clip_seconds

    # Load only the requested clip instead of decoding the whole file for every clip.
    # NOTE(review): if a file's native rate is not 22050 Hz, resampling the clip
    # alone may differ slightly at the clip edges from slicing a fully resampled file.
    y_segment, sr = librosa.load(file_path, sr=22050, offset=start_time, duration=clip_seconds)

    # Compute onset strength envelope
    onset_env = librosa.onset.onset_strength(y=y_segment, sr=sr)

    # Compute tempogram: this captures rhythmic patterns
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)

    # Prepare the saving path. splitext() strips the extension whatever its
    # length, instead of assuming it is exactly four characters long.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    savingFilePath = os.path.join("Data", "tempograms (3 secs)", genre,
                                  stem + f"_clip_{clip_number}_of_{total_clips}.png")

    # Create any missing directories
    os.makedirs(os.path.dirname(savingFilePath), exist_ok=True)

    # Work on an explicit figure so that cleanup only touches this figure
    fig = plt.figure(figsize=(2, 4), frameon=False)
    try:
        ax = fig.add_subplot(111)

        # Plot tempogram
        librosa.display.specshow(tempogram, sr=sr, ax=ax, cmap='viridis')

        # Remove axes, ticks, and labels; keep the background transparent
        ax.set_axis_off()
        ax.set_facecolor('none')

        # Remove all margins and padding
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)

        # Save with tight bounding box and explicitly set zero padding
        fig.savefig(savingFilePath, bbox_inches='tight', pad_inches=0, transparent=True,
                    dpi=300, format='png')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

        # Collect garbage once per song, after its last clip. The previous
        # `clip_number % 50 == 0` check never fired for clips 1..10.
        if clip_number == total_clips:
            gc.collect()

# Root folder that holds the 3-second tempogram images
clip_tempogram_root = os.path.join("Data", "tempograms (3 secs)")
os.makedirs(clip_tempogram_root, exist_ok=True)

# One sub-folder per genre
for genre in genres:
    os.makedirs(os.path.join(clip_tempogram_root, genre), exist_ok=True)

# Render ten 3-second clip tempograms for every song
for song_index, file_path in enumerate(file_paths):
    # The label stored at the same position selects the genre name
    genre = genres[genre_count[song_index]]
    print(f"Processing tempograms for {file_path}")

    # Clips are numbered 1 through 10
    for clip_number in range(1, 11):
        create_tempogram_clip(genre, file_path, clip_number)

Processing tempograms for Data/genres_original\blues\blues.00000.wav
Processing tempograms for Data/genres_original\blues\blues.00001.wav
Processing tempograms for Data/genres_original\blues\blues.00002.wav
Processing tempograms for Data/genres_original\blues\blues.00003.wav
Processing tempograms for Data/genres_original\blues\blues.00004.wav
Processing tempograms for Data/genres_original\blues\blues.00005.wav
Processing tempograms for Data/genres_original\blues\blues.00006.wav
Processing tempograms for Data/genres_original\blues\blues.00007.wav
Processing tempograms for Data/genres_original\blues\blues.00008.wav
Processing tempograms for Data/genres_original\blues\blues.00009.wav
Processing tempograms for Data/genres_original\blues\blues.00010.wav
Processing tempograms for Data/genres_original\blues\blues.00011.wav
Processing tempograms for Data/genres_original\blues\blues.00012.wav
Processing tempograms for Data/genres_original\blues\blues.00013.wav
Processing tempograms for Data/gen