In [2]:
def extract_features(file_path):
    """Return the averaged MFCC vector of one audio file.

    The file is decoded with ``librosa.load`` (``res_type='kaiser_fast'``),
    ``n_mfcc=40`` coefficients are requested from ``librosa.feature.mfcc``,
    and the transposed coefficient matrix is averaged over its first axis.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        NumPy array holding the mean of each MFCC coefficient.
    """
    signal, sr = librosa.load(file_path, res_type='kaiser_fast')
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
    # Collapse the matrix to a single vector: one mean per coefficient.
    return np.mean(coefficients.T, axis=0)

### Feature Extraction #1 

In [5]:
import os
import json
import librosa
import numpy as np

from pydub import AudioSegment
from scipy.io.wavfile import read as read_wav

def extract_features(file_path):
    """Compute a mean-MFCC feature vector from the first 30 s of an audio file.

    The clip is decoded with pydub, written out as WAV, read back with
    ``scipy.io.wavfile.read`` and passed to ``python_speech_features.mfcc``;
    the resulting coefficients are averaged over axis 0.

    The intermediate WAV is written inside a private temporary directory
    rather than to a fixed ``temp.wav`` in the working directory, so no stray
    file is left behind and parallel runs cannot overwrite each other's clip.

    Args:
        file_path: Path to an audio file readable by ``AudioSegment.from_file``.

    Returns:
        The averaged coefficients as a plain list (JSON-serialisable), or
        ``None`` when any step raises; the error is printed in that case.
    """
    import tempfile  # stdlib; imported locally so the cell's import block is untouched

    try:
        # pydub slices are expressed in milliseconds: keep at most 30 seconds.
        audio = AudioSegment.from_file(file_path)[:30000]

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "temp.wav")
            # export() returns the open output file; close it before reading back.
            audio.export(wav_path, format="wav").close()
            sample_rate, audio_data = read_wav(wav_path)

        # Compute MFCCs (requires the python_speech_features library).
        # NOTE(review): numcep=40 is requested, yet the feature tables printed
        # in this notebook stop at column 25 - presumably the library caps the
        # output at its default filter count; confirm the intended size.
        from python_speech_features import mfcc
        mfccs = mfcc(audio_data, samplerate=sample_rate, numcep=40)
        mfccs_processed = np.mean(mfccs, axis=0)
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print(f"Error encountered while parsing file: {file_path}")
        print(f"Exception: {e}")
        return None
    return mfccs_processed.tolist()

# Parallel accumulators: one entry per song whose features could be computed.
features = []
labels = []
song_names = []

folders = ['Red', 'Blue', 'Green', 'Yellow', 'Black', 'Gray', 'Sky Blue']

for i, folder in enumerate(folders):
    print(f'Processing folder {folder}...')
    folder_dir = f'/Users/danielporras/Musica/music_import/Music/{folder}'
    for file_name in os.listdir(folder_dir):
        # Only .mp3 and .m4a files are treated as songs.
        if not file_name.endswith(('.mp3', '.m4a')):
            continue

        file_path = os.path.join(folder_dir, file_name)
        mfccs = extract_features(file_path)
        if mfccs is None:
            # None means feature extraction failed; skip the file entirely.
            continue

        features.append(mfccs)
        labels.append(i)  # the folder's position in `folders` is the class id
        song_names.append(file_name)

# Persist features, labels and song names together in one JSON document.
data = {"features": features, "labels": labels, "song_names": song_names}
with open('data.json', 'w') as fp:
    json.dump(data, fp)

Processing folder Red...




Processing folder Blue...




Processing folder Green...




Processing folder Yellow...




Processing folder Black...




Processing folder Gray...




Processing folder Sky Blue...




In [6]:
import pandas as pd

# One row per song: the feature columns first, then the integer class label.
df = pd.DataFrame(features).assign(label=labels)

# Print the frame (pandas abbreviates long frames in its text output).
print(df)

              0          1          2          3          4          5  \
0     16.376496  28.811688   1.413286 -15.710750  33.602086 -19.922297   
1     19.675282  17.522939  -5.850315 -32.867628  41.032371 -26.144989   
2     18.026084  24.145036  -6.055296 -33.304683  33.530252 -16.756789   
3     19.675282  17.522939  -5.850315 -32.867628  41.032371 -26.144989   
4     17.620489   7.949760  -5.535437 -11.649256  26.200835 -27.209739   
...         ...        ...        ...        ...        ...        ...   
1185  16.528522  22.506434  11.883110 -26.357287  20.325571 -28.282259   
1186  15.845086  23.999546  29.961570  -6.353121  11.232144 -29.466532   
1187  15.904542  21.430043   5.215098 -18.235004  44.509234 -24.500506   
1188  16.207152  21.844988   4.810943  -5.942010  27.585865 -13.400809   
1189  15.818882  23.562682  30.098126  -0.516538  21.104165 -13.606065   

              6          7          8          9  ...        17        18  \
0      7.173489  -6.683202  14.088

In [7]:
# Distinct class ids present in the label column.
print(df.loc[:, 'label'].unique())

[0 1 2 3 4 5 6]


In [8]:
# Number of songs per class id.
print(df.loc[:, 'label'].value_counts())

label
0    170
1    170
2    170
3    170
4    170
5    170
6    170
Name: count, dtype: int64


In [101]:
# Directory holding the songs to classify.
test_dir = '/Users/danielporras/Musica/music_import/test_songs'

# Extract the model inputs, together with the matching file names.
test_songs, test_song_names = load_and_preprocess_songs(test_dir)

# Run the loaded model on every song.
predictions = loaded_model.predict(test_songs)

# Index of the highest-scoring class for every song.
predicted_classes = np.argmax(predictions, axis=1)

# Class names by index; the order matches the `folders` list whose positions
# were used as integer labels when the features were extracted.
class_labels = ['Red', 'Blue', 'Green', 'Yellow', 'Black', 'Gray', 'Sky Blue', 'Orange']

# Print the predicted class next to each song name.
for song_name, predicted_class in zip(test_song_names, predicted_classes):
    # NOTE(review): this notebook reports a 10-unit output layer for
    # best_model.h5 while only 8 names are listed here, so an index of 8 or 9
    # is possible; report it explicitly instead of raising IndexError.
    if 0 <= predicted_class < len(class_labels):
        class_name = class_labels[predicted_class]
    else:
        class_name = f'Unknown class {predicted_class}'
    print(f'Song: {song_name}, Predicted class: {class_name}')



Song: watermarked_AEROPLANES_Groovy_Town_instrumental_2_31 copy.mp3, Predicted class: Yellow
Song: watermarked_Music_City_Reggae_Squad_Steel_Pan_Island_instrumental_2_36.mp3, Predicted class: Black
Song: Buscando-el-Son-no-voice_AdobeStock_709714291_preview copy.m4a, Predicted class: Orange
Song: watermarked_Music_City_Reggae_Squad_Steel_Pan_Island_instrumental_2_36 copy.mp3, Predicted class: Black
Song: watermarked_LNDO_Just_Look_Up_background_vocals_2_45 copy.mp3, Predicted class: Red
Song: watermarked_Jed_Stark_Holidaze_instrumental_2_03.mp3, Predicted class: Black
Song: watermarked_Material_Gurl_The_Happiest_Memories_instrumental_1_56.mp3, Predicted class: Gray
Song: Spanish-Summer-Dance_AdobeStock_452607663_preview copy.m4a, Predicted class: Red
Song: watermarked_Reveille_Go_For_It_instrumental_2_48 copy.mp3, Predicted class: Orange
Song: watermarked_Tiger_Gang_Hotpants_instrumental_1_20.mp3, Predicted class: Red
Song: watermarked_PALA_Imagination_background_vocals_3_08 copy.mp3, 

### Feature Extraction #2 Using Time Shift

In [104]:
import os
import json
import librosa
import numpy as np

def extract_features_with_shift(file_path, shift_time_ms=1000):
    """Mean-MFCC features of a 30 s clip that is delayed by leading silence.

    The first 30 seconds of the file are taken, ``shift_time_ms`` of silence
    is prepended and the same duration is removed from the end. The result is
    written out as WAV, read back with ``scipy.io.wavfile.read`` and passed to
    ``python_speech_features.mfcc``; the coefficients are averaged over axis 0.

    Fixes over the previous version:
      * ``shift_time_ms=0`` no longer discards the whole clip (``audio[:-0]``
        is an empty slice); the end index is now computed explicitly.
      * The intermediate WAV lives in a private temporary directory instead of
        a fixed ``temp.wav`` in the working directory.

    Args:
        file_path: Path to an audio file readable by ``AudioSegment.from_file``.
        shift_time_ms: Length of the prepended silence in milliseconds.

    Returns:
        The averaged coefficients as a plain list, or ``None`` when any step
        raises; the error is printed in that case.
    """
    import tempfile  # stdlib; imported locally so the cell's import block is untouched

    try:
        # pydub slices are expressed in milliseconds: keep at most 30 seconds.
        audio = AudioSegment.from_file(file_path)[:30000]

        # Silent segment that pushes the audio later in time.
        silence = AudioSegment.silent(duration=shift_time_ms)

        # Drop the last shift_time_ms. The end index is explicit (and clamped
        # at 0) so that a zero shift keeps the entire clip.
        keep_ms = max(len(audio) - shift_time_ms, 0)
        audio = silence + audio[:keep_ms]

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "temp.wav")
            # export() returns the open output file; close it before reading back.
            audio.export(wav_path, format="wav").close()
            sample_rate, audio_data = read_wav(wav_path)

        # Compute MFCCs
        from python_speech_features import mfcc
        mfccs = mfcc(audio_data, samplerate=sample_rate, numcep=40)
        mfccs_processed = np.mean(mfccs, axis=0)
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print(f"Error encountered while parsing file: {file_path}")
        print(f"Exception: {e}")
        return None

    return mfccs_processed.tolist()

# The unshifted dataset comes from the plain extract_features cell; this cell
# builds the time-shifted counterpart with extract_features_with_shift.
# NOTE(review): `folders` is not defined in this cell and must already exist in
# the kernel. The recorded output includes an 'Orange' folder, whereas the
# nearest earlier definition lists only seven folders - confirm which list is
# intended before re-running.
features_shifted = []
labels_shifted = []
song_names_shifted = []

for i, folder in enumerate(folders):
    print(f'Processing folder {folder} with time shift...')
    folder_dir = f'/Users/danielporras/Musica/music_import/Music/{folder}'
    for file_name in os.listdir(folder_dir):
        # Only .mp3 and .m4a files are treated as songs.
        if not file_name.endswith(('.mp3', '.m4a')):
            continue

        file_path = os.path.join(folder_dir, file_name)
        mfccs = extract_features_with_shift(file_path)
        if mfccs is None:
            # None means feature extraction failed; skip the file entirely.
            continue

        features_shifted.append(mfccs)
        labels_shifted.append(i)  # the folder's position in `folders` is the class id
        song_names_shifted.append(file_name)

# Persist the time-shifted dataset in its own JSON file.
data_shifted = {
    "features": features_shifted,
    "labels": labels_shifted,
    "song_names": song_names_shifted,
}
with open('data_shifted.json', 'w') as fp:
    json.dump(data_shifted, fp)

Processing folder Red with time shift...




Processing folder Blue with time shift...




Processing folder Green with time shift...




Processing folder Yellow with time shift...




Processing folder Black with time shift...




Processing folder Gray with time shift...




Processing folder Sky Blue with time shift...




Processing folder Orange with time shift...




In [106]:
import pandas as pd

# One row per song: the feature columns first, then the integer class label.
df_shifted = pd.DataFrame(features_shifted).assign(label=labels_shifted)

# Print the frame (pandas abbreviates long frames in its text output).
print(df_shifted)

              0          1          2          3          4          5  \
0     14.610587  27.714609   1.322317 -15.124762  32.731843 -19.257700   
1     16.971260  16.920118  -4.940744 -31.374672  39.371372 -24.920137   
2     16.206447  23.392870  -5.699620 -31.968566  32.738849 -16.412719   
3     16.971260  16.920118  -4.940744 -31.374672  39.371372 -24.920137   
4     15.905440   7.556098  -4.990916 -10.568419  25.462356 -26.244429   
...         ...        ...        ...        ...        ...        ...   
1355  15.531795  25.692904  15.067837  -6.460210  18.035073  -9.317348   
1356  16.449863  22.153427 -13.820114 -23.654507  33.695369 -23.893322   
1357  14.631916  19.700689   3.786797 -20.996323  30.887521 -18.155167   
1358  15.039339  20.740800  -1.806234 -12.815613  29.483537  -9.796950   
1359  18.102179  16.993679  -9.557646 -16.646796  44.640764 -23.737521   

              6          7          8          9  ...        17        18  \
0      6.894010  -6.320214  13.639

In [107]:
# Sanity check: the two lists are filled in lockstep, so both lengths should agree.
print(len(features_shifted), len(labels_shifted), sep='\n')

1360
1360


In [108]:
# Distinct class ids in the time-shifted data.
print(df_shifted.loc[:, 'label'].unique())

# Number of songs per class id in the time-shifted data.
print(df_shifted.loc[:, 'label'].value_counts())

[0 1 2 3 4 5 6 7]
label
0    170
1    170
2    170
3    170
4    170
5    170
6    170
7    170
Name: count, dtype: int64


### Feature Extraction #3: Random 30-Second Segment

In [3]:
import os
import json
import librosa
import numpy as np

from pydub import AudioSegment
from scipy.io.wavfile import read as read_wav

import random

import random

def extract_features(file_path, rng=None):
    """Mean-MFCC features of a randomly positioned 30 s window of an audio file.

    Files longer than 30 seconds contribute a 30 s slice starting at a random
    millisecond offset; shorter files are used whole. The clip is written out
    as WAV, read back with ``scipy.io.wavfile.read`` and passed to
    ``python_speech_features.mfcc``; the coefficients are averaged over axis 0.

    The intermediate WAV lives in a private temporary directory instead of a
    fixed ``temp.wav`` in the working directory, so no stray file is left
    behind and parallel runs cannot overwrite each other's clip.

    Args:
        file_path: Path to an audio file readable by ``AudioSegment.from_file``.
        rng: Optional object with a ``randint(a, b)`` method, for example
            ``random.Random(42)``, used to pick the window start so results
            can be reproduced. Defaults to the module-level ``random``
            generator, which is what the function always used before.

    Returns:
        The averaged coefficients as a plain list, or ``None`` when any step
        raises; the error is printed in that case.
    """
    import tempfile  # stdlib; imported locally so the cell's import block is untouched

    window_ms = 30000  # pydub lengths and slices are in milliseconds

    try:
        # Load the audio file
        audio = AudioSegment.from_file(file_path)

        if len(audio) > window_ms:
            picker = random if rng is None else rng
            # randint is inclusive on both ends, so the window can end exactly
            # at the end of the recording.
            start_pos = picker.randint(0, len(audio) - window_ms)
            audio = audio[start_pos:start_pos + window_ms]
        # Recordings of 30 s or less are already within the limit and are kept as is.

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "temp.wav")
            # export() returns the open output file; close it before reading back.
            audio.export(wav_path, format="wav").close()
            sample_rate, audio_data = read_wav(wav_path)

        # Compute MFCCs (requires the python_speech_features library).
        from python_speech_features import mfcc
        mfccs = mfcc(audio_data, samplerate=sample_rate, numcep=40)
        mfccs_processed = np.mean(mfccs, axis=0)
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print(f"Error encountered while parsing file: {file_path}")
        print(f"Exception: {e}")
        return None
    return mfccs_processed.tolist()
# Parallel accumulators: one entry per song whose features could be computed.
features = []
labels = []
song_names = []

folders = ['Red', 'Blue', 'Green', 'Yellow', 'Black', 'Gray', 'Sky Blue']

for i, folder in enumerate(folders):
    print(f'Processing folder {folder}...')
    folder_dir = f'/Users/danielporras/Musica/music_import/Music/{folder}'
    for file_name in os.listdir(folder_dir):
        # Only .mp3 and .m4a files are treated as songs.
        if not file_name.endswith(('.mp3', '.m4a')):
            continue

        file_path = os.path.join(folder_dir, file_name)
        mfccs = extract_features(file_path)
        if mfccs is None:
            # None means feature extraction failed; skip the file entirely.
            continue

        features.append(mfccs)
        labels.append(i)  # the folder's position in `folders` is the class id
        song_names.append(file_name)

# Persist features, labels and song names together in one JSON document.
data = {"features": features, "labels": labels, "song_names": song_names}
with open('random_data.json', 'w') as fp:
    json.dump(data, fp)

Processing folder Red...




Processing folder Blue...




Processing folder Green...




Processing folder Yellow...




Processing folder Black...




Processing folder Gray...




Processing folder Sky Blue...




In [4]:
import pandas as pd
import json

# Read the random-window dataset back from disk.
with open('random_data.json', 'r') as fp:
    data = json.load(fp)

# One row per song: the feature columns first, then the integer class label.
df_random = pd.DataFrame(data['features']).assign(label=data['labels'])

# Preview the first rows only.
print(df_random.head())

# Sanity check: there should be exactly one label per feature vector.
print(len(data['features']), len(data['labels']), sep='\n')

# Distinct class ids in the random-window data.
print(df_random.loc[:, 'label'].unique())

# Number of songs per class id in the random-window data.
print(df_random.loc[:, 'label'].value_counts())

           0          1         2          3          4          5          6  \
0  16.844739  30.422090  1.830816 -18.123041  27.126715 -21.722195   5.398213   
1  19.675282  17.522939 -5.850315 -32.867628  41.032371 -26.144989   9.659302   
2  18.211275  23.736535 -5.919252 -35.278614  34.428249 -16.555121  22.410193   
3  19.675282  17.522939 -5.850315 -32.867628  41.032371 -26.144989   9.659302   
4  17.640900   7.985857 -5.525054 -11.677214  26.195470 -27.270709  24.839717   

           7          8          9  ...        17        18        19  \
0  -8.348316  13.117606 -26.387807  ...  4.015582 -0.238436  1.316470   
1 -15.054356  15.836676 -34.594575  ...  5.561156 -2.148287  0.657969   
2  -5.160546  21.557241 -24.473887  ...  4.717976 -2.524040  1.550600   
3 -15.054356  15.836676 -34.594575  ...  5.561156 -2.148287  0.657969   
4 -16.494738  -0.356620   4.875568  ... -0.866263  1.762138  0.813802   

         20        21        22        23        24        25  label  
0  

### Test Model H6

In [110]:
from tensorflow.keras.models import load_model

# Load the saved Keras model for the "H6" experiment.
# NOTE(review): the file extension is '.h6', while another cell in this
# notebook loads 'best_model.h5' from the same directory - confirm this is a
# separate file and not a typo.
model_h6_path = '/Users/danielporras/Musica/music_x/best_model.h6'
loaded_model2 = load_model(model_h6_path)



In [None]:
def extract_features(file_path):
    """Return the averaged MFCC vector of one audio file.

    The file is decoded with ``librosa.load`` (``res_type='kaiser_fast'``),
    ``n_mfcc=40`` coefficients are requested from ``librosa.feature.mfcc``,
    and the transposed coefficient matrix is averaged over its first axis.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        NumPy array holding the mean of each MFCC coefficient.
    """
    signal, sr = librosa.load(file_path, res_type='kaiser_fast')
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
    # Collapse the matrix to a single vector: one mean per coefficient.
    return np.mean(coefficients.T, axis=0)

In [None]:
import os
import json
import librosa
import numpy as np

from pydub import AudioSegment
from scipy.io.wavfile import read as read_wav

def extract_features(file_path):
    """Compute a mean-MFCC feature vector from the first 30 s of an audio file.

    The clip is decoded with pydub, written out as WAV, read back with
    ``scipy.io.wavfile.read`` and passed to ``python_speech_features.mfcc``;
    the resulting coefficients are averaged over axis 0.

    The intermediate WAV is written inside a private temporary directory
    rather than to a fixed ``temp.wav`` in the working directory, so no stray
    file is left behind and parallel runs cannot overwrite each other's clip.

    Args:
        file_path: Path to an audio file readable by ``AudioSegment.from_file``.

    Returns:
        The averaged coefficients as a plain list (JSON-serialisable), or
        ``None`` when any step raises; the error is printed in that case.
    """
    import tempfile  # stdlib; imported locally so the cell's import block is untouched

    try:
        # pydub slices are expressed in milliseconds: keep at most 30 seconds.
        audio = AudioSegment.from_file(file_path)[:30000]

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "temp.wav")
            # export() returns the open output file; close it before reading back.
            audio.export(wav_path, format="wav").close()
            sample_rate, audio_data = read_wav(wav_path)

        # Compute MFCCs (requires the python_speech_features library).
        # NOTE(review): numcep=40 is requested, yet load_and_preprocess_songs
        # reshapes each vector to (26, 1, 1) - presumably the library caps the
        # output at its default filter count; confirm the intended size.
        from python_speech_features import mfcc
        mfccs = mfcc(audio_data, samplerate=sample_rate, numcep=40)
        mfccs_processed = np.mean(mfccs, axis=0)
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print(f"Error encountered while parsing file: {file_path}")
        print(f"Exception: {e}")
        return None
    return mfccs_processed.tolist()

# Parallel accumulators: one entry per song whose features could be computed.
features = []
labels = []
song_names = []

folders = ['Red', 'Blue', 'Green', 'Yellow', 'Black', 'Gray', 'Sky Blue', 'Orange']

for i, folder in enumerate(folders):
    print(f'Processing folder {folder}...')
    folder_dir = f'/Users/danielporras/Musica/music_import/Music/{folder}'
    for file_name in os.listdir(folder_dir):
        # Only .mp3 and .m4a files are treated as songs.
        if not file_name.endswith(('.mp3', '.m4a')):
            continue

        file_path = os.path.join(folder_dir, file_name)
        mfccs = extract_features(file_path)
        if mfccs is None:
            # None means feature extraction failed; skip the file entirely.
            continue

        features.append(mfccs)
        labels.append(i)  # the folder's position in `folders` is the class id
        song_names.append(file_name)

# Persist features, labels and song names together in one JSON document.
data = {"features": features, "labels": labels, "song_names": song_names}
with open('data.json', 'w') as fp:
    json.dump(data, fp)

Processing folder Red...




Processing folder Blue...




Processing folder Green...




Processing folder Yellow...




Processing folder Black...




Processing folder Gray...




Processing folder Sky Blue...




Processing folder Orange...




In [None]:
import pandas as pd

# One row per song: the feature columns first, then the integer class label.
df = pd.DataFrame(features).assign(label=labels)

# Print the frame (pandas abbreviates long frames in its text output).
print(df)

              0          1          2          3          4          5  \
0     16.376496  28.811688   1.413286 -15.710750  33.602086 -19.922297   
1     19.675282  17.522939  -5.850315 -32.867628  41.032371 -26.144989   
2     18.026084  24.145036  -6.055296 -33.304683  33.530252 -16.756789   
3     19.675282  17.522939  -5.850315 -32.867628  41.032371 -26.144989   
4     17.620489   7.949760  -5.535437 -11.649256  26.200835 -27.209739   
...         ...        ...        ...        ...        ...        ...   
1355  17.314374  26.512974  15.196931  -6.693091  19.302271  -9.787532   
1356  18.250844  22.613254 -14.332369 -24.030397  35.132068 -24.128890   
1357  16.360278  20.238952   3.770664 -21.760347  32.424997 -18.917337   
1358  16.860117  21.172417  -2.615773 -13.888071  31.149777 -10.566078   
1359  19.972477  17.526055 -10.241332 -17.500090  46.912233 -24.612512   

              6          7          8          9  ...        17        18  \
0      7.173489  -6.683202  14.088

In [None]:
# Distinct class ids present in the label column.
print(df.loc[:, 'label'].unique())

[0 1 2 3 4 5 6 7]


In [None]:
# Number of songs per class id.
print(df.loc[:, 'label'].value_counts())

label
0    170
1    170
2    170
3    170
4    170
5    170
6    170
7    170
Name: count, dtype: int64


In [None]:
import os

# Folder whose audio files are counted.
folder_path = '/Users/danielporras/Musica/music_import/Music/Sky Blue'
# Only .mp3 and .m4a files are counted as audio.
audio_files = [f for f in os.listdir(folder_path) if f.endswith(('.mp3', '.m4a'))]

# Take the folder name from the path itself so the message always names the
# folder that was actually counted (it previously said 'Black' for 'Sky Blue').
folder_name = os.path.basename(folder_path)
print(f"There are {len(audio_files)} audio files in the '{folder_name}' folder.")

There are 115 audio files in the 'Black' folder.


In [None]:
from tensorflow.keras.models import load_model

# Load the saved Keras model that later cells use for prediction.
model_h5_path = '/Users/danielporras/Musica/music_x/best_model.h5'
loaded_model = load_model(model_h5_path)



In [None]:
# The model's last layer is taken as its output layer.
output_layer = loaded_model.layers[-1]

# Index 1 of that layer's output shape is reported as the number of classes.
class_count = output_layer.output_shape[1]
print('Number of classes:', class_count)

Number of classes: 10


In [None]:
from sklearn.preprocessing import LabelEncoder

# Colour names used as class labels.
# NOTE(review): this rebinds `labels`, which the feature-extraction cells use
# for the integer class ids - re-run those cells before rebuilding any frame.
labels = ['Red', 'Blue', 'Green', 'Yellow', 'Black', 'Gray', 'Sky Blue', 'Orange']

# fit() returns the encoder itself, so construction and fitting can be chained.
encoder = LabelEncoder().fit(labels)

# NOTE(review): the printed classes come out in alphabetical order, which is
# not the folder order used for the integer labels during feature extraction;
# do not mix the two encodings.
print('Class labels:', encoder.classes_)

Class labels: ['Black' 'Blue' 'Gray' 'Green' 'Orange' 'Red' 'Sky Blue' 'Yellow']


In [None]:
def load_and_preprocess_songs(directory):
    """Build a batch of model inputs from every song in a directory.

    Each ``.mp3`` / ``.m4a`` file is passed to ``extract_features``; files for
    which it returns ``None`` are skipped. Every remaining feature vector is
    reshaped to ``(26, 1, 1)`` and the vectors are stacked into one array.

    The previous version tried to "add a batch dimension" whenever the stacked
    array had fewer than four dimensions. That only happened when no song was
    loaded, and it produced an array of shape ``(1, 0)``. An empty directory
    now yields an empty batch of shape ``(0, 26, 1, 1)`` instead.

    Args:
        directory: Path of the folder containing the songs.

    Returns:
        Tuple ``(song_features, song_names)``: a NumPy array of shape
        ``(n_songs, 26, 1, 1)`` and the list of file names in the same order.
    """
    feature_shape = (26, 1, 1)  # input shape the features are reshaped to
    song_features = []
    song_names = []

    for filename in os.listdir(directory):
        # Songs are assumed to be in .mp3 or .m4a format; ignore everything else.
        if not filename.endswith(('.mp3', '.m4a')):
            continue

        mfccs = extract_features(os.path.join(directory, filename))
        if mfccs is None:
            # Feature extraction failed for this file; leave it out.
            continue

        song_features.append(np.reshape(mfccs, feature_shape))
        song_names.append(filename)

    if not song_features:
        # Keep the rank consistent even when nothing could be loaded.
        return np.empty((0,) + feature_shape), song_names

    # Stacking n arrays of shape (26, 1, 1) already gives (n, 26, 1, 1).
    return np.array(song_features), song_names

In [None]:
import os
import json
import librosa
import numpy as np

def extract_features_with_shift(file_path, shift_time_ms=1000):
    """Mean-MFCC features of a 30 s clip that is delayed by leading silence.

    The first 30 seconds of the file are taken, ``shift_time_ms`` of silence
    is prepended and the same duration is removed from the end. The result is
    written out as WAV, read back with ``scipy.io.wavfile.read`` and passed to
    ``python_speech_features.mfcc``; the coefficients are averaged over axis 0.

    Fixes over the previous version:
      * ``shift_time_ms=0`` no longer discards the whole clip (``audio[:-0]``
        is an empty slice); the end index is now computed explicitly.
      * The intermediate WAV lives in a private temporary directory instead of
        a fixed ``temp.wav`` in the working directory.

    Args:
        file_path: Path to an audio file readable by ``AudioSegment.from_file``.
        shift_time_ms: Length of the prepended silence in milliseconds.

    Returns:
        The averaged coefficients as a plain list, or ``None`` when any step
        raises; the error is printed in that case.
    """
    import tempfile  # stdlib; imported locally so the cell's import block is untouched

    try:
        # pydub slices are expressed in milliseconds: keep at most 30 seconds.
        audio = AudioSegment.from_file(file_path)[:30000]

        # Silent segment that pushes the audio later in time.
        silence = AudioSegment.silent(duration=shift_time_ms)

        # Drop the last shift_time_ms. The end index is explicit (and clamped
        # at 0) so that a zero shift keeps the entire clip.
        keep_ms = max(len(audio) - shift_time_ms, 0)
        audio = silence + audio[:keep_ms]

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "temp.wav")
            # export() returns the open output file; close it before reading back.
            audio.export(wav_path, format="wav").close()
            sample_rate, audio_data = read_wav(wav_path)

        # Compute MFCCs
        from python_speech_features import mfcc
        mfccs = mfcc(audio_data, samplerate=sample_rate, numcep=40)
        mfccs_processed = np.mean(mfccs, axis=0)
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print(f"Error encountered while parsing file: {file_path}")
        print(f"Exception: {e}")
        return None

    return mfccs_processed.tolist()

# The unshifted dataset comes from the plain extract_features cell; this cell
# builds the time-shifted counterpart with extract_features_with_shift.
# NOTE(review): `folders` is not defined in this cell and must already exist
# in the kernel when it runs.
features_shifted = []
labels_shifted = []
song_names_shifted = []

for i, folder in enumerate(folders):
    print(f'Processing folder {folder} with time shift...')
    folder_dir = f'/Users/danielporras/Musica/music_import/Music/{folder}'
    for file_name in os.listdir(folder_dir):
        # Only .mp3 and .m4a files are treated as songs.
        if not file_name.endswith(('.mp3', '.m4a')):
            continue

        file_path = os.path.join(folder_dir, file_name)
        mfccs = extract_features_with_shift(file_path)
        if mfccs is None:
            # None means feature extraction failed; skip the file entirely.
            continue

        features_shifted.append(mfccs)
        labels_shifted.append(i)  # the folder's position in `folders` is the class id
        song_names_shifted.append(file_name)

# Persist the time-shifted dataset in its own JSON file.
data_shifted = {
    "features": features_shifted,
    "labels": labels_shifted,
    "song_names": song_names_shifted,
}
with open('data_shifted.json', 'w') as fp:
    json.dump(data_shifted, fp)

Processing folder Red with time shift...




Processing folder Blue with time shift...




Processing folder Green with time shift...




Processing folder Yellow with time shift...




Processing folder Black with time shift...




Processing folder Gray with time shift...




Processing folder Sky Blue with time shift...




Processing folder Orange with time shift...




### ResNet Feature Extraction (MFCCs)

In [1]:
import os
import json
import librosa
import numpy as np

def extract_features(file_path):
    """Return the full MFCC matrix of an audio file as nested lists.

    Unlike the mean-MFCC extractors, nothing is averaged here: the matrix
    returned by ``librosa.feature.mfcc`` (``n_mfcc=40``) is kept whole and
    given one extra trailing axis of length 1.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        The MFCC matrix with the added trailing axis, converted to nested
        Python lists so it can be JSON-serialised, or ``None`` when loading
        or feature computation raises; the error is printed in that case.
    """
    try:
        signal, sr = librosa.load(file_path, res_type='kaiser_fast')
        mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
        # Append a singleton last axis, then convert to lists for JSON.
        return mfcc_matrix[..., np.newaxis].tolist()
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it.
        print(f"Error encountered while parsing file: {file_path}")
        print(f"Exception: {e}")
        return None

# Parallel accumulators: one entry per song whose features could be computed.
features = []
labels = []
song_names = []

folders = ['Red', 'Blue', 'Green', 'Yellow', 'Black', 'Gray', 'Sky Blue', 'Orange']

for i, folder in enumerate(folders):
    print(f'Processing folder {folder}...')
    folder_dir = f'/Users/danielporras/Musica/music_import/Music/{folder}'
    for file_name in os.listdir(folder_dir):
        # Only .mp3 and .m4a files are treated as songs.
        if not file_name.endswith(('.mp3', '.m4a')):
            continue

        file_path = os.path.join(folder_dir, file_name)
        mfccs = extract_features(file_path)
        if mfccs is None:
            # None means feature extraction failed; skip the file entirely.
            continue

        features.append(mfccs)
        labels.append(i)  # the folder's position in `folders` is the class id
        song_names.append(file_name)

# Persist features, labels and song names together in one JSON document.
data = {"features": features, "labels": labels, "song_names": song_names}
with open('resnet_data.json', 'w') as fp:
    json.dump(data, fp)

Processing folder Red...


  audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Processing folder Blue...
Processing folder Green...
Processing folder Yellow...
Processing folder Black...
Processing folder Gray...
Processing folder Sky Blue...
Processing folder Orange...
