In [None]:
import os
import pymongo
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from joblib import Parallel, delayed, Memory

In [None]:
# Record the installed librosa version in the notebook output.
print(librosa.__version__)

In [None]:
# Load the FMA metadata tables.
# Paths are built with os.path.join so the notebook is not tied to the Windows
# path separator (a literal backslash is part of the file name on POSIX).
METADATA_DIR = 'fma_metadata'


def read_metadata(filename):
    """Read one CSV file from METADATA_DIR into a DataFrame.

    Parameters
    ----------
    filename : str
        File name relative to METADATA_DIR, e.g. ``'tracks.csv'``.

    Returns
    -------
    pandas.DataFrame
        The table parsed with pandas' default ``read_csv`` options.
    """
    return pd.read_csv(os.path.join(METADATA_DIR, filename))


# NOTE(review): the FMA tracks/features/echonest CSVs presumably carry
# multi-row headers; with the default options used here those rows would end
# up in the data. Confirm before relying on these three frames.
tracks = read_metadata('tracks.csv')
genres = read_metadata('genres.csv')
# `features` is rebound later in this notebook to the extracted audio features.
features = read_metadata('features.csv')
echonest = read_metadata('echonest.csv')
raw_albums = read_metadata('raw_albums.csv')
raw_artists = read_metadata('raw_artists.csv')
raw_genres = read_metadata('raw_genres.csv')
raw_tracks = read_metadata('raw_tracks.csv')
raw_echonest = read_metadata('raw_echonest.csv')

In [None]:
# Find a specific artist by name (case-insensitive) and print whether it exists
def find_artist(artist_name, artists=None):
    """Report whether an artist with the given name is present.

    The comparison ignores case. The searched DataFrame is never modified;
    earlier versions lower-cased the ``artist_name`` column in place, which
    destroyed the original spelling for every later cell.

    Parameters
    ----------
    artist_name : str
        Name to look for.
    artists : pandas.DataFrame, optional
        Table with an ``artist_name`` column. Defaults to the notebook-level
        ``raw_artists`` frame.

    Returns
    -------
    bool
        True when a matching artist exists, False otherwise. The same result
        is also printed as 'Artist found' / 'Artist not found'.
    """
    if artists is None:
        artists = raw_artists

    wanted = artist_name.lower()
    # Lower-case a temporary series only; missing names are dropped first so
    # they can never match.
    known_names = artists['artist_name'].dropna().str.lower()
    found = bool(known_names.eq(wanted).any())

    print('Artist found' if found else 'Artist not found')
    return found
        
        
# Example lookup: find_artist lower-cases both sides before comparing, so the
# name can be given in lower case here.
artist_name = 'lucky dragons'
find_artist(artist_name)

In [None]:
# Path to the directory containing audio files.
# Set the FMA_AUDIO_DIR environment variable to point the notebook at another
# location; the previously hard-coded path is kept unchanged as the default.
AUDIO_DIR = os.environ.get("FMA_AUDIO_DIR", r"D:\\fma_small1")


def list_audio_files(root_dir, extension=".mp3"):
    """Recursively collect audio files below ``root_dir``.

    Parameters
    ----------
    root_dir : str
        Directory to walk. A missing directory simply yields no files.
    extension : str, optional
        File suffix to keep; compared case-insensitively so ``.MP3`` files
        are not skipped.

    Returns
    -------
    list of str
        Full paths, sorted so the processing order is the same on every run
        (``os.walk`` itself gives no ordering guarantee).
    """
    suffix = extension.lower()
    matches = []
    for folder, _subdirs, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(suffix):
                matches.append(os.path.join(folder, filename))
    return sorted(matches)


# Get all audio files
audio_files = list_audio_files(AUDIO_DIR)
if not audio_files:
    # Surface a wrong or missing path here rather than further downstream.
    print(f"Warning: no .mp3 files found under {AUDIO_DIR!r}")

# Create a joblib Memory object for caching, using "cache" as its location;
# it is applied below through the @memory.cache decorator.
memory = Memory("cache", verbose=0)

# Cached worker. It lets errors propagate so that only successful extractions
# are stored in the joblib cache; a failed file is retried on the next run
# instead of having its empty placeholder memoized.
@memory.cache
def _compute_audio_features(file):
    """Load one audio file and compute its features.

    Parameters
    ----------
    file : str
        Path of the audio file to load.

    Returns
    -------
    tuple
        ``(mfcc, spectral_centroid, zero_crossing_rate)`` exactly as returned
        by the corresponding librosa feature functions.

    Raises
    ------
    Exception
        Whatever librosa raises while loading or analysing the file.
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    y, sr = librosa.load(file, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    return mfcc, spectral_centroid, zero_crossing_rate


# Function to process audio file and extract features
def extract_features(file):
    """Extract MFCC, spectral centroid and zero-crossing rate for one file.

    Parameters
    ----------
    file : str
        Path of the audio file.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfcc, spectral_centroid, zero_crossing_rate)``. If the file cannot
        be processed, the error is printed and three empty arrays are
        returned so a single bad file does not abort a batch run.
    """
    try:
        return _compute_audio_features(file)
    except Exception as e:
        print(f"Error loading file {file}: {e}")
        return np.array([]), np.array([]), np.array([])


# tqdm drives the progress bar below but is not imported at the top of the
# notebook; import it here and fall back to a no-op wrapper if it is missing.
try:
    from tqdm.auto import tqdm
except ImportError:
    def tqdm(iterable, **_kwargs):
        """Progress-bar stand-in that returns the iterable unchanged."""
        return iterable

# Fail with a clear message instead of the opaque unpacking ValueError that
# zip(*[]) would raise further down.
if not audio_files:
    raise ValueError(f"No audio files found under {AUDIO_DIR!r}; nothing to extract")

# Use joblib to process the files in parallel and extract features
# NOTE: this rebinds `features`, which earlier held the features.csv DataFrame.
features = Parallel(n_jobs=-1)(
    delayed(extract_features)(file) for file in tqdm(audio_files, total=len(audio_files))
)

# Unpack features: one tuple per feature kind, with one entry per audio file
mfcc_features, spectral_centroid_features, zero_crossing_rate_features = zip(*features)

# Print the type of each feature
print(type(mfcc_features))
print(type(spectral_centroid_features))
print(type(zero_crossing_rate_features))

In [None]:
# Show a count and a short preview instead of dumping the array of every
# audio file into the notebook output.
print(f"{len(spectral_centroid_features)} spectral centroid arrays")
print(spectral_centroid_features[:3])

In [None]:
def stack_frames(feature_arrays):
    """Transpose every non-empty array and concatenate them along axis 0.

    Parameters
    ----------
    feature_arrays : iterable of numpy.ndarray
        Per-file feature arrays. Empty arrays (the placeholder that
        extract_features returns for files it could not process) are skipped.

    Returns
    -------
    numpy.ndarray
        The transposed arrays concatenated along axis 0.

    Raises
    ------
    ValueError
        If there is no non-empty array to stack.
    """
    usable = [f.T for f in feature_arrays if f.size > 0]
    if not usable:
        raise ValueError("no non-empty feature arrays to stack")
    return np.concatenate(usable, axis=0)


# Convert features to NumPy arrays
mfcc_features_list = stack_frames(mfcc_features)
spectral_centroid_features_list = stack_frames(spectral_centroid_features)
zero_crossing_rate_features_list = stack_frames(zero_crossing_rate_features)


# Min-max scaling (MinMaxScaler, not RobustScaler). The "robust_standardized"
# variable name is kept only because later cells refer to it.
# One scaler per feature keeps each fit available afterwards; refitting a
# single shared instance discarded the MFCC and spectral-centroid fits.
mfcc_scaler = MinMaxScaler()
spectral_centroid_scaler = MinMaxScaler()
zero_crossing_rate_scaler = MinMaxScaler()
robust_standardized_mfcc_features = mfcc_scaler.fit_transform(mfcc_features_list)
normalized_spectral_centroid = spectral_centroid_scaler.fit_transform(spectral_centroid_features_list)
normalized_zero_crossing_rate = zero_crossing_rate_scaler.fit_transform(zero_crossing_rate_features_list)

# Backward compatibility: `normalizer` used to end up fitted on the
# zero-crossing-rate data, so it stays bound to that scaler.
normalizer = zero_crossing_rate_scaler

# Display the first 5 rows of each normalized feature
print("Standardized MFCC Features: ",robust_standardized_mfcc_features[:5])
print("________________________")
print("Normalized Spectral Centrioid: ",normalized_spectral_centroid[:5])
print("________________________")
print("Normalized Zero Crossing Rate: ",normalized_zero_crossing_rate[:5])

In [None]:
# applying pca on the features
from sklearn.decomposition import PCA

# Initialize PCA with 2 components. random_state is fixed so reruns give the
# same projection when scikit-learn selects a randomized solver.
pca = PCA(n_components=2, random_state=42)

# Fit and transform the min-max scaled MFCC features
pca_mfcc_features = pca.fit_transform(robust_standardized_mfcc_features)

# Display the shape of the PCA features, followed by a short preview
print(pca_mfcc_features.shape)
print(pca_mfcc_features[:5])

In [None]:
# # Save the normalized features to .npy files
# np.save('normalized_mfcc_features.npy', robust_standardized_mfcc_features)
# np.save('normalized_spectral_centroid.npy', normalized_spectral_centroid)
# np.save('normalized_zero_crossing_rate.npy', normalized_zero_crossing_rate)


In [None]:
# Saving the features to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["music_features"]
mfcc_collection = db["mfcc_features"]
spectral_centroid_collection = db["spectral_centroid"]
zero_crossing_rate_collection = db["zero_crossing_rate"]


def to_documents(rows):
    """Wrap each feature row in a ``{"value": [...]}`` document.

    Rows are converted to plain Python lists because BSON cannot encode
    numpy arrays; inserting them directly raises ``InvalidDocument``.

    Parameters
    ----------
    rows : iterable of array-like
        Feature rows, e.g. the rows of a 2-D numpy array.

    Returns
    -------
    list of dict
        One document per row.
    """
    return [{"value": np.asarray(row).tolist()} for row in rows]


# Build the documents under new names so the feature arrays stay intact and
# this cell (and the cells that consume the arrays) can be re-run safely.
mfcc_documents = to_documents(robust_standardized_mfcc_features)
spectral_centroid_documents = to_documents(normalized_spectral_centroid)
zero_crossing_rate_documents = to_documents(normalized_zero_crossing_rate)

# Insert the features into the collections.
# NOTE: every execution of this cell inserts the documents again.
mfcc_collection.insert_many(mfcc_documents)
spectral_centroid_collection.insert_many(spectral_centroid_documents)
zero_crossing_rate_collection.insert_many(zero_crossing_rate_documents)


In [None]:
# Print one stored document from each of the three feature collections
for collection in (
    mfcc_collection,
    spectral_centroid_collection,
    zero_crossing_rate_collection,
):
    print(collection.find_one())