In [11]:
import os
import librosa
import numpy as np
from pymongo import MongoClient
from gridfs import GridFS

# Connect to the local MongoDB server; 'Dataset' holds both the GridFS bucket
# (audio bytes) and the 'features' collection written further down.
client = MongoClient('mongodb://localhost:27017/')
db = client['Dataset']
fs = GridFS(db)

# Folder containing MP3 files.
# The location is machine-specific, so it can be overridden with the
# FMA_DATA_DIR environment variable; the default is the original path.
folder_path = os.environ.get('FMA_DATA_DIR', r'F:\New folder\spotify\fma_large')

# Function to compute features
def compute_features(audio_file):
    """Summarise three audio descriptors of one file as plain Python values.

    The file is decoded with ``librosa.load`` (default arguments), then the
    MFCCs, the spectral centroid and the zero-crossing rate are computed.
    Each descriptor is reduced to its mean, median and standard deviation.

    Parameters
    ----------
    audio_file
        Passed unchanged to ``librosa.load``.

    Returns
    -------
    dict
        Keys ``"mfccs_stats"``, ``"spectral_centroid_stats"`` and
        ``"zero_crossing_rate_stats"``. Each value is a dict with the keys
        ``"mean"``, ``"median"`` and ``"std"``. The MFCC statistics are
        reduced along axis 1 and stored as lists; the other two descriptors
        are reduced over the whole array and stored as single numbers.
    """
    reducers = (("mean", np.mean), ("median", np.median), ("std", np.std))

    def summarize(values, axis=None):
        # .tolist() converts NumPy arrays and NumPy scalars alike into
        # built-in Python objects.
        return {
            label: reducer(values, axis=axis).tolist()
            for label, reducer in reducers
        }

    signal, sample_rate = librosa.load(audio_file)

    mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate)
    spectral_centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(signal)

    return {
        "mfccs_stats": summarize(mfccs, axis=1),
        "spectral_centroid_stats": summarize(spectral_centroid),
        "zero_crossing_rate_stats": summarize(zero_crossing_rate),
    }
# Ingest the MP3 files found in the sub-folders "000" .. "005" of folder_path.
# For each file the audio bytes go to GridFS and the computed statistics go to
# the 'features' collection, linked through the 'file_id' field.
failed_files = []  # paths whose feature extraction raised
for folder_index in range(0, 6):
    folder_name = str(folder_index).zfill(3)  # Pad with zeros if needed
    folder = os.path.join(folder_path, folder_name)
    if not os.path.isdir(folder):
        continue

    # os.listdir returns entries in arbitrary order; sort for reproducible runs
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.mp3'):
            continue
        file_path = os.path.join(folder, filename)

        # Re-running this cell must not store the same track a second time.
        # The check is by GridFS filename, so a file already in GridFS is
        # skipped even if it has no matching 'features' document.
        if fs.exists(filename=filename):
            print(f"File '{filename}' already stored, skipping.")
            continue

        # One file that cannot be processed should not abort the whole batch
        try:
            features = compute_features(file_path)
        except Exception as exc:
            failed_files.append(file_path)
            print(f"File '{filename}' skipped, feature extraction failed: {exc}")
            continue

        # Hand the open file to GridFS so it is streamed rather than fully
        # buffered in memory first
        with open(file_path, 'rb') as mp3_file:
            file_id = fs.put(mp3_file, filename=filename)

        features['file_id'] = file_id  # Add file ID to features
        try:
            db.features.insert_one(features)  # Insert features into MongoDB
        except BaseException:
            # BaseException also covers KeyboardInterrupt: remove the audio
            # just stored so it is never left without its features document.
            fs.delete(file_id)
            raise

        print(f"File '{filename}' processed and saved with ID: {file_id}")

if failed_files:
    print(f"Finished; {len(failed_files)} file(s) were skipped because of errors.")
else:
    print("All files processed and saved successfully.")


File '000002.mp3' processed and saved with ID: 663a6386501018d7374ef6e9
File '000003.mp3' processed and saved with ID: 663a6386501018d7374ef6ef
File '000005.mp3' processed and saved with ID: 663a6386501018d7374ef6f5
File '000010.mp3' processed and saved with ID: 663a6386501018d7374ef6fb
File '000020.mp3' processed and saved with ID: 663a6387501018d7374ef700
File '000026.mp3' processed and saved with ID: 663a6387501018d7374ef706
File '000030.mp3' processed and saved with ID: 663a6387501018d7374ef70c
File '000046.mp3' processed and saved with ID: 663a6387501018d7374ef712
File '000048.mp3' processed and saved with ID: 663a6387501018d7374ef718
File '000134.mp3' processed and saved with ID: 663a6387501018d7374ef71e
File '000135.mp3' processed and saved with ID: 663a6387501018d7374ef724
File '000136.mp3' processed and saved with ID: 663a6388501018d7374ef72a
File '000137.mp3' processed and saved with ID: 663a6388501018d7374ef730
File '000138.mp3' processed and saved with ID: 663a6388501018d73

KeyboardInterrupt: 

In [15]:
from pymongo import MongoClient

# Open the local MongoDB server and select the 'Dataset' database
client = MongoClient('mongodb://localhost:27017/')
db = client.get_database('Dataset')

# 'fs.files' is the collection queried here; the documents printed below
# carry filename, chunkSize, length and uploadDate for each stored file
metadata_collection = db.get_collection('fs.files')

# An empty filter matches every document in the collection
cursor = metadata_collection.find({})

# Print the documents one per line
for document in cursor:
    print(document)


{'_id': ObjectId('663a6386501018d7374ef6e9'), 'filename': '000002.mp3', 'chunkSize': 261120, 'length': 960738, 'uploadDate': datetime.datetime(2024, 5, 7, 17, 23, 18, 520000)}
{'_id': ObjectId('663a6386501018d7374ef6ef'), 'filename': '000003.mp3', 'chunkSize': 261120, 'length': 961632, 'uploadDate': datetime.datetime(2024, 5, 7, 17, 23, 18, 657000)}
{'_id': ObjectId('663a6386501018d7374ef6f5'), 'filename': '000005.mp3', 'chunkSize': 261120, 'length': 961580, 'uploadDate': datetime.datetime(2024, 5, 7, 17, 23, 18, 796000)}
{'_id': ObjectId('663a6386501018d7374ef6fb'), 'filename': '000010.mp3', 'chunkSize': 261120, 'length': 721040, 'uploadDate': datetime.datetime(2024, 5, 7, 17, 23, 18, 928000)}
{'_id': ObjectId('663a6387501018d7374ef700'), 'filename': '000020.mp3', 'chunkSize': 261120, 'length': 961208, 'uploadDate': datetime.datetime(2024, 5, 7, 17, 23, 19, 72000)}
{'_id': ObjectId('663a6387501018d7374ef706'), 'filename': '000026.mp3', 'chunkSize': 261120, 'length': 961215, 'uploadDat

In [24]:
from gridfs.errors import NoFile

# Open the 'Dataset' database and the GridFS bucket stored in it
client = MongoClient('mongodb://localhost:27017/')
db = client['Dataset']
fs = GridFS(db)

# An empty filter selects every document of the 'features' collection
cursor = db.features.find({})

for document in cursor:
    # Pull the three statistics sub-documents and the GridFS reference
    mfccs_stats, spectral_centroid_stats, zero_crossing_rate_stats, file_id = (
        document[field]
        for field in (
            'mfccs_stats',
            'spectral_centroid_stats',
            'zero_crossing_rate_stats',
            'file_id',
        )
    )

    # Print file ID for debugging
    print("File ID:", file_id)

    try:
        # Look the audio up in GridFS by id; a missing file is reported by
        # the NoFile handler below
        audio_content = fs.get(file_id).read()

        # Report the stored mean of each descriptor
        for label, stats in (
            ("MFCCs Mean:", mfccs_stats),
            ("Spectral Centroid Mean:", spectral_centroid_stats),
            ("Zero-Crossing Rate Mean:", zero_crossing_rate_stats),
        ):
            print(label, stats["mean"])

        # Rebuild NumPy arrays from the stored plain-Python values
        mfccs_mean, spectral_centroid_mean, zero_crossing_rate_mean = (
            np.array(stats["mean"])
            for stats in (mfccs_stats, spectral_centroid_stats, zero_crossing_rate_stats)
        )

        # Further processing or visualization...
    except NoFile:
        print("No audio content found for file ID:", file_id)
    except Exception as e:
        print("Error:", e)


File ID: 663a6155501018d7374ede40
No audio content found for file ID: 663a6155501018d7374ede40
File ID: 663a6157501018d7374ede46
No audio content found for file ID: 663a6157501018d7374ede46
File ID: 663a6157501018d7374ede4c
No audio content found for file ID: 663a6157501018d7374ede4c
File ID: 663a6157501018d7374ede52
No audio content found for file ID: 663a6157501018d7374ede52
File ID: 663a6158501018d7374ede57
No audio content found for file ID: 663a6158501018d7374ede57
File ID: 663a6158501018d7374ede5d
No audio content found for file ID: 663a6158501018d7374ede5d
File ID: 663a6158501018d7374ede63
No audio content found for file ID: 663a6158501018d7374ede63
File ID: 663a6158501018d7374ede69
No audio content found for file ID: 663a6158501018d7374ede69
File ID: 663a6158501018d7374ede6f
No audio content found for file ID: 663a6158501018d7374ede6f
File ID: 663a6158501018d7374ede75
No audio content found for file ID: 663a6158501018d7374ede75
File ID: 663a6158501018d7374ede7b
No audio content