In [47]:
import os
import pandas as pd
import librosa
import numpy as np
from pymongo import MongoClient
from tqdm import tqdm
from mutagen.mp3 import MP3

# Define file paths
metadata_path = "fma_meta/fma_metadata/tracks.csv"
extracted_data_folder = "fma_large"
updated_genres_file = "fma_meta/fma_metadata/updated_genres_and_track_ids.csv"
echonest_data_file = 'fma_meta/fma_metadata/echonest_1.csv'

# Load metadata
metadata = pd.read_csv(metadata_path, index_col=0)

# Load updated genres
updated_genres = pd.read_csv(updated_genres_file)

# Load echonest data


# Define MongoDB connection parameters

# Load echonest data
echonest_csv_path = 'fma_meta/fma_metadata/echonest_2.csv'
echonest_data = pd.read_csv(echonest_csv_path, index_col='track_id')
# Convert track_id index to int32 for compatibility with MongoDB _id
echonest_data.index = echonest_data.index.astype('int32')

# Define MongoDB connection parameters
mongo_client = MongoClient('localhost', 27017)
db = mongo_client['music_recommendation_40']
collection = db['tracks']

for _, row in tqdm(echonest_data.iterrows(), total=echonest_data.shape[0]):
    document = {
        '_id': int(row.name),  # Using track_id as the document ID
        'acousticness': row['acousticness'],
        'danceability': row['danceability'],
        'energy': row['energy'],
        'instrumentalness': row['instrumentalness'],
        'liveness': row['liveness'],
        'speechiness': row['speechiness'],
        'tempo': row['tempo'],
        'valence': row['valence']
    }
    
    # Insert the document into the collection, handle duplicates gracefully
    collection.update_one({'_id': int(row.name)}, {'$set': document}, upsert=True)

def extract_features(audio_path):
    try:
        y, sr = librosa.load(audio_path, sr=None)  # Load audio file
        mfccs = librosa.feature.mfcc(y=y, sr=sr)  # Extract MFCCs
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]  # Extract spectral centroid
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y)[0]  # Extract zero-crossing rate
        
        # Normalize features
        mfccs = librosa.util.normalize(mfccs)
        spectral_centroid = librosa.util.normalize(spectral_centroid)
        zero_crossing_rate = librosa.util.normalize(zero_crossing_rate)
        
        # Flatten features
        mfccs_flat = mfccs.mean(axis=1).flatten()
        spectral_centroid_flat = spectral_centroid.flatten()
        zero_crossing_rate_flat = np.array([np.mean(zero_crossing_rate)])  # Scalar value
        
        # Concatenate features
        features = np.concatenate([mfccs_flat, spectral_centroid_flat, zero_crossing_rate_flat])
        return features
    except Exception as e:
        print(f"Error extracting features from {audio_path}: {e}")
        return None

# Function to extract album title from audio files
def extract_album_title(audio_path):
    try:
        audio = MP3(audio_path)
        album_title = audio.get('TALB')
        return album_title[0] if album_title else None
    except Exception as e:
        print(f"Error extracting album title from {audio_path}: {e}")
        return None

# Function to extract artist name from audio files
def extract_artist_name(audio_path):
    try:
        audio = MP3(audio_path)
        artist_name = audio.get('TPE1')
        return artist_name[0] if artist_name else None
    except Exception as e:
        print(f"Error extracting artist name from {audio_path}: {e}")
        return None

def extract_track_title(audio_path):
    try:
        audio = MP3(audio_path)
        track_title = audio.get('TIT2')
        return track_title[0] if track_title else None
    except Exception as e:
        print(f"Error extracting track title from {audio_path}: {e}")
        return None

# Define functions for extracting metadata and features from audio files
def extract_metadata_and_features(folder_path, total_files):
    progress_bar = tqdm(total=total_files, desc="Processing audio files", dynamic_ncols=True)
    processed_files = 0

    for folder_name in os.listdir(folder_path):
        folder_path = os.path.join(extracted_data_folder, folder_name)
        if os.path.isdir(folder_path):
            for audio_file in os.listdir(folder_path):
                audio_path = os.path.join(folder_path, audio_file)
                if audio_file.endswith(".mp3") and os.path.isfile(audio_path):
                    track_id = int(audio_file.split(".")[0])
                    process_audio_file(audio_path, track_id)
                    processed_files += 1
                    progress_bar.update(1)
                    if processed_files >= total_files:
                        break
            if processed_files >= total_files:
                break

    progress_bar.close()

def process_audio_file(audio_path, track_id):
    try:
        print(f"Processing track ID: {track_id}")
        
        # Extract features and metadata
        features = extract_features(audio_path)
        
        # Map genre_top based on track_id
        genre_title = updated_genres.loc[updated_genres['track_id'] == track_id, 'genres'].values[0]
        
        # Extract album title from audio file
        album_title = extract_album_title(audio_path)
        
        # Extract artist name from audio file
        artist_name = extract_artist_name(audio_path)
        existing_doc = collection.find_one({"_id": track_id})
        if existing_doc:
                        # Update existing document
            collection.update_one({"_id": track_id}, {"$set": {
            "features": features.tolist(),
            "album_title": album_title,
            "artist_name": artist_name,
            "track_title": track_title,
            "genre_title": genre_title,
            }})
        else:
                        # Insert new document
            document = {
                        
                "_id": track_id,
                "features": features.tolist(),
                "album_title": album_title,
                "artist_name": artist_name,
                "track_title": track_title,
                "genre_title": genre_title,                    }
        collection.insert_one(document)
        # Extract track title from audio file
        track_title = extract_track_title(audio_path)
        
        
        # Prepare the document
        document = {
            "features": features.tolist(),
            "album_title": album_title,
            "artist_name": artist_name,
            "track_title": track_title,
            "genre_title": genre_title
        }
        
        if track_id in echonest_data.index:
            echonest_entry = echonest_data.loc[track_id].to_dict()
            document['echonest'] = echonest_entry
            print(f"Echonest data for track ID {track_id}: {echonest_entry}")
        else:
            document['echonest'] = {}
            print(f"No Echonest data for track ID {track_id}")
        
        # Insert/update the document in MongoDB
        result = collection.update_one({"_id": track_id}, {"$set": document}, upsert=True)
        print(f"MongoDB update result for track ID {track_id}: {result.modified_count} modified, {result.upserted_id} upserted")
        
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")


# Main script execution
if __name__ == '__main__':
    total_files_to_process = 100  # Set limit for processed files
    extract_metadata_and_features(extracted_data_folder, total_files_to_process)
    print("Feature extraction and loading into MongoDB completed.")

  metadata = pd.read_csv(metadata_path, index_col=0)

  0%|                                                 | 0/13129 [00:00<?, ?it/s][A
  0%|                                         | 1/13129 [00:00<54:43,  4.00it/s][A
  1%|▏                                      | 80/13129 [00:00<00:45, 289.25it/s][A
  2%|▊                                     | 268/13129 [00:00<00:15, 832.20it/s][A
  3%|█▏                                   | 413/13129 [00:00<00:12, 1032.73it/s][A
  4%|█▌                                   | 562/13129 [00:00<00:10, 1177.48it/s][A
  5%|██                                   | 720/13129 [00:00<00:09, 1275.85it/s][A
  7%|██▍                                  | 859/13129 [00:00<00:10, 1201.22it/s][A
  8%|██▊                                 | 1020/13129 [00:00<00:09, 1316.28it/s][A
  9%|███▎                                | 1198/13129 [00:01<00:08, 1450.18it/s][A
 11%|███▊                                | 1379/13129 [00:01<00:07, 1553.50it/s][A
 12%|████▎            

Processing track ID: 136792



Processing audio files:   1%|▏                  | 1/100 [00:00<00:38,  2.57it/s][A

Error processing fma_large/136/136792.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136750



Processing audio files:   2%|▍                  | 2/100 [00:00<00:33,  2.93it/s][A

Error processing fma_large/136/136750.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136413



Processing audio files:   3%|▌                  | 3/100 [00:00<00:30,  3.21it/s][A

Error processing fma_large/136/136413.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136963



Processing audio files:   4%|▊                  | 4/100 [00:01<00:30,  3.17it/s][A

Error processing fma_large/136/136963.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136815



Processing audio files:   5%|▉                  | 5/100 [00:01<00:29,  3.26it/s][A

Error processing fma_large/136/136815.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136631



Processing audio files:   6%|█▏                 | 6/100 [00:01<00:28,  3.25it/s][A

Error processing fma_large/136/136631.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136055



Processing audio files:   7%|█▎                 | 7/100 [00:02<00:30,  3.08it/s][A

Error processing fma_large/136/136055.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136598



Processing audio files:   8%|█▌                 | 8/100 [00:02<00:31,  2.91it/s][A

Error processing fma_large/136/136598.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136414



Processing audio files:   9%|█▋                 | 9/100 [00:02<00:30,  2.94it/s][A

Error processing fma_large/136/136414.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136318



Processing audio files:  10%|█▊                | 10/100 [00:03<00:29,  3.08it/s][A

Error processing fma_large/136/136318.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136048



Processing audio files:  11%|█▉                | 11/100 [00:03<00:27,  3.18it/s][A

Error processing fma_large/136/136048.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136113



Processing audio files:  12%|██▏               | 12/100 [00:04<00:32,  2.68it/s][A

Error processing fma_large/136/136113.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136101



Processing audio files:  13%|██▎               | 13/100 [00:04<00:31,  2.74it/s][A

Error processing fma_large/136/136101.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136110



Processing audio files:  14%|██▌               | 14/100 [00:04<00:29,  2.89it/s][A

Error processing fma_large/136/136110.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136800



Processing audio files:  15%|██▋               | 15/100 [00:05<00:30,  2.76it/s][A

Error processing fma_large/136/136800.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136229



Processing audio files:  16%|██▉               | 16/100 [00:05<00:29,  2.86it/s][A

Error processing fma_large/136/136229.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136093



Processing audio files:  17%|███               | 17/100 [00:05<00:28,  2.95it/s][A

Error processing fma_large/136/136093.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136752



Processing audio files:  18%|███▏              | 18/100 [00:06<00:27,  3.00it/s][A

Error processing fma_large/136/136752.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136235



Processing audio files:  19%|███▍              | 19/100 [00:06<00:26,  3.10it/s][A

Error processing fma_large/136/136235.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136212



Processing audio files:  20%|███▌              | 20/100 [00:06<00:25,  3.17it/s][A

Error processing fma_large/136/136212.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136341



Processing audio files:  21%|███▊              | 21/100 [00:07<00:25,  3.04it/s][A

Error processing fma_large/136/136341.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136016



Processing audio files:  22%|███▉              | 22/100 [00:07<00:26,  2.99it/s][A

Error processing fma_large/136/136016.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136648



Processing audio files:  23%|████▏             | 23/100 [00:07<00:24,  3.10it/s][A

Error processing fma_large/136/136648.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136349



Processing audio files:  24%|████▎             | 24/100 [00:08<00:25,  2.95it/s][A

Error processing fma_large/136/136349.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136386



Processing audio files:  25%|████▌             | 25/100 [00:08<00:25,  2.92it/s][A

Error processing fma_large/136/136386.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136806



Processing audio files:  26%|████▋             | 26/100 [00:08<00:25,  2.86it/s][A

Error processing fma_large/136/136806.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136107



Processing audio files:  27%|████▊             | 27/100 [00:09<00:25,  2.89it/s][A

Error processing fma_large/136/136107.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136186



Processing audio files:  28%|█████             | 28/100 [00:09<00:24,  2.93it/s][A

Error processing fma_large/136/136186.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136703



Processing audio files:  29%|█████▏            | 29/100 [00:09<00:26,  2.70it/s][A

Error processing fma_large/136/136703.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136882



Processing audio files:  30%|█████▍            | 30/100 [00:10<00:24,  2.82it/s][A

Error processing fma_large/136/136882.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136220



Processing audio files:  31%|█████▌            | 31/100 [00:10<00:25,  2.67it/s][A

Error processing fma_large/136/136220.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136730



Processing audio files:  32%|█████▊            | 32/100 [00:10<00:25,  2.68it/s][A

Error processing fma_large/136/136730.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136074



Processing audio files:  33%|█████▉            | 33/100 [00:11<00:23,  2.79it/s][A

Error processing fma_large/136/136074.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136871



Processing audio files:  34%|██████            | 34/100 [00:11<00:23,  2.78it/s][A

Error processing fma_large/136/136871.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136933



Processing audio files:  35%|██████▎           | 35/100 [00:11<00:22,  2.86it/s][A

Error processing fma_large/136/136933.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136641



Processing audio files:  36%|██████▍           | 36/100 [00:12<00:22,  2.91it/s][A

Error processing fma_large/136/136641.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136497



Processing audio files:  37%|██████▋           | 37/100 [00:12<00:21,  2.92it/s][A

Error processing fma_large/136/136497.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136056



Processing audio files:  38%|██████▊           | 38/100 [00:12<00:20,  2.99it/s][A

Error processing fma_large/136/136056.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136763



Processing audio files:  39%|███████           | 39/100 [00:13<00:20,  3.02it/s][A

Error processing fma_large/136/136763.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136353



Processing audio files:  40%|███████▏          | 40/100 [00:13<00:20,  2.92it/s][A

Error processing fma_large/136/136353.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136653



Processing audio files:  41%|███████▍          | 41/100 [00:13<00:19,  2.97it/s][A
Processing audio files:  42%|███████▌          | 42/100 [00:14<00:15,  3.63it/s][A

Error processing fma_large/136/136653.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136521
Error processing fma_large/136/136521.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136805



Processing audio files:  43%|███████▋          | 43/100 [00:14<00:17,  3.32it/s][A

Error processing fma_large/136/136805.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136024



Processing audio files:  44%|███████▉          | 44/100 [00:14<00:17,  3.18it/s][A

Error processing fma_large/136/136024.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136962



Processing audio files:  45%|████████          | 45/100 [00:15<00:18,  2.93it/s][A

Error processing fma_large/136/136962.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136198



Processing audio files:  46%|████████▎         | 46/100 [00:15<00:18,  2.96it/s][A

Error processing fma_large/136/136198.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136473



Processing audio files:  47%|████████▍         | 47/100 [00:15<00:17,  3.03it/s][A

Error processing fma_large/136/136473.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136275



Processing audio files:  48%|████████▋         | 48/100 [00:16<00:18,  2.88it/s][A

Error processing fma_large/136/136275.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136929



Processing audio files:  49%|████████▊         | 49/100 [00:16<00:17,  2.97it/s][A

Error processing fma_large/136/136929.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136915



Processing audio files:  50%|█████████         | 50/100 [00:16<00:16,  3.02it/s][A

Error processing fma_large/136/136915.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136704



Processing audio files:  51%|█████████▏        | 51/100 [00:17<00:15,  3.15it/s][A

Error processing fma_large/136/136704.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136983



Processing audio files:  52%|█████████▎        | 52/100 [00:17<00:15,  3.13it/s][A

Error processing fma_large/136/136983.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136770



Processing audio files:  53%|█████████▌        | 53/100 [00:17<00:15,  3.11it/s][A

Error processing fma_large/136/136770.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136654



Processing audio files:  54%|█████████▋        | 54/100 [00:18<00:14,  3.18it/s][A

Error processing fma_large/136/136654.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136412



Processing audio files:  55%|█████████▉        | 55/100 [00:18<00:14,  3.17it/s][A

Error processing fma_large/136/136412.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136009



Processing audio files:  56%|██████████        | 56/100 [00:18<00:14,  3.10it/s][A

Error processing fma_large/136/136009.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136772



Processing audio files:  57%|██████████▎       | 57/100 [00:19<00:14,  3.04it/s][A

Error processing fma_large/136/136772.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136972



Processing audio files:  58%|██████████▍       | 58/100 [00:20<00:28,  1.48it/s][A

Error processing fma_large/136/136972.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136081



Processing audio files:  59%|██████████▌       | 59/100 [00:20<00:23,  1.76it/s][A

Error processing fma_large/136/136081.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136829



Processing audio files:  60%|██████████▊       | 60/100 [00:21<00:19,  2.01it/s][A

Error processing fma_large/136/136829.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136142



Processing audio files:  61%|██████████▉       | 61/100 [00:21<00:17,  2.24it/s][A

Error processing fma_large/136/136142.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136778



Processing audio files:  62%|███████████▏      | 62/100 [00:21<00:15,  2.38it/s][A

Error processing fma_large/136/136778.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136783



Processing audio files:  63%|███████████▎      | 63/100 [00:22<00:14,  2.60it/s][A

Error processing fma_large/136/136783.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136258



Processing audio files:  64%|███████████▌      | 64/100 [00:22<00:13,  2.73it/s][A

Error processing fma_large/136/136258.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136804



Processing audio files:  65%|███████████▋      | 65/100 [00:22<00:12,  2.71it/s][A

Error processing fma_large/136/136804.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136721



Processing audio files:  66%|███████████▉      | 66/100 [00:23<00:12,  2.70it/s][A

Error processing fma_large/136/136721.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136628



Processing audio files:  67%|████████████      | 67/100 [00:23<00:11,  2.84it/s][A

Error processing fma_large/136/136628.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136467



Processing audio files:  68%|████████████▏     | 68/100 [00:23<00:10,  2.98it/s][A

Error processing fma_large/136/136467.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136200



Processing audio files:  69%|████████████▍     | 69/100 [00:24<00:10,  3.03it/s][A

Error processing fma_large/136/136200.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136199



Processing audio files:  70%|████████████▌     | 70/100 [00:24<00:10,  2.93it/s][A

Error processing fma_large/136/136199.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136790



Processing audio files:  71%|████████████▊     | 71/100 [00:24<00:09,  3.02it/s][A

Error processing fma_large/136/136790.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136471



Processing audio files:  72%|████████████▉     | 72/100 [00:25<00:09,  2.88it/s][A

Error processing fma_large/136/136471.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136629



Processing audio files:  73%|█████████████▏    | 73/100 [00:25<00:09,  2.88it/s][A

Error processing fma_large/136/136629.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136444



Processing audio files:  74%|█████████████▎    | 74/100 [00:26<00:09,  2.69it/s][A

Error processing fma_large/136/136444.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136748



Processing audio files:  75%|█████████████▌    | 75/100 [00:26<00:09,  2.51it/s][A

Error processing fma_large/136/136748.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136420



Processing audio files:  76%|█████████████▋    | 76/100 [00:27<00:11,  2.14it/s][A

Error processing fma_large/136/136420.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136781



Processing audio files:  77%|█████████████▊    | 77/100 [00:27<00:09,  2.43it/s][A

Error processing fma_large/136/136781.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136736



Processing audio files:  78%|██████████████    | 78/100 [00:28<00:10,  2.17it/s][A

Error processing fma_large/136/136736.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136743



Processing audio files:  79%|██████████████▏   | 79/100 [00:28<00:10,  2.02it/s][A

Error processing fma_large/136/136743.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136196



Processing audio files:  80%|██████████████▍   | 80/100 [00:28<00:08,  2.24it/s][A

Error processing fma_large/136/136196.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136429



Processing audio files:  81%|██████████████▌   | 81/100 [00:29<00:07,  2.45it/s][A

Error processing fma_large/136/136429.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136216



Processing audio files:  82%|██████████████▊   | 82/100 [00:29<00:07,  2.53it/s][A

Error processing fma_large/136/136216.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136758



Processing audio files:  83%|██████████████▉   | 83/100 [00:29<00:06,  2.57it/s][A

Error processing fma_large/136/136758.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136821



Processing audio files:  84%|███████████████   | 84/100 [00:30<00:06,  2.54it/s][A

Error processing fma_large/136/136821.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136242



Processing audio files:  85%|███████████████▎  | 85/100 [00:30<00:06,  2.50it/s][A

Error processing fma_large/136/136242.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136960



Processing audio files:  86%|███████████████▍  | 86/100 [00:31<00:05,  2.49it/s][A

Error processing fma_large/136/136960.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136599



Processing audio files:  87%|███████████████▋  | 87/100 [00:31<00:04,  2.60it/s][A

Error processing fma_large/136/136599.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136791



Processing audio files:  88%|███████████████▊  | 88/100 [00:31<00:04,  2.79it/s][A

Error processing fma_large/136/136791.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136490



Processing audio files:  89%|████████████████  | 89/100 [00:33<00:06,  1.65it/s][A

Error processing fma_large/136/136490.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136488



Processing audio files:  90%|████████████████▏ | 90/100 [00:33<00:05,  1.87it/s][A

Error processing fma_large/136/136488.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136952



Processing audio files:  91%|████████████████▍ | 91/100 [00:33<00:04,  2.06it/s][A

Error processing fma_large/136/136952.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136814



Processing audio files:  92%|████████████████▌ | 92/100 [00:34<00:03,  2.24it/s][A

Error processing fma_large/136/136814.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136322



Processing audio files:  93%|████████████████▋ | 93/100 [00:34<00:02,  2.34it/s][A

Error processing fma_large/136/136322.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136664



Processing audio files:  94%|████████████████▉ | 94/100 [00:34<00:02,  2.54it/s][A

Error processing fma_large/136/136664.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136965



Processing audio files:  95%|█████████████████ | 95/100 [00:35<00:01,  2.58it/s][A

Error processing fma_large/136/136965.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136831



Processing audio files:  96%|█████████████████▎| 96/100 [00:35<00:01,  2.23it/s][A

Error processing fma_large/136/136831.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136144



Processing audio files:  97%|█████████████████▍| 97/100 [00:36<00:01,  2.43it/s][A

Error processing fma_large/136/136144.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136947



Processing audio files:  98%|█████████████████▋| 98/100 [00:36<00:00,  2.49it/s][A

Error processing fma_large/136/136947.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136124



Processing audio files:  99%|█████████████████▊| 99/100 [00:36<00:00,  2.65it/s][A

Error processing fma_large/136/136124.mp3: local variable 'track_title' referenced before assignment
Processing track ID: 136994



Processing audio files: 100%|█████████████████| 100/100 [00:37<00:00,  2.69it/s][A

Error processing fma_large/136/136994.mp3: local variable 'track_title' referenced before assignment
Feature extraction and loading into MongoDB completed.





In [84]:
import torch
import pymongo
from sklearn.preprocessing import StandardScaler
import numpy as np

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["music_recommendation_50"]
tracks = db["tracks"]

# Load data
data = []
track_ids = []
for track in tracks.find():
    try:
        features = [
            track["acousticness"],
            track["danceability"],
            track["energy"],
            track["instrumentalness"],
            track["liveness"],
            track["speechiness"],
            track["tempo"],
            track["valence"]
        ]
        data.append(features)
        track_ids.append(track["_id"])
    except KeyError as e:
        print(f"Missing value for {e} in track {track['_id']}")

# Convert data to PyTorch tensor
data_np = np.array(data, dtype=np.float32)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_np)
data_tensor = torch.tensor(data_scaled)

# Function to find most similar songs
def find_similar_songs(song_id, data_tensor, track_ids, k=10):
    song_idx = track_ids.index(song_id)
    song_features = data_tensor[song_idx]

    # Compute distances to other songs
    distances = torch.norm(data_tensor - song_features, dim=1)
    nearest_indices = distances.argsort()[:k+1]  # +1 because the closest is the song itself

    # Get similar song IDs, excluding the song itself
    similar_songs = [track_ids[i] for i in nearest_indices if i != song_idx]
    return similar_songs[:k]

# Example: Find 10 most similar songs to a specific song ID
similar_songs = find_similar_songs(3, data_tensor, track_ids)
print("Similar Songs IDs:", similar_songs)


Similar Songs IDs: [26016, 82782, 14377, 56510, 694, 22511, 75377, 101412, 11884, 23319]


In [85]:
import os
import pygame

# Initialize pygame mixer
pygame.mixer.init()

def play_song(song_id):
    # Format the song ID into the required file name format
    file_name = f"{song_id:06d}.mp3"  # Pad the song ID with zeros
    file_path = os.path.join("fma_large", file_name[:3], file_name)  # Construct the path

    # Check if the file exists
    if not os.path.exists(file_path):
        print("Audio file does not exist:", file_path)
        return

    # Load and play the audio file
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()

    # Wait for user command to continue or stop
    print("Playing. Type 'stop' and press enter to stop.")
    while pygame.mixer.music.get_busy():
        command = input()
        if command.lower() == 'stop':
            pygame.mixer.music.stop()
            print("Playback stopped.")
            return
        pygame.time.Clock().tick(10)

# Get song ID from user input
song_id = input("Enter the song ID: ")
if song_id.isdigit():
    play_song(int(song_id))
else:
    print("Invalid input. Please enter a numeric song ID.")


Enter the song ID:  56510


Playing. Type 'stop' and press enter to stop.


 stop


Playback stopped.
