# **LAB I**

In [1]:
import os
import librosa
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import arff
from IPython.display import Audio, display

In [2]:
# Read an audio file from disk
def load_audio(file_path):
    """Decode ``file_path`` with librosa and return ``(samples, sample_rate)``.

    ``sr=None`` is passed through to ``librosa.load`` so the file is not
    resampled to librosa's default rate.
    """
    samples, sample_rate = librosa.load(file_path, sr=None)
    return samples, sample_rate

In [3]:
# Summarise an audio signal as one flat feature vector
def extract_features(audio, sr):
    """Return a 1-D feature vector describing ``audio``.

    The vector is the concatenation, in this order, of:
      * the per-coefficient time average of 13 MFCCs,
      * the time-averaged spectral centroid (a single value),
      * the per-bin time average of the STFT chroma.

    Args:
        audio: Audio samples as accepted by librosa's feature extractors.
        sr: Sampling rate of ``audio``.
    """
    # Each librosa extractor yields a (n_features, n_frames) matrix;
    # averaging over axis 1 collapses the time dimension.
    mfcc_frames = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    centroid_frames = librosa.feature.spectral_centroid(y=audio, sr=sr)
    chroma_frames = librosa.feature.chroma_stft(y=audio, sr=sr)

    parts = [
        mfcc_frames.mean(axis=1),
        [centroid_frames.mean()],  # wrap the scalar so it can be concatenated
        chroma_frames.mean(axis=1),
    ]
    return np.concatenate(parts)

In [4]:
# Function to load an ARFF file
def load_arff(file_path):
    """Parse an ARFF file, returning ``None`` instead of raising on failure.

    This is a best-effort loader: any error while opening or parsing the file
    is reported on stdout, followed by a preview of the file's first lines to
    help diagnose malformed input.

    Args:
        file_path: Path to the ``.arff`` file.

    Returns:
        Whatever ``arff.load`` returns for the file, or ``None`` if the file
        could not be opened or parsed.
    """
    try:
        with open(file_path, 'r') as f:
            return arff.load(f)
    except Exception as e:
        print(f"Error reading ARFF file {file_path}: {e}")

    # Diagnostic preview. The file may be missing or undecodable (that can be
    # the very reason parsing failed), so this second read must not raise out
    # of the function and defeat the "return None" contract.
    try:
        with open(file_path, 'r') as f:
            # zip with range stops after 10 lines without reading the rest.
            preview = [line for _, line in zip(range(10), f)]
    except (OSError, UnicodeError) as preview_error:
        print(f"Could not read {file_path} for preview: {preview_error}")
    else:
        print("First few lines of the file:")
        print(''.join(preview))
    return None

In [5]:
# Collect everything known about one track
def process_track(track_id, dataset_path):
    """Load the mix audio and annotation files for ``track_id``.

    Files are looked up under ``dataset_path``: the mix in
    ``audio-mixes-mp3`` and the three ARFF annotation files in
    ``annotations``.

    Returns:
        A dict with keys ``id``, ``title``, ``features``, ``beatinfo``,
        ``onsets``, ``segments`` and ``mix_path``. The annotation entries are
        whatever ``load_arff`` returned, which may be ``None``.
    """
    mix_path = os.path.join(dataset_path, 'audio-mixes-mp3', f'{track_id}_mix.mp3')

    # Audio first: decode the mix and reduce it to a feature vector.
    samples, sample_rate = load_audio(mix_path)
    feature_vector = extract_features(samples, sample_rate)

    # Then the annotations, each loaded straight from its expected location.
    beat_data = load_arff(
        os.path.join(dataset_path, 'annotations', f'{track_id}_beatinfo.arff')
    )
    onset_data = load_arff(
        os.path.join(dataset_path, 'annotations', f'{track_id}_onsets.arff')
    )
    segment_data = load_arff(
        os.path.join(dataset_path, 'annotations', f'{track_id}_segments.arff')
    )

    return dict(
        id=track_id,
        title=f"Track {track_id}",  # placeholder title derived from the ID
        features=feature_vector,
        beatinfo=beat_data,
        onsets=onset_data,
        segments=segment_data,
        mix_path=mix_path,  # kept so the mix can be played back later
    )

In [6]:
# Function to process the dataset
def process_dataset(dataset_path, max_tracks=20):
    """Process the audio mixes found under ``dataset_path``.

    Every ``*_mix.mp3`` file in ``<dataset_path>/audio-mixes-mp3`` is passed
    to ``process_track``. A track is kept only if none of the values in its
    dictionary is ``None``. Tracks that raise during processing are reported
    on stdout and skipped.

    Args:
        dataset_path: Root directory of the dataset.
        max_tracks: Maximum number of tracks to return. ``None`` means no
            limit; ``0`` (or a negative value) yields an empty list.

    Returns:
        A list of track dictionaries, in sorted file-name order.

    Raises:
        OSError: If the ``audio-mixes-mp3`` directory cannot be listed.
    """
    mix_suffix = '_mix.mp3'
    mix_dir = os.path.join(dataset_path, 'audio-mixes-mp3')

    tracks = []
    # os.listdir returns entries in arbitrary order; sorting makes the subset
    # selected by max_tracks identical on every run and every machine.
    for filename in sorted(os.listdir(mix_dir)):
        # Check the limit before doing any work, so max_tracks=0 is honoured
        # and no track is decoded only to be discarded.
        if max_tracks is not None and len(tracks) >= max_tracks:
            break
        if not filename.endswith(mix_suffix):
            continue

        # Strip the suffix instead of slicing a fixed width: process_track
        # rebuilds the file name as f'{track_id}_mix.mp3', so the ID must be
        # exactly the part before the suffix, whatever its length.
        track_id = filename[:-len(mix_suffix)]
        try:
            track = process_track(track_id, dataset_path)
        except Exception as e:
            print(f"Error processing track {track_id}: {e}")
            continue

        if all(v is not None for v in track.values()):
            tracks.append(track)
    return tracks

In [7]:
# Look up a track by its ID
def get_track_by_id(tracks, query_id):
    """Return the first track in ``tracks`` whose ``'id'`` equals ``query_id``.

    Returns ``None`` when no track matches.
    """
    matching = (candidate for candidate in tracks if candidate['id'] == query_id)
    return next(matching, None)

In [8]:
# Rank the other tracks by how closely their features match the query
def recommend_tracks(query_track, tracks, top_n=5):
    """Return the ``top_n`` tracks most similar to ``query_track``.

    Similarity is the cosine similarity between the ``'features'`` vectors.
    Any track sharing the query's ``'id'`` is excluded.

    Returns:
        A list of ``(track, similarity)`` tuples, highest similarity first,
        containing at most ``top_n`` entries.
    """
    # NOTE(review): the feature vectors are compared as-is, with no scaling;
    # confirm whether they should be standardised before this comparison.
    query_vector = query_track['features'].reshape(1, -1)

    scored = [
        (
            candidate,
            cosine_similarity(query_vector, candidate['features'].reshape(1, -1))[0][0],
        )
        for candidate in tracks
        if candidate['id'] != query_track['id']
    ]

    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

In [9]:
# Function to play an audio track
def play_track(file_path):
    """Show an inline audio player for ``file_path`` in the notebook.

    The file is handed to ``IPython.display.Audio`` by name, so the player
    embeds the file's own (compressed) bytes. Decoding the track to a sample
    array first and passing that array instead makes ``Audio`` embed it as
    uncompressed WAV, which is slower and produces far larger cell output.

    Any failure (for example a missing file) is reported on stdout rather
    than raised.

    Args:
        file_path: Path to an audio file in a format the browser can play.
    """
    try:
        display(Audio(filename=file_path))
    except Exception as e:
        print(f"Error playing audio file {file_path}: {e}")

In [10]:
# Main user interface function
def user_interface(tracks):
    """Run an interactive prompt that plays a track and its recommendations.

    Lists the available track IDs, then repeatedly asks for an ID. For a
    known ID the track is played, followed by each recommended track with its
    similarity score. Entering ``q`` (any case) ends the loop.

    Args:
        tracks: List of track dictionaries as produced by ``process_dataset``.
    """
    print("Welcome to the Music Recommendation System!")
    print("Available tracks:", ', '.join([track['id'] for track in tracks]))

    while True:
        # Strip surrounding whitespace so input such as "0001 " or " q" is
        # not rejected because of a stray space.
        query_id = input("Enter a track ID to get recommendations (or 'q' to quit): ").strip()
        if query_id.lower() == 'q':
            break

        query_track = get_track_by_id(tracks, query_id)
        if query_track is None:
            print("Invalid track ID. Please try again.")
            continue

        print(f"\nPlaying track '{query_track['id']}' - {query_track['title']}...")
        play_track(query_track['mix_path'])  # Play the selected track

        recommendations = recommend_tracks(query_track, tracks)
        print("\nRecommendations:")
        for recommended_track, similarity in recommendations:
            print(f"Track ID: {recommended_track['id']}, Similarity: {similarity:.4f}")
            print(f"Playing recommendation '{recommended_track['id']}' - {recommended_track['title']}...")
            play_track(recommended_track['mix_path'])  # Play the recommended track
            print()  # Add a blank line between recommendations

In [11]:
# Example usage
# Root of the dataset; process_track expects the sub-directories
# 'audio-mixes-mp3' and 'annotations' beneath it.
dataset_path = './data/tinyAAM'
# Uses process_dataset's default limit (max_tracks=20).
processed_tracks = process_dataset(dataset_path)
print(f"Successfully processed {len(processed_tracks)} tracks")

Successfully processed 20 tracks


In [12]:
# Run the user interface
# user_interface(processed_tracks)

### ***I had to comment out the last cell because its output loaded widgets that made the file too big for GitHub***

But you can still run it yourself: uncomment the `user_interface(processed_tracks)` call in the last cell.