In [None]:
import io
import os
import tempfile

from minio import Minio

import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

import mlflow
from mlflow.tracking import MlflowClient

from dotenv import load_dotenv
load_dotenv()

In [13]:
# Connection settings come from the environment (populated by load_dotenv() above).
# Each value is None when the corresponding variable is not set.
REMOTE_MLFLOW_STORAGE_URI = os.environ.get("REMOTE_MLFLOW_STORAGE_URI")  # passed as the MinIO endpoint
REMOTE_MLFLOW_BUCKET_NAME = os.environ.get("REMOTE_MLFLOW_BUCKET_NAME")  # bucket that is listed / downloaded
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")                  # MinIO access key
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")          # MinIO secret key

In [None]:
# Create the MinIO client for the endpoint configured in REMOTE_MLFLOW_STORAGE_URI (TLS on)
minio_client = Minio(
    endpoint=REMOTE_MLFLOW_STORAGE_URI,
    access_key=AWS_ACCESS_KEY_ID,
    secret_key=AWS_SECRET_ACCESS_KEY,
    secure=True,
)

# Walk the whole bucket (empty prefix, recursive) and print every object key
for bucket_entry in minio_client.list_objects(
    REMOTE_MLFLOW_BUCKET_NAME,
    prefix="",
    recursive=True,
):
    print(bucket_entry.object_name)

In [None]:
# Local directory that receives the files downloaded from the bucket;
# the model is later loaded from this same directory.
local_file_path = "latest_model_from_mlflow/"

# List and download all objects in the specified path
def download_objects_from_minio(minio_client, bucket_name, s3_path_to_model, local_file_path):
    """Download every object stored under a bucket "folder" into a local directory.

    The key layout below ``s3_path_to_model`` is recreated inside
    ``local_file_path``; missing local directories are created on the way.

    Args:
        minio_client: Client exposing ``list_objects`` and ``fget_object``
            (a ``Minio`` instance in this notebook).
        bucket_name: Name of the bucket to read from.
        s3_path_to_model: Key prefix to download. It is treated as a folder
            (a trailing "/" is optional) or as the exact key of one object.
        local_file_path: Destination directory on the local disk.

    Returns:
        None. The files are written to disk as a side effect.
    """
    prefix = s3_path_to_model.rstrip("/")
    folder = f"{prefix}/" if prefix else ""

    for obj in minio_client.list_objects(bucket_name, prefix=s3_path_to_model, recursive=True):
        object_name = obj.object_name

        # Keys ending in "/" are folder placeholders, not downloadable files.
        if object_name.endswith("/"):
            continue

        if prefix and object_name == prefix:
            # The prefix is itself a single object: keep just its file name.
            relative_key = object_name.rsplit("/", 1)[-1]
        elif object_name.startswith(folder):
            relative_key = object_name[len(folder):]
        else:
            # The listing matches on raw text, so prefix "data" also returns
            # "database/...". Such keys are outside the requested folder and
            # would otherwise be written outside `local_file_path`.
            continue

        # Object keys always use "/", whatever the local path separator is.
        local_file = os.path.join(local_file_path, *relative_key.split("/"))
        # dirname is "" when the destination is the current directory.
        os.makedirs(os.path.dirname(local_file) or ".", exist_ok=True)
        minio_client.fget_object(bucket_name, object_name, local_file)

# Fetch everything stored under the "data" prefix into the local directory
download_objects_from_minio(
    minio_client=minio_client,
    bucket_name=REMOTE_MLFLOW_BUCKET_NAME,
    s3_path_to_model="data",
    local_file_path=local_file_path,
)

In [25]:
def create_preprocessed_spectrogram(audio_path, sr=22050, n_mels=128, fmax=8000, img_size=(224, 224), start_time=20, segment_duration=20):
    """Turn an excerpt of an audio file into a normalized spectrogram image tensor.

    The excerpt is converted to a mel spectrogram in dB, drawn with matplotlib
    without axes, read back as an RGB image, resized, and converted to a tensor.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sampling rate the audio is resampled to when loading.
        n_mels: Number of mel bands of the spectrogram.
        fmax: Highest frequency used for the mel filter bank and the plot.
        img_size: Size passed to ``PIL.Image.resize`` (width, height).
        start_time: Offset in seconds at which the excerpt starts.
        segment_duration: Length of the excerpt in seconds.

    Returns:
        The image tensor with a leading batch dimension, or ``None`` when any
        step fails (the error is printed instead of raised).
    """
    try:
        # Load only the requested excerpt of the audio file
        y, sr = librosa.load(audio_path, sr=sr, offset=start_time, duration=segment_duration)

        # Mel spectrogram, expressed in dB relative to its maximum
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
        S_DB = librosa.power_to_db(S, ref=np.max)

        # Render the spectrogram into an in-memory PNG. Nothing is written to
        # disk, and the figure is closed even if plotting or saving fails.
        fig = plt.figure(figsize=(10, 4))
        try:
            plt.axis('off')
            librosa.display.specshow(S_DB, sr=sr, x_axis=None, y_axis=None, fmax=fmax)
            png_buffer = io.BytesIO()
            fig.savefig(png_buffer, format='png', bbox_inches='tight', pad_inches=0)
        finally:
            plt.close(fig)

        # Read the rendered picture back and bring it to the requested size
        png_buffer.seek(0)
        img = Image.open(png_buffer).convert('RGB')  # Convert to RGB
        img = img.resize(img_size, Image.Resampling.LANCZOS)

        # Convert to a tensor and normalize with (x - 0.5) / 0.5.
        # NOTE(review): a single mean/std value is given for a 3-channel image;
        # kept unchanged — presumably it matches the training preprocessing.
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])
        img_tensor = transform(img).unsqueeze(0)  # Add batch dimension

        return img_tensor
    except Exception as e:
        # Best-effort by design: report the problem and let the caller check for None
        print(f"Error processing {audio_path}: {e}")
        return None

In [29]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [None]:
# Load the complete MLflow-packaged PyTorch model from the download directory:
# weights are mapped to the CPU while loading, then the model is moved to `device`.
model_from_minio = mlflow.pytorch.load_model(
    local_file_path,
    map_location=torch.device('cpu'),
).to(device)
model_from_minio



MusicNet(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=12544, out_features=18, bias=True)
)

In [None]:
# Turn one audio file into the spectrogram tensor that is fed to the model below.
# NOTE(review): absolute path on the author's machine — point it at a file that
# exists locally before running this cell.
audio_path = "/home/kin/Documents/music_similarity/preprocessing/MegaSet/Clinton Fearon/Clinton Fearon - 2010 - Mi Deh Yah/01-clinton_fearon-life_is_a_journey.mp3"  # reggae
img_tensor = create_preprocessed_spectrogram(audio_path)

# Genre name -> class index. The position in the list below is the class index.
mapping = {
    genre: class_index
    for class_index, genre in enumerate([
        'blues',
        'chanson',
        'classical',
        'country',
        'dance',
        'dub',
        'electro',
        'folk',
        'funk',
        'hard rock',
        'hip-hop',
        'house',
        'jazz',
        'metal',
        'pop',
        'rap',
        'reggae',
        'rock',
    ])
}

# Make a prediction (skipped when preprocessing failed and returned None)
if img_tensor is not None:
    img_tensor = img_tensor.to(device)
    # Inference behaviour for the dropout / batch-norm layers
    model_from_minio.eval()
    with torch.no_grad():
        output = model_from_minio(img_tensor)
        # argmax returns the winning class index directly. Unpacking torch.max
        # into `_` would rebind `_` in the notebook namespace, after which
        # IPython stops updating its "last output" variable.
        predicted = output.argmax(dim=1)

    # Create a reverse mapping from index to class name
    idx_to_class = {v: k for k, v in mapping.items()}

    # Get the predicted class name (the batch holds a single item)
    predicted_class_name = idx_to_class[predicted.item()]

    # Print the predicted class
    print(f'Predicted class: {predicted_class_name}')

Predicted class: reggae
