In [1]:
import os

from minio import Minio
from minio.error import S3Error

import mlflow
from mlflow.tracking import MlflowClient
import mlflow.pytorch

import torch

In [None]:
from dotenv import load_dotenv
load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`, failing fast if it is unset or empty.

    Raises:
        RuntimeError: when the variable is missing, so the problem is reported
            by name instead of surfacing later as `TypeError: str expected,
            not NoneType` or as an endpoint such as 'https://None'.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required environment variable {name!r} is not set (check your .env file)"
        )
    return value


# Set up MLflow tracking URI and MinIO configuration
MLFLOW_TRACKING_URI = _require_env('MLFLOW_REMOTE_SERVER_URI')
REMOTE_MLFLOW_STORAGE_URI = _require_env('REMOTE_MLFLOW_STORAGE_URI')
# Built from the variable read above: reusing the same quote character inside
# an f-string replacement field is a SyntaxError on Python < 3.12.
MLFLOW_S3_ENDPOINT_URL = f'https://{REMOTE_MLFLOW_STORAGE_URI}'
AWS_ACCESS_KEY_ID = _require_env('NEW_USER_USERNAME')
AWS_SECRET_ACCESS_KEY = _require_env('NEW_USER_PASSWORD')

# Export the settings through the process environment for MLflow and its S3 artifact access.
os.environ['MLFLOW_TRACKING_URI'] = MLFLOW_TRACKING_URI
os.environ['MLFLOW_S3_ENDPOINT_URL'] = MLFLOW_S3_ENDPOINT_URL
os.environ['AWS_ACCESS_KEY_ID'] = AWS_ACCESS_KEY_ID
os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_ACCESS_KEY
os.environ['MLFLOW_S3_IGNORE_TLS'] = 'false'

# Created after the environment is populated so the client picks up the tracking URI.
mlflow_client = MlflowClient()

In [None]:
# Build the MinIO client from the credentials read in the configuration cell.
minio_settings = {
    "endpoint": REMOTE_MLFLOW_STORAGE_URI,
    "access_key": AWS_ACCESS_KEY_ID,
    "secret_key": AWS_SECRET_ACCESS_KEY,
    "secure": True,
}
minio_client = Minio(**minio_settings)

# Print the name of every object stored in the MLflow artifact bucket.
bucket_name = "mlflowbucket"
prefix = ""  # set to "mlflow/" to restrict the listing to that key prefix
try:
    objects = minio_client.list_objects(bucket_name, prefix=prefix, recursive=True)
    for listed_object in objects:
        print(listed_object.object_name)
except S3Error as e:
    # Report storage errors without aborting the notebook run.
    print("Error occurred:", e)

Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts/model/MLmodel
Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts/model/conda.yaml
Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts/model/data/model.pth
Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts/model/data/pickle_module_info.txt
Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts/model/python_env.yaml
Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts/model/requirements.txt
mlflow/40/4ca1f0f2b8fa463a9e93b7db495262f7/artifacts/model/requirements.txt
mlflow/40/6d69451b7f9d4a4fae8a796cb692839e/artifacts/model/MLmodel
mlflow/40/6d69451b7f9d4a4fae8a796cb692839e/artifacts/model/conda.yaml
mlflow/40/6d69451b7f9d4a4fae8a796cb692839e/artifacts/model/model.pkl
mlflow/40/6d69451b7f9d4a4fae8a796cb692839e/artifacts/model/python

In [4]:
# Show the id and name of every experiment returned by the tracking server.
experiments = mlflow.search_experiments()

for exp in experiments:
    exp_id, exp_name = exp.experiment_id, exp.name
    print(f"Experiment ID: {exp_id}, Name: {exp_name}")

Experiment ID: 41, Name: Music_Genre_Classification_V4_20:04:39.9682
Experiment ID: 37, Name: Music_Genre_Classification_V2_11:56:15.3037
Experiment ID: 25, Name: Music_Genre_Classification_V2_19:11:34.5089
Experiment ID: 17, Name: Music Genre Classification 22:43:35.8824


In [5]:
# Look up the runs of one experiment and take the artifact location of the first row.
experiment_id = "41"
runs = mlflow.search_runs(experiment_ids=[experiment_id])
if runs.empty:
    # Without this guard, `runs.iloc[0]` fails with an opaque
    # "single positional indexer is out-of-bounds" IndexError.
    raise ValueError(f"No runs found for experiment_id {experiment_id}")
# NOTE(review): this relies on the default ordering of `search_runs`; pass
# `order_by` explicitly if a specific run (e.g. the best one) is intended.
artifact_uri = runs.iloc[0].artifact_uri
print(f"artifact_uri for experiment_id {experiment_id}: {artifact_uri}")

artifact_uri for experiment_id 41: s3://mlflowbucket/Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts


In [6]:
def extract_s3_path(artifact_uri):
    """Return the object key portion of an `s3://bucket/key` URI.

    Everything after the first '/' that follows the bucket name is returned
    unchanged; a URI with no key yields an empty string.

    Raises:
        ValueError: if `artifact_uri` does not begin with 's3://'.
    """
    scheme = 's3://'
    if not artifact_uri.startswith(scheme):
        raise ValueError("The provided URI does not start with 's3://'")

    # Drop the scheme, then split "<bucket>/<key>" once at the first slash.
    _bucket, _slash, object_key = artifact_uri[len(scheme):].partition('/')
    return object_key

# Strip the scheme and bucket from the run's artifact URI, keeping only the
# object-key prefix; the bare name on the last line displays it as cell output.
s3_path_to_model = extract_s3_path(artifact_uri)
s3_path_to_model

'Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts'

In [None]:
def download_objects_from_minio(minio_client, bucket_name, s3_path_to_model, local_file_path):
    """Download every object under a bucket "folder" into a local directory.

    Args:
        minio_client: client exposing `list_objects` and `fget_object`.
        bucket_name: bucket to read from.
        s3_path_to_model: key prefix treated as a folder; a trailing '/' is optional.
        local_file_path: local directory that receives the files; the key
            structure below the prefix is recreated inside it.

    Returns:
        list[str]: local paths of the files that were downloaded, in listing order.
    """
    # S3 prefixes are plain string prefixes: "run/artifacts" would also match
    # "run/artifacts_old/...". Anchoring on a trailing '/' limits the listing
    # to the folder itself, so nothing is written outside `local_file_path`.
    folder_prefix = s3_path_to_model.rstrip('/')
    if folder_prefix:
        folder_prefix += '/'

    downloaded = []
    for obj in minio_client.list_objects(bucket_name, prefix=folder_prefix, recursive=True):
        object_name = obj.object_name
        # Skip anything outside the folder and zero-byte "directory marker" keys.
        if not object_name.startswith(folder_prefix) or object_name.endswith('/'):
            continue

        # Object keys always use '/', so split on it explicitly instead of
        # relying on OS path functions, then rebuild the path for this platform.
        key_parts = [part for part in object_name[len(folder_prefix):].split('/') if part not in ('', '.')]
        if not key_parts or '..' in key_parts:
            # Ignore keys that would resolve above the destination directory.
            continue

        local_file = os.path.join(local_file_path, *key_parts)
        parent_dir = os.path.dirname(local_file)
        if parent_dir:  # os.makedirs('') raises, e.g. when local_file_path is ''
            os.makedirs(parent_dir, exist_ok=True)
        minio_client.fget_object(bucket_name, object_name, local_file)
        downloaded.append(local_file)

    return downloaded

# local_file_path = f"models/{experiment_id}"

# download_objects_from_minio(
#     minio_client, 
#     bucket_name, 
#     s3_path_to_model, 
#     local_file_path
#     )

In [7]:
def update_model_in_production_bucket(minio_client, bucket_name, model_folder_path):
    """Replace the model stored under `data/` in a bucket with a local model folder.

    The local folder is scanned first, so the existing production model is
    never deleted unless there is something to upload in its place. Objects
    outside the `data/` prefix are left untouched and do not block the upload.

    Args:
        minio_client: client exposing `list_objects`, `remove_object` and `fput_object`.
        bucket_name: destination bucket.
        model_folder_path: local directory whose files are uploaded under `data/`,
            preserving their relative layout.

    Returns:
        str: "Model updated successfully", or a message starting with
        "Error deleting existing model:" / "Error uploading new model:".
    """
    production_prefix = 'data/'

    # Plan the uploads before touching the bucket.
    uploads = []
    for root, _dirs, files in os.walk(model_folder_path):
        for file in files:
            local_file_path = os.path.join(root, file)
            relative_path = os.path.relpath(local_file_path, model_folder_path)
            # Object keys always use '/', regardless of the local OS separator.
            object_name = production_prefix + relative_path.replace(os.sep, '/')
            uploads.append((object_name, local_file_path))

    if not uploads:
        return f"Error uploading new model: no files found in {model_folder_path!r}"

    try:
        # Materialise the listing so objects are not removed while it is being iterated.
        existing = list(minio_client.list_objects(bucket_name, prefix=production_prefix, recursive=True))
        for obj in existing:
            minio_client.remove_object(bucket_name, obj.object_name)

        # Confirm the old model is gone. Only the `data/` prefix is checked:
        # testing the whole bucket would skip the upload whenever unrelated
        # objects exist, while still reporting success.
        leftovers = list(minio_client.list_objects(bucket_name, prefix=production_prefix, recursive=True))
    except Exception as e:
        return f"Error deleting existing model: {e}"

    if leftovers:
        return f"Error deleting existing model: {len(leftovers)} object(s) remain under {production_prefix!r}"

    try:
        for object_name, local_file_path in uploads:
            minio_client.fput_object(bucket_name, object_name, local_file_path)
    except Exception as e:
        return f"Error uploading new model: {e}"

    return "Model updated successfully"


# Rebind the global `bucket_name` to the production bucket: the listing cell
# and `get_production_model` below both read this global.
bucket_name = "music-net-prod"
# Disabled on purpose. Note that `local_file_path` is only assigned in
# commented-out code earlier in the notebook, so define it before enabling this call.
# update_model_in_production_bucket(minio_client, bucket_name, local_file_path)

In [8]:
# Print every object currently stored in the bucket named by `bucket_name`.
objects = minio_client.list_objects(bucket_name, recursive=True)
for stored_object in objects:
    print(stored_object.object_name)

data/model/MLmodel
data/model/conda.yaml
data/model/data/model.pth
data/model/data/pickle_module_info.txt
data/model/python_env.yaml
data/model/requirements.txt


In [None]:
def get_production_model():
    """Load the PyTorch model stored under `data/model/` in the current bucket.

    Reads the module-level `bucket_name` and credential constants, re-exports
    the credentials and S3 endpoint through the environment, and loads the
    model with `mlflow.pytorch.load_model`. When CUDA is unavailable the load
    is given `map_location=cpu`; otherwise no `map_location` is passed.

    Returns:
        The object returned by `mlflow.pytorch.load_model`.
    """
    minio_url = f"s3://{bucket_name}/data/model/"
    print(f"Minio URL: {minio_url}")

    # Make sure the S3 credentials and endpoint are present in the environment.
    os.environ.update({
        "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
        "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY,
        "MLFLOW_S3_ENDPOINT_URL": MLFLOW_S3_ENDPOINT_URL,
    })

    # Only force a CPU mapping when no GPU is available.
    load_kwargs = {}
    if not torch.cuda.is_available():
        load_kwargs["map_location"] = torch.device('cpu')
    return mlflow.pytorch.load_model(minio_url, **load_kwargs)


In [10]:
try:
    model = get_production_model()
except FileNotFoundError as e:
    # Print a short message, then re-raise: swallowing the error would leave
    # `model` undefined and make the following cells fail with a misleading
    # NameError instead of the real cause.
    print(f"Error: {e}")
    raise

Minio URL: s3://music-net-prod/data/model/


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 6/6 [00:00<00:00, 70.94it/s]   


In [11]:
# Put the loaded model into evaluation mode (this affects the Dropout and
# BatchNorm layers shown in the repr below). As the last expression of the
# cell, the returned module is also displayed.
model.eval()

MusicNet(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=12544, out_features=18, bias=True)
)

In [17]:
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision import transforms

# Folder with one sub-directory per class, as expected by ImageFolder.
test_dir = "beat_openl3/DATASET/CNNSET/test"

# NOTE(review): this preprocessing should match what was used for training — confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to the desired size
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize (adjust mean and std as needed)
])

test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
# Evaluation does not benefit from shuffling; a fixed order keeps runs
# reproducible and makes per-batch debugging deterministic.
test_loader = DataLoader(test_dataset, batch_size=25, shuffle=False, num_workers=2)

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report

def test_model(model, test_loader, device):
    """Evaluate a classifier on a data loader and print/return summary metrics.

    Args:
        model: a `torch.nn.Module` producing per-class scores of shape
            (batch, num_classes) — assumed from the `dim=1` reduction below.
        test_loader: iterable yielding `(data, target)` batches.
        device: `torch.device` on which inference is run.

    Returns:
        tuple: `(confusion_matrix, precision, recall, f1, classification_report)`,
        where precision/recall/F1 are support-weighted averages over classes.
    """
    # Move the model to the same device as the batches; otherwise inference
    # fails with a device-mismatch error when the model was loaded elsewhere
    # (e.g. CPU-saved weights on a CUDA machine).
    model = model.to(device)
    model.eval()

    y_true, y_pred = [], []
    with torch.no_grad():  # no gradients needed for evaluation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            predicted = output.argmax(dim=1)  # index of the highest score per sample
            y_true.extend(target.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Calculate metrics; zero_division=0 avoids warnings for classes that are never predicted.
    cm = confusion_matrix(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    class_report = classification_report(y_true, y_pred, zero_division=0)

    print(f"Confusion Matrix:\n{cm}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Classification Report:\n{class_report}")

    return cm, precision, recall, f1, class_report

In [21]:
# Evaluate on the GPU when one is available, otherwise on the CPU.
eval_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cm, precision, recall, f1, class_report = test_model(model, test_loader, eval_device)

Confusion Matrix:
[[2 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1]
 [0 0 2 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0]
 [0 1 0 2 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 0 0 1]
 [0 0 0 0 1 0 0 2 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 3 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 3 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 2 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2]
 [1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 3]]
Precision: 0.6087
Recall: 0.5833
F1 Score: 0.5621
Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       0.50      0.25      0.33         4
           2       0.67      0.50      0.57


1. **Confusion Matrix**: A table that is used to describe the performance of a classification model. It shows the number of correct and incorrect predictions made by the model compared to the actual classifications.

2. **Precision**: The ratio of correctly predicted positive observations to the total predicted positives. It answers the question: "Of all the instances that were predicted as a certain class, how many were actually that class?"

3. **Recall**: The ratio of correctly predicted positive observations to all observations in the actual class. It answers the question: "Of all the instances that actually belong to a certain class, how many were correctly predicted?"

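4. **F1 Score**: The harmonic mean of Precision and Recall, $F_1 = 2 \cdot \frac{\text{Precision} \cdot \text{Recall}}{\text{Precision} + \text{Recall}}$. It is useful when you need a balance between Precision and Recall. (The single Precision, Recall, and F1 values printed above are averages over all classes, weighted by each class's support.)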

5. **Classification Report**: A detailed report showing the Precision, Recall, and F1 Score for each class. It provides a comprehensive overview of the model's performance across all classes.