In [1]:
import os

from minio import Minio
from minio.error import S3Error

import mlflow
from mlflow.tracking import MlflowClient
import mlflow.pytorch

import torch

In [2]:
from dotenv import load_dotenv
load_dotenv()


def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Failing here gives a
            readable message instead of a ``TypeError`` from ``os.environ``
            or a silently malformed ``"https://None"`` endpoint.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required environment variable {name!r} is not set (check your .env file)"
        )
    return value


# Set up MLflow tracking URI and MinIO configuration.
# All values come from the environment (.env is loaded above); nothing is hardcoded.
MLFLOW_TRACKING_URI = _require_env('MLFLOW_REMOTE_SERVER_URI')
REMOTE_MLFLOW_STORAGE_URI = _require_env('REMOTE_MLFLOW_STORAGE_URI')
# The storage URI is a bare host[:port]; MLflow's S3 client needs a full URL.
MLFLOW_S3_ENDPOINT_URL = f"https://{REMOTE_MLFLOW_STORAGE_URI}"
AWS_ACCESS_KEY_ID = _require_env('NEW_USER_USERNAME')
AWS_SECRET_ACCESS_KEY = _require_env('NEW_USER_PASSWORD')

# Export under the names these settings are conventionally read from.
os.environ['MLFLOW_TRACKING_URI'] = MLFLOW_TRACKING_URI
os.environ['MLFLOW_S3_ENDPOINT_URL'] = MLFLOW_S3_ENDPOINT_URL
os.environ['AWS_ACCESS_KEY_ID'] = AWS_ACCESS_KEY_ID
os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_ACCESS_KEY
os.environ['MLFLOW_S3_IGNORE_TLS'] = 'false'

mlflow_client = MlflowClient()

In [3]:
# Set up MinIO client.
# It talks to the same host and uses the same credentials that were exported
# for MLflow's S3 access; secure=True means the connection uses TLS.
minio_client = Minio(
    endpoint=REMOTE_MLFLOW_STORAGE_URI,
    access_key=AWS_ACCESS_KEY_ID,
    secret_key=AWS_SECRET_ACCESS_KEY,
    secure=True
)

# List objects in the bucket
bucket_name = "mlflowbucket"
# An empty prefix lists every key in the bucket; use e.g. "mlflow/" to narrow it.
# prefix = "mlflow/"
prefix = ""
try:
    # recursive=True descends through all "folders" instead of stopping at the first level.
    objects = minio_client.list_objects(bucket_name, prefix=prefix, recursive=True)
    for obj in objects:
        print(obj.object_name)
except S3Error as e:
    # Best-effort listing: report the storage error and let the notebook continue.
    print("Error occurred:", e)

21228c06f6b643938b156510312de1a5/artifacts/classification_report.txt
21228c06f6b643938b156510312de1a5/artifacts/model/MLmodel
21228c06f6b643938b156510312de1a5/artifacts/model/conda.yaml
21228c06f6b643938b156510312de1a5/artifacts/model/data/model.pth
21228c06f6b643938b156510312de1a5/artifacts/model/data/pickle_module_info.txt
21228c06f6b643938b156510312de1a5/artifacts/model/python_env.yaml
21228c06f6b643938b156510312de1a5/artifacts/model/requirements.txt
57cedbcc86d3444fa3c079443bfeec7c/artifacts/classification_report.txt
57cedbcc86d3444fa3c079443bfeec7c/artifacts/model/MLmodel
57cedbcc86d3444fa3c079443bfeec7c/artifacts/model/conda.yaml
57cedbcc86d3444fa3c079443bfeec7c/artifacts/model/data/model.pth
57cedbcc86d3444fa3c079443bfeec7c/artifacts/model/data/pickle_module_info.txt
57cedbcc86d3444fa3c079443bfeec7c/artifacts/model/python_env.yaml
57cedbcc86d3444fa3c079443bfeec7c/artifacts/model/requirements.txt
6fed7bbe04d14e9aa4ffbce4b48a56e8/artifacts/model/MLmodel
6fed7bbe04d14e9aa4ffbce4b48

In [4]:
# List experiments
# Uses the tracking URI exported to the environment earlier in the notebook.
experiments = mlflow.search_experiments()

# Print id and name so an experiment id can be picked by hand for the next step.
for experiment in experiments:
    print(f"Experiment ID: {experiment.experiment_id}, Name: {experiment.name}")

Experiment ID: 46, Name: Music_Genre_Classification_V2_19:17:29.8986
Experiment ID: 45, Name: Music_Genre_Classification_V2_18:52:06.3148
Experiment ID: 44, Name: Music_Genre_Classification_V2_18:35:37.0929
Experiment ID: 43, Name: Music_Genre_Classification_V2_17:49:20.0907
Experiment ID: 42, Name: Music_Genre_Classification_V2_19:35:42.6849
Experiment ID: 41, Name: Music_Genre_Classification_V4_20:04:39.9682
Experiment ID: 37, Name: Music_Genre_Classification_V2_11:56:15.3037
Experiment ID: 25, Name: Music_Genre_Classification_V2_19:11:34.5089
Experiment ID: 17, Name: Music Genre Classification 22:43:35.8824


In [5]:
# List runs for a specific experiment
experiment_id = "41"
runs = mlflow.search_runs(experiment_ids=[experiment_id])

# An experiment without runs would otherwise fail with an opaque IndexError on .iloc[0].
if runs.empty:
    raise ValueError(f"No runs found for experiment_id {experiment_id}")

# Take the first row of the result.
# NOTE(review): presumably the most recent run (default ordering of search_runs) - confirm.
artifact_uri = runs.iloc[0].artifact_uri
print(f"artifact_uri for experiment_id {experiment_id}: {artifact_uri}")

artifact_uri for experiment_id 41: s3://mlflowbucket/Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts


In [6]:
def extract_s3_path(artifact_uri):
    """Strip the ``s3://<bucket>/`` part from an artifact URI.

    Args:
        artifact_uri: URI of the form ``s3://<bucket>/<key>``.

    Returns:
        The object key (everything after the first ``/`` that follows the
        bucket name); an empty string when the URI carries no key.

    Raises:
        ValueError: if ``artifact_uri`` does not begin with ``s3://``.
    """
    scheme = 's3://'
    if not artifact_uri.startswith(scheme):
        raise ValueError("The provided URI does not start with 's3://'")

    # Everything up to the first '/' after the scheme is the bucket name.
    _bucket, _slash, object_key = artifact_uri[len(scheme):].partition('/')
    return object_key

# Object-key prefix (bucket name removed) of the selected run's artifacts;
# the bare expression on the last line displays it as the cell output.
s3_path_to_model = extract_s3_path(artifact_uri)
s3_path_to_model

'Music_Genre_Classification_V4_20:04:39.9682/d38eb340c51e4c4b8b3b50a60bbfa158/artifacts'

In [7]:
def download_objects_from_minio(minio_client, bucket_name, s3_path_to_model, local_file_path):
    """Download every object stored under a key prefix into a local directory.

    Args:
        minio_client: client exposing ``list_objects`` and ``fget_object``.
        bucket_name: bucket to read from.
        s3_path_to_model: key prefix (a "folder") or the exact key of one object.
        local_file_path: destination directory; created on demand.

    Returns:
        List of local file paths that were written, in listing order.
    """
    folder = s3_path_to_model.rstrip('/')
    downloaded = []

    for obj in minio_client.list_objects(bucket_name, prefix=s3_path_to_model, recursive=True):
        object_name = obj.object_name

        if object_name.endswith('/'):
            # Directory placeholder: nothing to fetch.
            continue

        if not folder:
            relative_key = object_name
        elif object_name == folder:
            # The "prefix" is a single object: keep just its file name.
            relative_key = object_name.rsplit('/', 1)[-1]
        elif object_name.startswith(folder + '/'):
            relative_key = object_name[len(folder) + 1:]
        else:
            # Prefix listing is a plain string match, so "run/artifacts" also returns
            # "run/artifacts_old/...". Those siblings are not part of the requested
            # folder and would otherwise land outside local_file_path.
            continue

        # Object keys always use '/'. Rebuild the path with the local separator and
        # drop segments that could step out of the destination directory.
        parts = [part for part in relative_key.split('/') if part not in ('', '.', '..')]
        if not parts:
            continue

        local_file = os.path.join(local_file_path, *parts)
        # dirname is '' when local_file_path is '' and the file sits at the top level.
        os.makedirs(os.path.dirname(local_file) or '.', exist_ok=True)
        minio_client.fget_object(bucket_name, object_name, local_file)
        downloaded.append(local_file)

    return downloaded

# local_file_path = f"models/{experiment_id}"

# download_objects_from_minio(
#     minio_client, 
#     bucket_name, 
#     s3_path_to_model, 
#     local_file_path
#     )

In [8]:
def update_model_in_production_bucket(minio_client, bucket_name, model_folder_path):
    """Replace the model stored under ``data/`` in a bucket with a local folder.

    The local folder is scanned first. If it contains no files (wrong path,
    empty download) nothing is deleted, so a bad argument cannot wipe the model
    that is currently in the bucket.

    Args:
        minio_client: client exposing ``list_objects``, ``remove_object`` and
            ``fput_object``.
        bucket_name: target bucket.
        model_folder_path: local directory whose contents are uploaded below ``data/``.

    Returns:
        ``"Model updated successfully"`` on success, otherwise a message starting
        with ``"Error"``. Failures are reported through the return value, not raised.
    """
    production_prefix = 'data/'

    # 1. Work out what will be uploaded before touching the bucket.
    try:
        uploads = []
        for root, _dirs, files in os.walk(model_folder_path):
            for file in files:
                local_file_path = os.path.join(root, file)
                relative_path = os.path.relpath(local_file_path, model_folder_path)
                # Object keys use '/', whatever the local path separator is.
                object_name = production_prefix + relative_path.replace(os.sep, '/')
                uploads.append((object_name, local_file_path))
    except Exception as e:
        return f"Error uploading new model: {e}"

    if not uploads:
        return f"Error uploading new model: no files found in {model_folder_path!r}"

    # 2. Delete the existing model (only keys under data/).
    try:
        for obj in minio_client.list_objects(bucket_name, prefix=production_prefix, recursive=True):
            minio_client.remove_object(bucket_name, obj.object_name)
    except Exception as e:
        return f"Error deleting existing model: {e}"

    # 3. Upload the new model's folder and all its contents.
    # Objects outside data/ are unrelated to the model and must not block the upload.
    try:
        for object_name, local_file_path in uploads:
            minio_client.fput_object(bucket_name, object_name, local_file_path)
    except Exception as e:
        return f"Error uploading new model: {e}"

    return "Model updated successfully"


# NOTE: this rebinds the notebook-level bucket_name (it was "mlflowbucket" in the
# listing cell). Every cell executed after this one, including get_production_model(),
# reads the production bucket through this name, so execution order matters.
bucket_name = "music-net-prod"
# Publishing is intentionally left commented out; it needs local_file_path from the
# (also commented-out) download step.
# update_model_in_production_bucket(minio_client, bucket_name, local_file_path)

In [9]:
# Show every key in the bucket currently named by bucket_name (recursive listing),
# to check what is stored there before loading the model.
objects = minio_client.list_objects(bucket_name, recursive=True)

for obj in objects:
    print(obj.object_name)

data/model/MLmodel
data/model/conda.yaml
data/model/data/model.pth
data/model/data/pickle_module_info.txt
data/model/python_env.yaml
data/model/requirements.txt


In [10]:
def get_production_model():
    """Load the PyTorch model stored at ``s3://<bucket_name>/data/model/`` via MLflow.

    Reads the notebook-level ``bucket_name`` and the credential/endpoint
    constants, re-exports those into the process environment, and prints the
    resolved URL. Without CUDA the model is mapped onto the CPU; with CUDA no
    ``map_location`` is passed.

    Returns:
        Whatever ``mlflow.pytorch.load_model`` returns for that URI.
    """
    minio_url = f"s3://{bucket_name}/data/model/"
    print(f"Minio URL: {minio_url}")

    # Re-export the storage settings right before loading.
    os.environ.update({
        "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
        "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY,
        "MLFLOW_S3_ENDPOINT_URL": MLFLOW_S3_ENDPOINT_URL,
    })

    # Only force a device mapping when there is no GPU to load onto.
    load_kwargs = {} if torch.cuda.is_available() else {"map_location": torch.device('cpu')}
    return mlflow.pytorch.load_model(minio_url, **load_kwargs)


In [11]:
try:
    model = get_production_model()
except FileNotFoundError as e:
    print(f"Error: {e}")
    # Re-raise: every following cell needs `model`. Swallowing the error would either
    # fail later with a NameError or silently reuse a stale model from an earlier run.
    raise

Minio URL: s3://music-net-prod/data/model/


Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]



In [12]:
# Switch the loaded model to evaluation mode; the value returned by eval() is
# displayed as the cell output, which shows the layer structure.
model.eval()

MusicNet(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batchnorm1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=12544, out_features=18, bias=True)
)

In [13]:
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision import transforms

# Directory holding the test images, one sub-folder per class (ImageFolder layout).
# Set MUSIC_CNN_TEST_DIR to run on another machine; the default is the original location.
test_dir = os.getenv(
    "MUSIC_CNN_TEST_DIR",
    "/home/kin/Documents/music-cnn/beat_openl3/DATASET/CNNSET/test",
)

# NOTE(review): preprocessing presumably has to match what was used at training time - confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to the desired size
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize (adjust mean and std as needed)
])

test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
# No shuffling: a fixed sample order keeps evaluation runs reproducible.
test_loader = DataLoader(test_dataset, batch_size=25, shuffle=False, num_workers=2)

In [14]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report
import pandas as pd

def test_model(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and compute classification metrics.

    Args:
        model: module that is called on each batch; it is moved to ``device``
            and put into eval mode.
        test_loader: iterable yielding ``(data, target)`` batches.
        device: device on which inference runs.

    Returns:
        Tuple ``(cm, precision, recall, f1, class_report_df)``: the confusion
        matrix, the weighted precision / recall / F1 scores, and the
        classification report as a DataFrame with one row per report entry.
    """
    # Inputs are moved to `device` below, so the model has to live there too.
    # Otherwise a model loaded on another device fails with a device-mismatch error.
    model = model.to(device)
    model.eval()

    y_true, y_pred = [], []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Predicted class = index of the highest score along dim 1.
            predicted = output.argmax(dim=1)
            y_true.extend(target.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Calculate metrics; zero_division=0 scores classes that were never predicted
    # as 0 instead of emitting warnings.
    cm = confusion_matrix(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    class_report_dict = classification_report(y_true, y_pred, zero_division=0, output_dict=True)
    class_report_df = pd.DataFrame(class_report_dict).transpose()

    return cm, precision, recall, f1, class_report_df

In [15]:
# Evaluate on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
cm, precision, recall, f1, class_report_df = test_model(model, test_loader, device)

# Display the per-class report as the cell output.
class_report_df

Unnamed: 0,precision,recall,f1-score,support
0,0.666667,0.5,0.571429,4.0
1,0.5,0.25,0.333333,4.0
2,0.666667,0.5,0.571429,4.0
3,1.0,0.5,0.666667,4.0
4,0.8,1.0,0.888889,4.0
5,0.8,1.0,0.888889,4.0
6,0.0,0.0,0.0,4.0
7,0.5,0.5,0.5,4.0
8,1.0,0.75,0.857143,4.0
9,0.444444,1.0,0.615385,4.0


In [16]:
# Here's how to interpret the classification report:

# Precision: The ratio of correctly predicted positive observations to the total predicted positives. High precision indicates a low false positive rate.
# Recall: The ratio of correctly predicted positive observations to all observations in the actual class. High recall indicates a low false negative rate.
# F1-Score: The harmonic mean of precision and recall. It accounts for both false positives and false negatives. A high F1-score indicates a good balance between precision and recall.
# Support: The number of actual occurrences of the class in the dataset.
# For each class (0 to 17):

# Precision: How many selected items are relevant.
# Recall: How many relevant items are selected.
# F1-Score: The harmonic mean of precision and recall.
# Support: The number of true instances for each class.
# Overall metrics:

# Accuracy: The ratio of correctly predicted instances to the total instances.
# Macro avg: The average of the precision, recall, and F1-score for all classes, treating all classes equally.
# Weighted avg: The average of the precision, recall, and F1-score for all classes, weighted by the number of true instances for each class.
# Example Interpretation:
# Class 0:

# Precision: 0.67 (67% of the predicted class 0 instances are correct)
# Recall: 0.50 (50% of the actual class 0 instances are correctly predicted)
# F1-Score: 0.57 (balance between precision and recall)
# Support: 4 (there are 4 actual instances of class 0)
# Overall Accuracy: 0.5833 (58.33% of the total instances are correctly predicted)

# Macro avg:

# Precision: 0.6087
# Recall: 0.5833
# F1-Score: 0.5621
# Weighted avg (identical to the macro avg here because every class has the same support):

# Precision: 0.6087
# Recall: 0.5833
# F1-Score: 0.5621
# These metrics help you understand the performance of your model across different classes and overall.