In [1]:
import numpy as np
import pandas as pd
import os

In [3]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.20.3-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.20.3 (from mlflow)
  Downloading mlflow_skinny-2.20.3-py3-none-any.whl.metadata (31 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.20.3->mlflow)
  Downloading databricks_sdk-0.45.0-py3-none-any.whl.metadata (38 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)
Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)
Downloading mlflow-2.20.3-py3-none-any.whl (28.4 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m28.

In [4]:
import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from sklearn.metrics import classification_report
from tqdm import tqdm

In [10]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

# MLFlow

In [5]:
# Step 1: Set MLflow Tracking URI and Experiment Name
URL_MLFLOW = "http://34.143.211.28:5000/"  # Example URI (replace with your MLflow server URL)
mlflow.set_tracking_uri(URL_MLFLOW)
mlflow.set_experiment("Image_Classification_Experiment_DogSkin")  # Experiment name

<Experiment: artifact_location='mlflow-artifacts:/841216626593489635', creation_time=1741580223690, experiment_id='841216626593489635', last_update_time=1741580223690, lifecycle_stage='active', name='Image_Classification_Experiment_DogSkin', tags={}>

In [116]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match ResNet input size
    transforms.RandomHorizontalFlip(p=0.5),  # 50% chance of horizontal flip
    transforms.RandomRotation(degrees=15),  # Random rotation ¬±15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust colors
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),  # Small rotations & translations
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Random crop & resize
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for ResNet
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
def create_img_path(path):
    annotation = pd.DataFrame(columns=['image_name', 'label', 'map_class'])
    classes = os.listdir(path)
    map_class = {
        'Dermatitis': 0,
        'demodicosis': 1,
        'Healthy': 2,
        'Hypersensitivity': 3,
        'Fungal_infections': 4,
        'ringworm': 5
    }
    
    for dir_class in classes:
        for img_path in os.listdir(os.path.join(path, dir_class)):
            row = {
                'image_name': os.path.join(path, dir_class, img_path),
                'label': dir_class,
                'map_class': map_class[dir_class]
            }
            annotation = pd.concat([annotation, pd.DataFrame([row])], ignore_index=True)

    return annotation

In [8]:
anno_test = create_img_path('/kaggle/input/dogs-skin-diseases-image-dataset/test')
anno_train = create_img_path('/kaggle/input/dogs-skin-diseases-image-dataset/train')
anno_valid = create_img_path('/kaggle/input/dogs-skin-diseases-image-dataset/valid')

In [29]:
class Customdataset(Dataset):
    def __init__(self, annotation, transform=None):
        self.annotation = annotation
        self.transform = transform
    def __getitem__(self, idx):
        image = Image.open(self.annotation.loc[idx, 'image_name']).convert('RGB')
        label = self.annotation.loc[idx, 'map_class']

        if self.transform != None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.annotation)

    def get_class_name(self):
        return self.annotation['label'].unique().tolist()

In [117]:
train_set = Customdataset(anno_train, transform = train_transform)
valid_set = Customdataset(anno_valid, transform = test_transform)
test_set = Customdataset(anno_test, transform = test_transform)
train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=4, persistent_workers=True)
valid_loader = DataLoader(valid_set, batch_size=8, shuffle=False, num_workers=4, persistent_workers=True)
test_loader = DataLoader(test_set, batch_size=8, shuffle=False, num_workers=4, persistent_workers=True)

In [111]:
import mlflow
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torchvision import transforms, models
from sklearn.metrics import classification_report, confusion_matrix
import json

In [113]:
def model_training(model, train_loader, val_loader, train_transform, test_transform, dataset_path,
                   pretrained_weights_name, epochs=10, lr=0.001, batch_size=None, device=None, run_name="Image_Classification_Exp"):
    
    # Select device (GPU if available)
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # Loss function, optimizer, and scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)  # Reduce LR every 5 epochs

    with mlflow.start_run(run_name=run_name):  # Set custom run name
        # Log hyperparameters
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("learning_rate", lr)
        mlflow.log_param("dataset_path", dataset_path)  # Log dataset path
        mlflow.log_param("pretrained_weights", pretrained_weights_name)
        mlflow.log_param("batch_size", batch_size)  # Log batch size

        # Log data transformations
        log_transforms(train_transform, "train_transform")
        log_transforms(test_transform, "test_transform")

        for epoch in range(epochs):
            model.train()
            running_loss = 0.0

            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
            for images, labels in progress_bar:
                images, labels = images.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                progress_bar.set_postfix(loss=loss.item())

            # Log training loss
            mlflow.log_metric("train_loss", running_loss / len(train_loader), step=epoch)

            # Validation loop
            model.eval()
            val_loss = 0.0
            correct, total = 0, 0
            y_true, y_pred = [], []
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    y_true.extend(labels.cpu().numpy())
                    y_pred.extend(predicted.cpu().numpy())

                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            accuracy = correct / total

            # Compute classification report & confusion matrix
            class_report = classification_report(y_true, y_pred, output_dict=True)
            cm = confusion_matrix(y_true, y_pred)

            # Log classification report as an artifact
            mlflow.log_dict(class_report, f"classification_report_epoch_{epoch}.json")

            # Log confusion matrix as an artifact
            mlflow.log_dict({"confusion_matrix": cm.tolist()}, f"confusion_matrix_epoch_{epoch}.json")

            # Log individual metrics for each class
            for class_label, metrics in class_report.items():
                if isinstance(metrics, dict):
                    mlflow.log_metric(f"precision_class_{class_label}", metrics.get("precision", 0), step=epoch)
                    mlflow.log_metric(f"recall_class_{class_label}", metrics.get("recall", 0), step=epoch)
                    mlflow.log_metric(f"f1_score_class_{class_label}", metrics.get("f1-score", 0), step=epoch)

            # Log overall validation metrics
            mlflow.log_metric("val_accuracy", accuracy, step=epoch)
            mlflow.log_metric("macro_avg_f1", class_report["macro avg"]["f1-score"], step=epoch)
            mlflow.log_metric("weighted_avg_f1", class_report["weighted avg"]["f1-score"], step=epoch)
            mlflow.log_metric("val_loss", val_loss / len(val_loader), step=epoch)

            print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {running_loss/len(train_loader):.4f}, '
                  f'Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {accuracy:.4f}')

            # Step the LR scheduler
            scheduler.step()
            mlflow.log_metric("learning_rate", scheduler.get_last_lr()[0], step=epoch)

        # Save final trained model
        model_name = f"model_{run_name}.pth"
        torch.save(model.state_dict(), model_name)
        mlflow.pytorch.log_model(model, "model")
        print("Training Complete!")
        
        # Upload to GCS
        service_account_key_path = "/kaggle/input/bucket-key/lab-molops-bf2b87b5777c.json"  # Change as needed
        bucket_name = 'mlops_image_model/image_classification'  # Change as needed
        blob_name = model_name  # Path to the file in the image_classification folder
        url = generate_upload_signed_url_v4(bucket_name, blob_name, service_account_key_path)
        file_path = f"/kaggle/working/{model_name}"
        content_type = "application/octet-stream"
        upload_file_to_url(url, file_path, content_type)
        print("Model Save Complete!")

    return model


In [None]:
def generate_upload_signed_url_v4(bucket_name, blob_name, service_account_key_path):
    """Generates a v4 signed URL for uploading a blob using HTTP PUT."""
    from google.cloud import storage
    from google.oauth2 import service_account
    import datetime
    
    # Set up the credentials using the service account key
    credentials = service_account.Credentials.from_service_account_file(service_account_key_path)

    # Use the credentials to create a storage client
    storage_client = storage.Client(credentials=credentials, project=credentials.project_id)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # Generate the signed URL for uploading a file
    url = blob.generate_signed_url(
        version="v4",
        expiration=datetime.timedelta(minutes=15),  # URL valid for 15 minutes
        method="PUT",  # Allow PUT requests using this URL
        content_type="application/octet-stream",  # MIME type of the file
    )

    print("Generated PUT signed URL:")
    print(url)
    print("You can use this URL with any user agent, for example:")
    print(f"curl -X PUT -H 'Content-Type: application/octet-stream' --upload-file my-file '{url}'")

    return url

In [114]:
def upload_file_to_url(pre_signed_url, file_path, content_type):
    """Uploads a file to a pre-signed URL using HTTP PUT with the correct Content-Type."""
    headers = {
        'Content-Type': content_type  # Make sure the Content-Type matches what was specified in the signed URL
    }

    with open(file_path, 'rb') as file:
        # Perform the PUT request to upload the file
        response = requests.put(pre_signed_url, data=file, headers=headers)

    # Check if the upload was successful
    if response.status_code == 200:
        print(f"File uploaded successfully to {pre_signed_url}")
    else:
        print(f"Failed to upload file. Status code: {response.status_code}, {response.text}")

In [115]:
def log_transforms(transform, name="train_transform"):
    """Logs the transformation pipeline to MLflow."""
    if hasattr(transform, 'transforms'):
        transform_list = [str(t) for t in transform.transforms]
    else:
        transform_list = [str(transform)]
    
    transform_json = json.dumps(transform_list, indent=4)
    mlflow.log_text(transform_json, f"{name}.json")

In [None]:
model_resnet50 = models.resnet50(num_classes=len(train_set.get_class_name()))
model_resnet50.fc = torch.nn.Linear(model_resnet50.fc.in_features, len(train_set.get_class_name()))

In [51]:
# Define the model (MobileNetV3)
model_mbv3 = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1)
model_mbv3.classifier[3] = torch.nn.Linear(model_mbv3.classifier[3].in_features, len(train_set.get_class_name()))

In [118]:
epochs = 5
learning_rate = 0.001
run_name = "MobileNetV3_Exp_002"
dataset_path = "https://www.kaggle.com/datasets/youssefmohmmed/dogs-skin-diseases-image-dataset"

trained_mbv3 = model_training(
    model=model_mbv3, 
    train_loader=train_loader, 
    val_loader=valid_loader, 
    train_transform=train_transform, 
    test_transform=test_transform, 
    dataset_path=dataset_path,  # Replace with your dataset path
    pretrained_weights_name="IMAGENET1K_V1",  # Log the pretrained weights name
    epochs=epochs, 
    lr=learning_rate,
    run_name=run_name,
    batch_size=train_loader.batch_size
)

Epoch 1/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 378/378 [00:20<00:00, 18.24it/s, loss=0.136] 


Epoch [1/5], Train Loss: 0.5824, Val Loss: 0.8058, Val Accuracy: 0.7826


Epoch 2/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 378/378 [00:19<00:00, 19.42it/s, loss=0.404] 


Epoch [2/5], Train Loss: 0.4940, Val Loss: 0.5831, Val Accuracy: 0.8349


Epoch 3/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 378/378 [00:19<00:00, 19.29it/s, loss=0.729]  


Epoch [3/5], Train Loss: 0.4480, Val Loss: 0.4192, Val Accuracy: 0.8756


Epoch 4/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 378/378 [00:19<00:00, 19.16it/s, loss=0.194] 


Epoch [4/5], Train Loss: 0.4115, Val Loss: 0.4958, Val Accuracy: 0.8605


Epoch 5/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 378/378 [00:19<00:00, 19.71it/s, loss=0.182]  


Epoch [5/5], Train Loss: 0.3862, Val Loss: 0.5356, Val Accuracy: 0.8640




Training Complete!
File uploaded successfully to https://storage.googleapis.com/mlops_image_model/image_classification/model_MobileNetV3_Exp_002.pth?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=398747038935-compute%40developer.gserviceaccount.com%2F20250310%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250310T063050Z&X-Goog-Expires=900&X-Goog-SignedHeaders=content-type%3Bhost&X-Goog-Signature=2763ed3bde16dae2a49141c1e249eb2105bbdd929f05d42172ed95bba30caef9dcb01c359cf2bc5df986c01e9bfb28207e963757f6a16ff6dda53b4d01dbb2665e0e0448ea6a98fa09467c1f9ea637368770e1775ece02df262be1b9960535b6748c524974439cb8afceb3be6d7fb71f09d26ae0fbb002fcf6e016a976efa90765d1bfbf7ddf3e7ff485c36970a4ff582e8876b3e51380c17847233eabbe7d9e5936d0ae85935cfae85b4d86be48753a4a853dfc2fefd4e4a3265d4b24fc68ebb501e2d9cc529ef4619577849bedf7652425849e4d6b893a57346dad99db9b891098d4cdf4c857a256dba058b8076d6c2fbc0edad290a6ba4cb4d35fb10ef97e
Model Save Complete!
üèÉ View run MobileNetV3_Exp_002 at: http://34.143.211.28:500

In [103]:
model_test = models.mobilenet_v3_small(num_classes=len(train_set.get_class_name()))
model_test.load_state_dict(torch.load("/kaggle/input/test_bucket_model/pytorch/default/3/image_classification_model.pth"))
model_test.eval()  # Set to evaluation mode

  model_test.load_state_dict(torch.load("/kaggle/input/test_bucket_model/pytorch/default/3/image_classification_model.pth"))


MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), 

# Inference

In [77]:
model = models.resnet50(num_classes=len(train_set.get_class_name()))
model.load_state_dict(torch.load("model.pth"))
model.eval()  # Set to evaluation mode

  model.load_state_dict(torch.load("model.pth"))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [78]:
model1 = models.resnet50(num_classes=len(train_set.get_class_name()))

In [72]:
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

In [73]:
def evaluate_model(model, test_loader, device):
    model.to(device)
    model.eval()  # Set to evaluation mode

    y_true, y_pred = [], []  # Store true & predicted labels

    with torch.no_grad():  # Disable gradient calculation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            _, predicted = torch.max(outputs, 1)  # Get class with highest probability

            y_true.extend(labels.cpu().numpy())  # Convert to list
            y_pred.extend(predicted.cpu().numpy())

    # Convert to NumPy arrays for metrics calculation
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Compute confusion matrix & classification metrics
    conf_matrix = confusion_matrix(y_true, y_pred)
    class_report = classification_report(y_true, y_pred, digits=4)

    # Print Results
    print(f"Confusion Matrix:\n{conf_matrix}\n")
    print(f"Classification Report:\n{class_report}")

    return conf_matrix, class_report

In [74]:
device = "cuda" if torch.cuda.is_available() else "cpu"
conf_matrix, class_report = evaluate_model(model, test_loader, device)

Confusion Matrix:
[[ 49   1   2   2   2  10]
 [  0  95   0   2   0   3]
 [  3   2  58   0   6   0]
 [  6   2   3  10   7   1]
 [  1   4   7  11  29   2]
 [  5   3   0   1   2 104]]

Classification Report:
              precision    recall  f1-score   support

           0     0.7656    0.7424    0.7538        66
           1     0.8879    0.9500    0.9179       100
           2     0.8286    0.8406    0.8345        69
           3     0.3846    0.3448    0.3636        29
           4     0.6304    0.5370    0.5800        54
           5     0.8667    0.9043    0.8851       115

    accuracy                         0.7968       433
   macro avg     0.7273    0.7199    0.7225       433
weighted avg     0.7883    0.7968    0.7916       433

