In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment, then
# look up the Roboflow key there (None when the variable is not set).
load_dotenv()
api_key = os.environ.get("ROBOFLOW_API_KEY")

[REDACTED: a Roboflow API key was printed in this cell output; clear the output and rotate the key]


In [2]:
from roboflow import Roboflow

# Authenticate against Roboflow with the key read from the environment.
rf = Roboflow(api_key=api_key)

# List the projects of the active workspace so the project id can be verified.
workspace = rf.workspace()
print(workspace.projects())  # check whether "modanet" appears in this list

# Load the project and print its versions.
# NOTE(review): the recorded output of this cell lists only
# 'clothes-model-umnux-jpjow' and 'my-first-project-wsrh6', and the call below
# fails with "GET /modanet-nvkmz/modanet does not exist". The project id
# "modanet" probably has to be replaced by one of the listed ids -- confirm.
project = workspace.project("modanet")
print(project.versions())  # version 1 must exist for the download below

# Download version 1 of the project in COCO format.
# NOTE(review): `dataset` is rebound to a ModaNetDataset in a later cell, so
# the download result is not reachable under this name afterwards.
dataset = project.version(1).download("coco")


loading Roboflow workspace...
['modanet-nvkmz/clothes-model-umnux-jpjow', 'modanet-nvkmz/my-first-project-wsrh6']
loading Roboflow project...


RuntimeError: {
    "error": {
        "message": "Unsupported request. `GET /modanet-nvkmz/modanet` does not exist or cannot be loaded due to missing permissions.",
        "type": "GraphMethodException",
        "hint": "You can see your active workspace by issuing a GET request to `/` with your `api_key`."
    }
}

In [3]:
# Locations of the training images and of their COCO annotation file.
# NOTE(review): both paths are hard-coded rather than derived from the
# download result. Roboflow COCO exports usually keep `_annotations.coco.json`
# inside each split folder (e.g. train/) -- confirm against the files on disk.
dataset_path = "modanet/train"
annotation_path = "modanet/_annotations.coco.json"


In [4]:
from torchvision import transforms

# Image-only preprocessing shared by the dataset and the inference cell.
#
# Only photometric augmentation is kept here. Geometric augmentations
# (horizontal flip, rotation) move pixels without moving the bounding boxes /
# masks in the target, so applying them to the image alone produces
# mismatched annotations; they must be implemented as joint image+target
# transforms if they are needed.
#
# No Normalize step either: torchvision's detection models expect inputs in
# the 0-1 range and apply ImageNet mean/std normalization internally, so
# normalizing here would normalize twice.
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize to 256x256
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Color jitter
    transforms.ToTensor(),  # Convert PIL image to a float tensor in [0, 1]
])




In [None]:
import json
from torchvision.datasets import CocoDetection
from torch.utils.data import DataLoader
from torchvision.transforms import functional as F

# Rescale bounding boxes so they match an image that has been resized
def resize_bounding_boxes(boxes, original_size, new_size):
    """Scale ``[x, y, width, height]`` boxes from one image size to another.

    Args:
        boxes: iterable of 4-element boxes ``[x, y, w, h]``.
        original_size: ``(width, height)`` the boxes currently refer to.
        new_size: ``(width, height)`` the boxes should be mapped onto.

    Returns:
        A new list of ``[x, y, w, h]`` lists; the input boxes are not modified.
    """
    source_w, source_h = original_size
    target_w, target_h = new_size
    ratio_x = target_w / source_w
    ratio_y = target_h / source_h

    # Horizontal quantities use the x ratio, vertical ones the y ratio.
    return [
        [left * ratio_x, top * ratio_y, width * ratio_x, height * ratio_y]
        for left, top, width, height in boxes
    ]

class ModaNetDataset(CocoDetection):
    """COCO detection dataset that resizes each image and its boxes to 256x256.

    ``__getitem__`` returns ``(image_tensor, annotations)`` where
    ``annotations`` is the list of COCO annotation dicts for the image with
    ``"bbox"`` rescaled to the resized image. Only ``"bbox"`` is rescaled;
    other geometry in the annotations (e.g. segmentation, area) is passed
    through unchanged.

    NOTE(review): torchvision detection models are normally trained with a
    target dict of tensors (boxes / labels / masks) rather than raw COCO
    annotation dicts -- confirm the training loop converts these.
    """

    def __getitem__(self, index):
        img, target = super().__getitem__(index)

        # Capture the size BEFORE resizing: reading it afterwards always
        # yields (256, 256) and the boxes would never be rescaled.
        # PIL's .size is (width, height).
        original_size = img.size
        new_size = (256, 256)

        # Resize image
        img = F.resize(img, (256, 256))

        # Resize bounding boxes
        boxes = [obj["bbox"] for obj in target]
        resized_boxes = resize_bounding_boxes(boxes, original_size, new_size)

        # Build fresh annotation dicts instead of writing into the ones handed
        # back by the parent class: those can be the COCO index's own objects,
        # and mutating them would rescale the same box again on every access.
        target = [
            {**obj, "bbox": box}
            for obj, box in zip(target, resized_boxes)
        ]

        # Apply the notebook-level image transform
        img = transform(img)

        return img, target

# Build the dataset from the image folder and COCO annotation file set above
dataset = ModaNetDataset(root=dataset_path, annFile=annotation_path)


def _collate_as_tuples(batch):
    """Regroup a list of (image, target) pairs into (images, targets) tuples."""
    return tuple(zip(*batch))


# Shuffled batches of four samples; the custom collate keeps images and
# targets as tuples instead of stacking them into a single tensor.
data_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_as_tuples,
)


In [None]:
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn

def get_model(num_classes=14, mask_hidden_channels=256):
    """Build a pretrained Mask R-CNN with heads resized for this dataset.

    Args:
        num_classes: number of output classes including background. The
            default of 14 corresponds to 13 clothing classes + background.
        mask_hidden_channels: width of the hidden layer in the replacement
            mask predictor.

    Returns:
        The ``maskrcnn_resnet50_fpn`` model with new box and mask predictors.
    """
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in
    # favour of `weights=`; switch once the installed version is confirmed.
    model = maskrcnn_resnet50_fpn(pretrained=True)

    # Replace the box classification/regression head
    box_in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
        box_in_features, num_classes
    )

    # Replace the mask head, reusing the input width of the existing one
    mask_in_channels = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(
        mask_in_channels,
        mask_hidden_channels,
        num_classes
    )

    return model


In [None]:
import torch
from engine import train_one_epoch, evaluate

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network and place it on the selected device
model = get_model().to(device)

# SGD with momentum and weight decay over all model parameters
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Train for a fixed number of epochs, evaluating after each one.
# NOTE(review): `train_one_epoch` / `evaluate` come from a local `engine`
# module (presumably torchvision's detection reference scripts -- confirm),
# and evaluation runs on the same loader that is used for training.
num_epochs = 5
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    lr_scheduler.step()
    evaluate(model, data_loader, device=device)


In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Put the in-memory model (from the training cell) into inference mode
model.eval()

# Load image
image = Image.open("path/to/image.jpg").convert("RGB")

# Convert to a tensor deterministically instead of reusing the training
# `transform`: that pipeline applies random augmentation and a 256x256 resize,
# which makes predictions non-reproducible and puts the predicted boxes/masks
# in a different coordinate frame than the original image drawn below.
# The model handles its own resizing and normalization.
img_tensor = transforms.ToTensor()(image).unsqueeze(0).to(device)

# Run inference
with torch.no_grad():
    output = model(img_tensor)[0]

# Only detections scoring above this value are drawn
score_threshold = 0.5

# Display result
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(image)

# Draw boxes and masks
for box, mask, score in zip(output["boxes"], output["masks"], output["scores"]):
    if score <= score_threshold:
        continue
    # .tolist() yields plain Python floats, which is also safe when the
    # prediction tensors live on the GPU.
    x1, y1, x2, y2 = box.tolist()
    ax.add_patch(plt.Rectangle(
        (x1, y1), x2 - x1, y2 - y1,
        fill=False, color='red', linewidth=2
    ))
    mask = mask.squeeze().cpu().numpy()
    ax.imshow(mask, alpha=0.5, cmap='gray')

plt.show()


In [None]:
# Class names, background first.
# NOTE(review): presumably ordered by model label index -- confirm against the
# category ids in the dataset's annotation file.
CLASSES = [
    "__background__",
    "hat",
    "sunglasses",
    "upper-clothes",
    "skirt",
    "pants",
    "dress",
    "belt",
    "shoe",
    "bag",
    "scarf",
    "jumpsuit",
    "socks",
    "gloves",
]
