In [None]:
import os
import numpy as np
import torch
from PIL import Image


In [None]:
%%bash
# Download the Penn-Fudan dataset. The commands below are shell, not Python,
# so the cell is run through the %%bash cell magic.
# -nc: do not download again when the archive is already present.
wget -nc https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip
# Extract into the current directory.
# -n: never overwrite files left by a previous run; -q: do not list every file.
unzip -n -q PennFudanPed.zip

In [None]:
# Open one sample image. As a non-final expression its value is not displayed.
Image.open('PennFudanPed/PNGImages/FudanPed00001.png')

# Open the matching instance mask and attach a palette so that each instance
# index is rendered in its own colour.
mask = Image.open('PennFudanPed/PedMasks/FudanPed00001_mask.png')
palette = [
    channel
    for rgb in (
        (0, 0, 0),      # index 0: black background
        (255, 0, 0),    # index 1: red
        (255, 255, 0),  # index 2: yellow
        (255, 153, 0),  # index 3: orange
    )
    for channel in rgb
]
mask.putpalette(palette)

mask

In [None]:
# Let's write a Dataset class for the Penn-Fudan dataset
class PennFudanDataset(torch.utils.data.Dataset):
    """Penn-Fudan pedestrian dataset yielding ``(image, target)`` pairs.

    Images are read from ``<root>/PNGImages`` and instance masks from
    ``<root>/PedMasks``. Both listings are sorted so that entry ``i`` of one
    lines up with entry ``i`` of the other.

    Args:
        root: Directory containing the ``PNGImages`` and ``PedMasks`` folders.
        transforms: Optional callable ``(img, target) -> (img, target)``
            applied to every sample before it is returned.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # os.listdir returns the entry names in arbitrary order; sort both
        # listings so that images and masks stay aligned by index.
        self.imgs = sorted(os.listdir(os.path.join(root, "PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root, "PedMasks")))

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(img, target)`` where ``img`` is the RGB PIL image (or whatever
            ``transforms`` turns it into) and ``target`` is a dict with keys
            ``boxes`` (float32, N x 4 as ``[x_min, y_min, x_max, y_max]``),
            ``labels`` (int64, all ones), ``masks`` (uint8, N x H x W),
            ``image_id``, ``area`` and ``iscrowd`` (int64, all zeros).
        """
        # Load the image and its mask.
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # The mask is deliberately NOT converted to RGB: each value encodes a
        # different instance, with 0 being the background.
        mask = np.array(Image.open(mask_path))

        # np.unique returns the distinct values in ascending order, so the
        # first id is the background (0); drop it.
        obj_ids = np.unique(mask)[1:]

        # Split the index-encoded mask into one boolean mask per instance
        # (broadcast compare: (H, W) against (N, 1, 1) gives (N, H, W)).
        masks = mask == obj_ids[:, None, None]

        # Compute the bounding box of every instance mask.
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            # np.where(condition) with a single argument returns a tuple of
            # index arrays (rows, cols) of the elements where it holds.
            rows, cols = np.where(masks[i])
            boxes.append([np.min(cols), np.min(rows), np.max(cols), np.max(rows)])

        # Convert everything into torch tensors. The reshape keeps the boxes
        # 2-D (0 x 4) even when the mask contains no instance at all, so the
        # area computation below does not fail.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # There is only one class (pedestrian).
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # Assume no instance is a crowd.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of images found under ``PNGImages``."""
        return len(self.imgs)
            
        
            
        

In [None]:
# Build the dataset without any transforms and look at its first sample,
# an (image, target) pair.
dataset = PennFudanDataset(root='PennFudanPed/')

dataset[0]

In [None]:
#Define Model
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def get_instance_segmentation_model(num_classes):
    """Return a COCO-pretrained Mask R-CNN with heads sized for ``num_classes``.

    Both the box predictor and the mask predictor of the pre-trained model
    are replaced with fresh ones that output ``num_classes`` classes.
    """
    # Width of the hidden layer inside the replacement mask predictor.
    hidden_layer = 256

    # Instance segmentation model pre-trained on COCO.
    net = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = net.roi_heads

    # Swap the box predictor, keeping its input feature size.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Swap the mask predictor, keeping its input channel count.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, hidden_layer, num_classes)

    return net


In [None]:
%%bash
# Download the TorchVision repo to use some files from references/detection.
# These are shell commands, so the cell runs through %%bash; this also makes
# the `cd` below apply to the commands that follow it.
# Skip the clone when a previous run already created the checkout.
[ -d vision ] || git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.4.0

# Copy the detection helpers next to the notebook so they can be imported.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

In [None]:
# Data augmentation
import utils
import transforms as T
from engine import train_one_epoch, evaluate

def get_transform(train):
    """Build the (image, target) transform pipeline.

    Args:
        train: When truthy, a random horizontal flip is appended for data
            augmentation; otherwise only the tensor conversion is applied.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # Convert the PIL image into a PyTorch tensor.
    pipeline = [T.ToTensor()]
    if train:
        # During training, flip the image and its ground truth horizontally
        # with probability 0.5.
        pipeline.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(pipeline)
         
            
        
    

In [None]:
# Training
#
# use the PennFudan dataset and the defined transformations
dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

# split the dataset into train and test: one seeded permutation indexes both
# wrappers, so the last 50 shuffled samples are held out for evaluation
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()  # random permutation of 0..n-1
dataset = torch.utils.data.Subset(dataset, indices[:-50])  # subset selected by index list
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True,
    num_workers=4, collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False,
    num_workers=4, collate_fn=utils.collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2

# get the model using the helper function
model = get_instance_segmentation_model(num_classes)
model.to(device)

# construct an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# the learning rate scheduler decreases the learning rate by 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# training
num_epochs = 10
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)
 



In [None]:
# Final test: run the model on one image from the test split.
model.eval()  # switch the model to evaluation mode before predicting

# pick an image (its target is not needed here)
img, _ = dataset_test[0]

# gradients are not needed for prediction
with torch.no_grad():
    prediction = model([img.to(device)])