In [None]:
!pip install pyvww

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyvww
  Downloading pyvww-0.1.1-py3-none-any.whl (8.9 kB)
Installing collected packages: pyvww
Successfully installed pyvww-0.1.1


In [None]:
!git clone https://github.com/Mxbonn/visualwakewords

Cloning into 'visualwakewords'...
remote: Enumerating objects: 79, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 79 (delta 1), reused 6 (delta 1), pack-reused 71[K
Unpacking objects: 100% (79/79), 892.43 KiB | 2.45 MiB/s, done.


In [None]:
# Download and unzip the MS COCO 2017 image archives into ./path-to-COCO-dataset.
# The captured output shows train2017.zip (~18 GB) and val2017.zip (~778 MB),
# so expect this cell to take several minutes and a lot of disk space.
!bash visualwakewords/scripts/download_mscoco.sh path-to-COCO-dataset 2017

Downloading train2017.zip to /content/path-to-COCO-dataset
--2023-06-24 11:32:34--  http://images.cocodataset.org/zips/train2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.162.139, 52.216.152.228, 52.216.43.41, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.162.139|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19336861798 (18G) [application/zip]
Saving to: ‘train2017.zip’


2023-06-24 11:37:37 (60.9 MB/s) - ‘train2017.zip’ saved [19336861798/19336861798]

Unzipping train2017.zip
Downloading val2017.zip to /content/path-to-COCO-dataset
--2023-06-24 11:41:28--  http://images.cocodataset.org/zips/val2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.35.137, 52.217.109.148, 52.216.37.65, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.35.137|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 815585330 (778M) [application/zip]
Saving to: 

In [None]:
# Run the train/minival split script on the COCO 2017 instance annotations
# (presumably producing the instances_maxitrain.json / instances_minival.json
# files that the next cell reads from the same directory).
#
# NOTE(review): `${NAME}` does not behave like a shell variable here. IPython
# appears to substitute `{NAME}` with the Python value and leave the `$` in
# place, so the shell receives e.g. `$path-to-mscoco-dataset/...` and expands
# the (apparently unset) shell variable `$path` to nothing. The output of the
# annotation-generation cell shows paths under `/content/-to-mscoco-dataset/`,
# which is consistent with this. Plain `{NAME}` would pass the value unchanged
# -- TODO confirm where the annotation files really live before changing it,
# because the dataset cells hardcode `/content/-to-mscoco-dataset/annotations/`.
TRAIN_ANNOTATIONS_FILE="path-to-mscoco-dataset/annotations/instances_train2017.json"
VAL_ANNOTATIONS_FILE="path-to-mscoco-dataset/annotations/instances_val2017.json"
DIR="path-to-mscoco-dataset/annotations/"
!python visualwakewords/scripts/create_coco_train_minival_split.py \
  --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
  --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--output_dir="${DIR}"

In [None]:
# Generate Visual Wake Words annotations from the maxitrain/minival files,
# using 'person' as the foreground class. The 0.005 threshold is passed straight
# to the script (presumably a minimum relative object area -- confirm against
# create_visualwakewords_annotations.py).
#
# NOTE(review): as written, `${NAME}` is not passed through verbatim. IPython
# appears to fill in `{NAME}` and keep the `$`, after which the shell expands
# the leading `$path` / `$new` (apparently unset) to an empty string. The
# captured output confirms the inputs were read from
# `/content/-to-mscoco-dataset/annotations/`; the output directory is therefore
# presumably `-path-to-visualwakewords-dataset/annotations/` -- TODO confirm,
# and switch to plain `{NAME}` once the intended directories are settled.
MAXITRAIN_ANNOTATIONS_FILE="path-to-mscoco-dataset/annotations/instances_maxitrain.json"
MINIVAL_ANNOTATIONS_FILE="path-to-mscoco-dataset/annotations/instances_minival.json"
VWW_OUTPUT_DIR="new-path-to-visualwakewords-dataset/annotations/"
!python visualwakewords/scripts/create_visualwakewords_annotations.py \
  --train_annotations_file="${MAXITRAIN_ANNOTATIONS_FILE}" \
  --val_annotations_file="${MINIVAL_ANNOTATIONS_FILE}" \
  --output_dir="${VWW_OUTPUT_DIR}" \
  --threshold=0.005 \
  --foreground_class='person'

Processing /content/-to-mscoco-dataset/annotations/instances_maxitrain.json...
loading annotations into memory...
Done (t=22.80s)
creating index...
index created!
There are 55233 images that now have label person, of the 115228 images in total.
Processing /content/-to-mscoco-dataset/annotations/instances_minival.json...
loading annotations into memory...
Done (t=1.50s)
creating index...
index created!
There are 3800 images that now have label person, of the 8059 images in total.


In [None]:
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision
from torchvision.models import mobilenet_v3_small
from torchvision import datasets
from torchvision import transforms as T
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
import tqdm
import pyvww
from torchvision.datasets import VisionDataset
from PIL import Image
import os
import os.path
from pyvww.utils import VisualWakeWords

#create dataset class
class VisualWakeWordsClassification(VisionDataset):
    """`Visual Wake Words <https://arxiv.org/abs/1906.05721>`_ Dataset.

    Binary "person / no person" classification on top of COCO-style
    annotations: an image is labelled 1 when at least one of its annotations
    has ``category_id == 1`` and 0 otherwise (including images that have no
    annotations at all).

    Args:
        root (string): Root directory where COCO images are downloaded to.
        annFile (string): Path to json visual wake words annotation file.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. Defaults to ``transforms.ToTensor()``.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        transforms (callable, optional): Forwarded unchanged to ``VisionDataset``;
            ``__getitem__`` itself only applies ``transform`` and ``target_transform``.
    """

    # Category id treated as the positive ("person") class.
    PERSON_CATEGORY_ID = 1

    def __init__(self, root, annFile, transform=T.ToTensor(), target_transform=None, transforms=None):
        super().__init__(root, transforms, transform, target_transform)
        self.vww = VisualWakeWords(annFile)
        # Sorted ids give a stable index -> image mapping across runs.
        self.ids = sorted(self.vww.imgs.keys())

    def __len__(self):
        """Return the number of image ids indexed by this dataset.

        Needed so that ``len(dataset)`` and a ``DataLoader`` with its default
        sampler work.
        """
        return len(self.ids)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: Tuple (image, target). target is the index of the target class
            (1 if the image contains a person, 0 otherwise).
        """
        vww = self.vww
        img_id = self.ids[index]

        ann_ids = vww.getAnnIds(imgIds=img_id)
        # An image without any annotation is a negative example.
        annotations = vww.loadAnns(ann_ids) if ann_ids else []
        has_person = any(
            ann['category_id'] == self.PERSON_CATEGORY_ID for ann in annotations
        )
        target = int(has_person)

        file_name = vww.loadImgs(img_id)[0]['file_name']

        # Release the file handle as soon as the pixels are decoded; convert()
        # returns an independent in-memory image.
        with Image.open(os.path.join(self.root, file_name)) as raw_image:
            img = raw_image.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

In [None]:
import torch
import pyvww

# Training split: images are read from the COCO train2017 folder and labels are
# derived from the instances_train2017.json annotation file.
train_images_root = "/content/path-to-COCO-dataset/train2017/"
train_annotations_path = "/content/-to-mscoco-dataset/annotations/instances_train2017.json"
train_dataset = VisualWakeWordsClassification(
    root=train_images_root,
    annFile=train_annotations_path,
)

loading annotations into memory...
Done (t=23.10s)
creating index...
index created!


In [None]:
import torch
import pyvww

# Validation split: images are read from the COCO val2017 folder and labels are
# derived from the instances_val2017.json annotation file.
val_images_root = "/content/path-to-COCO-dataset/val2017/"
val_annotations_path = "/content/-to-mscoco-dataset/annotations/instances_val2017.json"
val_dataset = VisualWakeWordsClassification(
    root=val_images_root,
    annFile=val_annotations_path,
)

loading annotations into memory...
Done (t=2.89s)
creating index...
index created!


In [None]:
from PIL import Image
import os, os.path

# Resize every train2017 image to 224x224 in place (the originals are overwritten).
path = "/content/path-to-COCO-dataset/train2017/"
target_size = (224, 224)
for f in os.listdir(path):
    file_path = os.path.join(path, f)
    # Ignore sub-directories or other non-file entries.
    if not os.path.isfile(file_path):
        continue
    # The context manager closes the source file before it is overwritten.
    with Image.open(file_path) as img:
        # Already resized (e.g. the cell is being re-run): skip it so the JPEG
        # is not recompressed, which would degrade it a little more each time.
        if img.size == target_size:
            continue
        img_resized = img.resize(target_size)
    img_resized.save(file_path, format='JPEG')

In [None]:
from PIL import Image
import os, os.path

# Resize every val2017 image to 224x224 in place (the originals are overwritten).
path = "/content/path-to-COCO-dataset/val2017/"
target_size = (224, 224)
for f in os.listdir(path):
    file_path = os.path.join(path, f)
    # Ignore sub-directories or other non-file entries.
    if not os.path.isfile(file_path):
        continue
    # The context manager closes the source file before it is overwritten.
    with Image.open(file_path) as img:
        # Already resized (e.g. the cell is being re-run): skip it so the JPEG
        # is not recompressed, which would degrade it a little more each time.
        if img.size == target_size:
            continue
        img_resized = img.resize(target_size)
    img_resized.save(file_path, format='JPEG')

In [None]:
#create tar file for train_dataset
!tar -cvf /content/drive/MyDrive/train2017_160.tar /content/path-to-COCO-dataset/train2017/

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
/content/path-to-COCO-dataset/train2017/000000221448.jpg
/content/path-to-COCO-dataset/train2017/000000523729.jpg
/content/path-to-COCO-dataset/train2017/000000137271.jpg
/content/path-to-COCO-dataset/train2017/000000132861.jpg
/content/path-to-COCO-dataset/train2017/000000467727.jpg
/content/path-to-COCO-dataset/train2017/000000001488.jpg
/content/path-to-COCO-dataset/train2017/000000489700.jpg
/content/path-to-COCO-dataset/train2017/000000555648.jpg
/content/path-to-COCO-dataset/train2017/000000161234.jpg
/content/path-to-COCO-dataset/train2017/000000064744.jpg
/content/path-to-COCO-dataset/train2017/000000085706.jpg
/content/path-to-COCO-dataset/train2017/000000004462.jpg
/content/path-to-COCO-dataset/train2017/000000161231.jpg
/content/path-to-COCO-dataset/train2017/000000066003.jpg
/content/path-to-COCO-dataset/train2017/000000212401.jpg
/content/path-to-COCO-dataset/train2017/000000033697.jpg
/content/path-to-COCO-da

In [None]:
#create tar file for val_dataset
!tar -cvf /content/drive/MyDrive/val2017_160.tar /content/path-to-COCO-dataset/val2017/

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
/content/path-to-COCO-dataset/val2017/000000344268.jpg
/content/path-to-COCO-dataset/val2017/000000222559.jpg
/content/path-to-COCO-dataset/val2017/000000534270.jpg
/content/path-to-COCO-dataset/val2017/000000295231.jpg
/content/path-to-COCO-dataset/val2017/000000489764.jpg
/content/path-to-COCO-dataset/val2017/000000054593.jpg
/content/path-to-COCO-dataset/val2017/000000235057.jpg
/content/path-to-COCO-dataset/val2017/000000039480.jpg
/content/path-to-COCO-dataset/val2017/000000521540.jpg
/content/path-to-COCO-dataset/val2017/000000325347.jpg
/content/path-to-COCO-dataset/val2017/000000458223.jpg
/content/path-to-COCO-dataset/val2017/000000146825.jpg
/content/path-to-COCO-dataset/val2017/000000209747.jpg
/content/path-to-COCO-dataset/val2017/000000085376.jpg
/content/path-to-COCO-dataset/val2017/000000259571.jpg
/content/path-to-COCO-dataset/val2017/000000549220.jpg
/content/path-to-COCO-dataset/val2017/000000312549.jpg
/