# Notebook for Sorting collected SVI into Usable or Unusable

**Note:**
- This notebook is used for sorting SVI collected from the Google API into usable or unusable for image segmentation
- It was originally executed on the full data set of over 16000 images, but was now adjusted to sort images in `data_visible`

In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import shutil
import zipfile

In [2]:
# Define transforms
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

### Load the model and infer classes of unseen images

In [5]:
# Change to the top directory
os.chdir('..')

# Define the model with the same architecture modifications
model = models.mobilenet_v2(pretrained=False)  # No need to load pretrained weights
model.classifier[1] = nn.Linear(model.last_channel, 2)  # 2 classes

# Load the state dictionary
model_path = 'models/img_sorting_mobilenetV2.pth'
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Set model to evaluation mode
model.to(device).eval()

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [6]:
place_name = "Stuttgart"

os.chdir(f'data_visible/interim/svi/{place_name}')  # Note: This notebook was originally run in the regular data directory, where the full image set is located
zipped_folder_path = 'edges_unsorted.zip'
unzip_destination = 'unsorted_images'

# Unzip the folder
with zipfile.ZipFile(zipped_folder_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_destination)

In [7]:
unsorted_dir = unzip_destination
sorted_dir = 'sorted_images'
classes = ['class_deleted', 'class_kept']

# Initialize a counter
processed_images = 0

# Process each unsorted image
for image_name in os.listdir(unsorted_dir):
    image_path = os.path.join(unsorted_dir, image_name)
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0)  # Add batch dimension
    image = image.to(device)

    # Inference
    model.eval()  # Ensure model is in eval mode
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
        predicted_class = classes[predicted.item()]

    # Move the image to the corresponding folder
    target_path = os.path.join(sorted_dir, predicted_class, image_name)
    shutil.move(image_path, target_path)

    processed_images += 1
    if processed_images % 100 == 0:
        print(f"Processed {processed_images} images")

print("Sorting complete.")

Processed 100 images
Processed 200 images
Processed 300 images
Processed 400 images
Processed 500 images
Processed 600 images
Processed 700 images
Processed 800 images
Processed 900 images
Processed 1000 images
Processed 1100 images
Processed 1200 images
Processed 1300 images
Processed 1400 images
Processed 1500 images
Processed 1600 images
Processed 1700 images
Processed 1800 images
Processed 1900 images
Processed 2000 images
Processed 2100 images
Processed 2200 images
Processed 2300 images
Processed 2400 images
Processed 2500 images
Processed 2600 images
Processed 2700 images
Processed 2800 images
Processed 2900 images
Processed 3000 images
Processed 3100 images
Processed 3200 images
Processed 3300 images
Processed 3400 images
Processed 3500 images
Processed 3600 images
Processed 3700 images
Processed 3800 images
Processed 3900 images
Processed 4000 images
Processed 4100 images
Processed 4200 images
Processed 4300 images
Processed 4400 images
Processed 4500 images
Processed 4600 imag

In [8]:
sorted_dir = 'sorted_images'
class_deleted_dir = os.path.join(sorted_dir, 'class_deleted')
class_kept_dir = os.path.join(sorted_dir, 'class_kept')

# Count the number of files in each folder
num_deleted = len(os.listdir(class_deleted_dir))
num_kept = len(os.listdir(class_kept_dir))

print(f"Number of images sorted into class_deleted: {num_deleted}")
print(f"Number of images sorted into class_kept: {num_kept}")

Number of images sorted into class_deleted: 3251
Number of images sorted into class_kept: 12926
