In [1]:
# Use this block of code when working on Google Colab in order to save the files in Google Drive
from google.colab import drive
drive.mount('/content/drive')

path = "drive/MyDrive/Thesis/"

In [2]:
%pip install transformers
%pip install geopandas
%pip install torch==1.13.1

In [3]:
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
from PIL import Image, ImageDraw
import torch

import numpy as np
import requests

import geopandas as gpd
import json
import os

from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

from scipy.signal import find_peaks
import pickle


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [4]:
# color palette to map each class to a RGB value

color_palette = [
    [128, 64, 128],  # 0: road - maroon
    [244, 35, 232],  # 1: sidewalk - pink
    [70, 70, 70],  # 2: building - dark gray
    [102, 102, 156],  # 3: wall - purple
    [190, 153, 153],  # 4: fence - light brown
    [153, 153, 153],  # 5: pole - gray
    [250, 170, 30],  # 6: traffic light - orange
    [220, 220, 0],  # 7: traffic sign - yellow
    [107, 142, 35],  # 8: vegetation - dark green
    [152, 251, 152],  # 9: terrain - light green
    [70, 130, 180],  # 10: sky - blue
    [220, 20, 60],  # 11: person - red
    [255, 0, 0],  # 12: rider - bright red
    [0, 0, 142],  # 13: car - dark blue
    [0, 0, 70],  # 14: truck - navy blue
    [0, 60, 100],  # 15: bus - dark teal
    [0, 80, 100],  # 16: train - dark green
    [0, 0, 230],  # 17: motorcycle - blue
    [119, 11, 32]  # 18: bicycle - dark red
]

In [5]:
def prepare_folders(path, city):
  # Create the directories to store the images, segments and pickles if they do not exist
  dir_path = os.path.join(path, "results", city, "images")
  if not os.path.exists(dir_path):
    os.makedirs(dir_path)
  
  dir_path = os.path.join(path, "results", city, "segments")
  if not os.path.exists(dir_path):
    os.makedirs(dir_path)
  
  dir_path = os.path.join(path, "results", city, "pickles")
  if not os.path.exists(dir_path):
    os.makedirs(dir_path)

  dir_path = os.path.join(path, "results", city, "roads")
  if not os.path.exists(dir_path):
    os.makedirs(dir_path)

In [6]:
def get_models():
    processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")
    model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-large-cityscapes-semantic")

    return processor, model

In [7]:
def segment_images(image_path, image_id, is_panoramic, processor, model, city, path=""):
    image = Image.open(requests.get(image_path, stream=True).raw)

    # If the image is panormic, we need to cut the band in the bottom of it
    if is_panoramic:
        # Get the size of the image
        width, height = image.size

        # Crop the bottom 10% of the image
        bottom_crop = int(height * 0.2)
        image = image.crop((0, 0, width, height - bottom_crop))

    inputs = processor(images=image, return_tensors="pt")

    # forward pass
    with torch.no_grad():
        outputs = model(**inputs)
    
    # you can pass them to processor for postprocessing
    seg = processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
    
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) # height, width, 3
    palette = np.array(color_palette)
    for label, color in enumerate(palette):
        color_seg[seg == label, :] = color

    # Show image + mask
    img = np.array(image) * 0.4 + color_seg * 0.6
    img = img.astype(np.uint8)

    # Save original image
    dir_path = os.path.join(path, "results", city, "images")
    img_path = os.path.join(dir_path, "{}.jpg".format(image_id))
    image.save(img_path)
    
    # Convert numpy array to PIL Image and save masked image
    pil_img = Image.fromarray(img)
    dir_path = os.path.join(path, "results", city, "segments")
    img_path = os.path.join(dir_path, "{}.png".format(image_id))
    pil_img.save(img_path)

    # Save segmentation array as a pickle file
    dir_path = os.path.join(path, "results", city, "pickles")
    pickle_path = os.path.join(dir_path, "{}.pkl".format(image_id))
    with open(pickle_path, 'wb') as f:
        pickle.dump(seg, f)
    
    return pickle_path

In [8]:
# Based on Matthew Danish code (https://github.com/mrd/vsvi_filter/tree/master)
def run_length_encoding(in_array):
    image_array = np.asarray(in_array)
    length = len(image_array)
    if length == 0: 
        return (None, None, None)
    else:
        pairwise_unequal = image_array[1:] != image_array[:-1]
        change_points = np.append(np.where(pairwise_unequal), length - 1)   # must include last element posi
        run_lengths = np.diff(np.append(-1, change_points))       # run lengths
        positions = np.cumsum(np.append(0, run_lengths))[:-1] # positions
        return(run_lengths, positions, image_array[change_points])

def get_road_pixels_per_column(prediction):
    road_pixels = prediction == 0.0 # The label for the roads is 0
    road_pixels_per_col = np.zeros(road_pixels.shape[1])
    for i in range(road_pixels.shape[1]):
        run_lengths, positions, values = run_length_encoding(road_pixels[:,i])
        road_pixels_per_col[i] = run_lengths[values.nonzero()].max(initial=0)
    return road_pixels_per_col

def get_road_centres(prediction, distance=2000, prominence=100):
    road_pixels_per_col = get_road_pixels_per_column(prediction)
    return find_peaks(road_pixels_per_col, distance=distance, prominence=prominence)[0]

In [9]:
def find_road_center(filename, image_id, city, path):
    predict = np.load(filename, allow_pickle=True)

    distance = int(2000 * predict.shape[1] // 5760)
    prominence = int(100 * predict.shape[0] // 2880)

    centres = get_road_centres(predict, distance=distance, prominence=prominence)
    
    if centres.size > 0:
      palette_bytes = bytes([c for color in color_palette for c in color]) # concatenate rgb
      predict_np = predict.numpy()
      mask = Image.fromarray(predict_np.astype('uint8')).convert('P')
      mask.putpalette(palette_bytes)
      mask.load()
      
      draw = ImageDraw.Draw(mask)
      
      for centre in centres:
        draw.line((centre, 0, centre, mask.size[1]), width=4, fill=(0,255,0))
    
      path_to_file="{}results/{}/roads/{}.png".format(path, city, image_id)
      mask.save(path_to_file)

      return True
    else:
      return False

In [10]:
# Download images
def download_image(image_metadata, city, access_token, processor, model, path=""):
    header = {'Authorization': 'OAuth {}'.format(access_token)}

    image_id = image_metadata["properties"]["id"]
    is_panoramic = image_metadata["properties"]["is_pano"]
    
    url = 'https://graph.mapillary.com/{}?fields=thumb_original_url'.format(image_id)
    response = requests.get(url, headers=header)
    data = response.json()
    image_url = data["thumb_original_url"]

    # Image segmentation
    pickle_path = segment_images(image_url, image_id, is_panoramic, processor, model, city)
    
    # Find roads to determine if the image is suitable for the analysis or not AND crop the panoramic images
    usable = find_road_center(pickle_path, image_id, city, path)

    return [image_id, usable]

In [11]:
def download_images_for_points(city, access_token, path=""):
    path_to_file="{}results/{}/data/points_with_features.gpkg".format(path, city)
    gdf_features = gpd.read_file(path_to_file)

    # Load cache
    cache_file = os.path.join(path, "results", city, "data", "cache.txt")
    if os.path.exists(cache_file):
        with open(cache_file, "r") as f:
            cache = set([line.strip() for line in f])
    else:
        cache = set()

    prepare_folders(path, city)
    processor, model = get_models()
    images_results = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
  
        for feature in gdf_features["feature"]:
            feature = json.loads(feature)
            image_id = feature["properties"]["id"]
            
            if image_id not in cache:
                futures.append(executor.submit(download_image, feature, city, access_token, processor, model, path))
 
        for future in (pbar:= tqdm(futures, total=len(futures))):
            pbar.set_description(f"Downloading images")
            image_result = future.result()
            images_results.append(image_result)
            cache.add(image_result[0])
    
    # Save cache
    with open(cache_file, "w") as f:
        for image_id in cache:
            f.write("{}\n".format(image_id))

In [12]:
# Get the roadnetwork of a specific city using OpenStreetMap data
city = "Kampala, Uganda"

# Set access token for mapillary
access_token = "MLY|6267906093323631|fba37c53726a386c951323ee5b9874bf"

In [13]:
download_images_for_points(city, access_token, path)

Downloading images:   0%|          | 101/49480 [07:34<61:43:05,  4.50s/it]
