# Manual Image Selection Script

This notebook contains parts of code from Dr. Vadim Savenkov

## Before running the notebook:
- Put suitable data into a subfolder "data" (data with a column "geometry" that contains polygon coordinate data parsable with geopandas)
- There might be issues with the "geckodriver", which is needed to access the satellite imagery (in this case, check out [StackOverflow](https://stackoverflow.com/questions/40208051/selenium-using-python-geckodriver-executable-needs-to-be-in-path))

## 1) Load packages and data

In [5]:
# Load packages
import os, io, json
import pandas as pd
import geopandas as gpd
from ast import literal_eval
from shapely.geometry import shape, mapping as shapely_mapping
from shapely import wkt
import folium
from PIL import Image, ImageDraw
from __future__ import print_function
from ipywidgets import interact,interact_manual,HBox,Output,Tab
import time

import cv2
import random
import re
import sys
import multiprocessing.dummy as mp 
import numpy as np

In [59]:
# Create a unique id based on the position of the parking lot
def parking_name(row, ts):
    """Build an id string of the form ``"<a>_<b>_ts<ts>"`` from a coordinate pair.

    Parameters
    ----------
    row : sequence of two numbers
        Coordinate pair; both values are rounded to 7 decimal places.
    ts : int
        Tile-server number that is appended to the id.

    Returns
    -------
    str or float
        The id string, or ``np.nan`` when ``row`` is not an indexable pair of
        numbers (for example a NaN placeholder or ``None``).
    """
    try:
        a = round(row[0], 7)
        b = round(row[1], 7)
    except (TypeError, IndexError, KeyError):
        # Only "this is not a usable coordinate pair" is turned into NaN;
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return np.nan

    return f"{a}_{b}_ts{ts}"

# Flag rows whose "name" tag mentions "truck"
def find_tags(df2):
    """Return one boolean per row of ``df2`` telling whether the row is a truck entry.

    A row matches when any dict in its ``all_tags`` list has ``key == "name"``
    and a ``value`` containing "truck" (case-insensitive).

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame with an ``all_tags`` column holding lists of
        ``{"key": ..., "value": ...}`` dicts.

    Returns
    -------
    list of bool
        Exactly ``len(df2)`` flags, in row order, usable as a boolean mask.
    """
    def _is_truck_name(tag):
        # Alternative filters that were tried: surface == "asphalt",
        # or names that additionally contain "lkw".
        return tag["key"] == "name" and "truck" in tag["value"].lower()

    # any() yields a single flag per row even when several tags match, so the
    # result always has the same length as the frame.
    return [any(_is_truck_name(tag) for tag in tags) for tags in df2["all_tags"]]

# Get the first point of a WKT polygon as a tuple of floats (second value first)
_WKT_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
_POLYGON_FIRST_POINT = re.compile(
    rf"\s*POLYGON\s*\(\(\s*({_WKT_NUMBER})\s+({_WKT_NUMBER})", re.IGNORECASE
)


def get_point(geom):
    """Extract the first coordinate pair of a WKT ``POLYGON`` string.

    Both ``"POLYGON((x y, ...))"`` and ``"POLYGON ((x y, ...))"`` are accepted.
    The pair is returned in swapped order, i.e. ``(y, x)``.

    Parameters
    ----------
    geom : str
        WKT text of a polygon.

    Returns
    -------
    tuple of float or float
        ``(y, x)`` of the first vertex, or ``np.nan`` when ``geom`` is not a
        string or does not start with a ``POLYGON`` ring (other geometry types
        such as ``MULTIPOLYGON`` are intentionally not matched).
    """
    if not isinstance(geom, str):
        return np.nan

    found = _POLYGON_FIRST_POINT.match(geom)
    if found is None:
        return np.nan

    x, y = float(found.group(1)), float(found.group(2))
    return (y, x)

In [30]:
# SPECIFY WHICH TILE SERVER SHOULD BE USED
# 1 = World Imagery from Esri Satellite (3-5 years old) - https://www.arcgis.com/home/item.html?id=10df2279f9684e4a9f6a7f08febac2a9
# 2 = World Imagery (Clarity) from Esri Satellite (more than 3-5 years old, however higher image quality)
# The value is passed to parking_name(), where it becomes the "_ts<N>" suffix of every id,
# and to make_map(), where it selects the tile layer. Changing it therefore changes the
# generated ids as well as the imagery shown.
TS = 1

In [87]:
# Pre-processing
# This part needs to be adapted according to database

# THIS CAN TAKE UP TO A FEW MINUTES

start_time = time.localtime()
start_t = time.time()
print("Started at:", time.asctime(start_time))

file_nr = [0,1] # Specify the number of files that should be merged

if not file_nr:
    # Fail early with a clear message instead of an unrelated error further down.
    raise ValueError("file_nr is empty: list at least one input file number")

# One filtered frame per input file; they are concatenated once after the loop.
frames = []

for i in file_nr:
    df = pd.read_json(f"data/osm_parking_polygons_0{i}_test.json", lines=True)
    # Only keep entries that fulfil the tag requirements; copy so the column
    # assignments below act on an independent frame, not on a slice.
    df = df[find_tags(df)].copy()
    df["pt"] = df["geometry"].apply(get_point) # first point of the coordinates list
    df["id"] = [parking_name(x, TS) for x in df["pt"]] # id based on coordinates
    df['geometry'] = df['geometry'].apply(wkt.loads)
    df = df[["id", "pt", "geometry", "all_tags"]]
    df = df.dropna() # rows without a parsable point / id are discarded
    frames.append(df)

    print(f"File nr. {i} successfully loaded")

temp = pd.concat(frames)
real_polys = gpd.GeoDataFrame(temp, geometry="geometry")

end_t = time.time()
print("Computation time (Min.):", (end_t-start_t)/60)

Started at: Sun Jul  3 14:32:11 2022
File nr. 0 successfully loaded
File nr. 1 successfully loaded
Computation time (Min.): 0.00032425324122111


In [93]:
# Alternative data source: reload a previously saved whitelist to look through it again.
# Note that this replaces `df` and `real_polys` from the pre-processing cell.

whitelist_name = "osm_data_trucks"
df = pd.read_csv(f"data/{whitelist_name}.csv", index_col=0)

# Recompute the anchor point and the id from the WKT text before parsing it.
df["pt"] = df["geometry"].apply(get_point)
df["id"] = list(map(lambda point: parking_name(point, TS), df["pt"]))

# Turn the WKT strings into geometry objects and wrap the frame for geopandas.
df["geometry"] = df["geometry"].map(wkt.loads)
real_polys = gpd.GeoDataFrame(df, geometry="geometry")

In [94]:
# Remove rows that repeat the same id, point and geometry, then renumber the rows.
# reset_index() is called without drop=True, so the previous index is kept as a column.
real_polys = (
    real_polys
    .drop_duplicates(subset=["id", "pt", "geometry"])
    .reset_index()
)

In [70]:
# Create backup if needed
#real_polys_backup = real_polys.copy()

In [95]:
# Final dataframe
real_polys #= real_polys.loc[:249535,]

Unnamed: 0,index,id_new,id,pt,geometry,all_tags
0,0,36.5400085_-89.5958862_ts1,36.5400085_-89.5958862_ts1,"(36.5400085, -89.5958862)","POLYGON ((-89.59589 36.54001, -89.59598 36.539...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
1,1,54.8565589_-2.875634_ts1,54.8565589_-2.875634_ts1,"(54.8565589, -2.875634)","POLYGON ((-2.87563 54.85656, -2.87533 54.85627...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
2,2,38.2476037_-84.5447897_ts1,38.2476037_-84.5447897_ts1,"(38.2476037, -84.5447897)","POLYGON ((-84.54479 38.24760, -84.54519 38.248...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
3,3,35.4256176_-84.6888667_ts1,35.4256176_-84.6888667_ts1,"(35.4256176, -84.6888667)","POLYGON ((-84.68887 35.42562, -84.68878 35.425...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
4,4,42.7899152_-82.66045_ts1,42.7899152_-82.66045_ts1,"(42.7899152, -82.66045)","POLYGON ((-82.66045 42.78992, -82.65978 42.790...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
...,...,...,...,...,...,...
677,1068,39.7480231_-94.6946061_ts2,39.7480231_-94.6946061_ts1,"(39.7480231, -94.6946061)","POLYGON ((-94.69461 39.74802, -94.69484 39.748...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
678,1069,49.6436901_12.5304886_ts2,49.6436901_12.5304886_ts1,"(49.6436901, 12.5304886)","POLYGON ((12.53049 49.64369, 12.53170 49.64420...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
679,1070,46.4185895_4.8634203_ts2,46.4185895_4.8634203_ts1,"(46.4185895, 4.8634203)","POLYGON ((4.86342 46.41859, 4.86391 46.41953, ...","[{'key': 'access', 'value': 'yes'}, {'key': 'a..."
680,1071,46.4192786_4.8626612_ts2,46.4192786_4.8626612_ts1,"(46.4192786, 4.8626612)","POLYGON ((4.86266 46.41928, 4.86297 46.41938, ...","[{'key': 'access', 'value': 'yes'}, {'key': 'a..."


## 2) Load functions to get image with labels

In [102]:
# Create image
# RGB triple (0xff, 0x78, 0x00). It is not referenced by any cell visible here;
# presumably the colour intended for drawing bounds via rgbcolor() — TODO confirm.
BOUNDS_RGB = (0xff,0x78, 0x00)

def rgbcolor(r,g,b):
    """
    Format three integer colour channels as a CSS hex colour code.

    Each channel is written as lowercase hexadecimal, zero-padded to at least
    two digits, e.g. ``rgbcolor(255, 120, 0) -> "#ff7800"``.
    """
    channels = (r, g, b)
    return "#" + "".join(format(channel, "02x") for channel in channels)

# Bounding box around the exterior ring of a geometry object
def geometry_bbox(geometry, init_bounds=None, x_offset=0., y_offset=0.):
    """
    Grow a pair of corner points so that it encloses ``geometry``'s exterior ring.

    Coordinates are read as ``(x, y)`` from ``geometry.exterior.coords``.
    The result is ``[[y_min, x_max], [y_max, x_min]]``: the first corner holds
    the smallest y and the largest x, the second corner the largest y and the
    smallest x. ``init_bounds`` uses the same layout; when it is omitted (or
    falsy) a neutral start box is used so that only the geometry determines
    the result. ``x_offset`` / ``y_offset`` widen the geometry's extent before
    it is merged with ``init_bounds``.
    """
    start = init_bounds if init_bounds else [[1000, -1000], [-1000, 1000]]

    ring = geometry.exterior.coords
    x_values = [vertex[0] for vertex in ring]
    y_values = [vertex[1] for vertex in ring]

    # First corner: lowest y, highest x
    y_low = min(start[0][0], min(y_values) - y_offset)
    x_high = max(start[0][1], max(x_values) + x_offset)
    # Second corner: highest y, lowest x
    y_high = max(start[1][0], max(y_values) + y_offset)
    x_low = min(start[1][1], min(x_values) - x_offset)

    return [[y_low, x_high], [y_high, x_low]]


def make_map(pt, ts, geometry=None, show_geometry=False, size=320):
    """
    Create a folium map centred on ``pt`` and zoomed to fit the given geometry.

    Parameters
    ----------
    pt : sequence of two numbers
        Map centre, passed to ``folium.Map(location=...)``.
    ts : int
        Tile server: 1 = Esri World Imagery, 2 = Esri World Imagery (Clarity).
        For any other value no satellite layer is added and folium's default
        tiles are shown.
    geometry : geometry object, optional
        When given (and truthy) the map view is fitted to its bounding box.
    show_geometry : bool
        When True and a geometry is given, its outline is drawn on the map.
    size : int
        Width and height passed to ``folium.Map``.

    Returns
    -------
    (folium.Map, list)
        The map and the degenerate bounds ``[[pt[0], pt[1]], [pt[0], pt[1]]]``
        of the centre point (not the fitted bounds).
    """
    tile_urls = {
        # First tile server
        1: 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        # Second tile server
        2: 'https://clarity.maptiles.arcgis.com/arcgis/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    }

    # Create satellite image map for the coordinate point specified
    m = folium.Map(location=pt, min_zoom=15, width=size, height=size, zoom_control=False, attribution_control=False)

    tiles = tile_urls.get(ts)
    if tiles is not None:
        folium.TileLayer(
            tiles = tiles,
            attr = 'Esri', name = 'Esri Satellite', overlay = False, control = True
        ).add_to(m)

    # Bounds of the initial point
    bounds = [[pt[0], pt[1]],[pt[0], pt[1]]]

    # Default to the centre point so fit_bounds always has a defined argument,
    # also when no geometry is passed or it is not shown.
    bounds_fit = bounds

    if geometry:
        bounds_fit = geometry_bbox(geometry, init_bounds=bounds, x_offset=0, y_offset=0)
        # Plot lines of geometry
        if show_geometry:
            folium.GeoJson(data=geometry).add_to(m)

    m.fit_bounds(bounds_fit)

    return m, bounds

## 3) Manually filter images

### Instructions

- Start with the selection procedure by executing the cell below.

- If an image is fine, just press enter while the cursor is in the input cell.

- If it is faulty in any way, enter any character of your choice (other than "t" and "exit").

- To stop the procedure, type and enter "exit".

- If the image shown is a truck parking lot, type "t" (pay attention to not press t to blacklist an item!)

- The counter and id of the last image checked will be printed. If you stop and want to proceed later on, change the start_nr variable below to the last number printed.

- If the interactive "pop-up" of the map is too big (i.e. you always have to scroll down to get to the input bar), just adjust the window width (in jupyter lab, increase the width of the data browser on the left)

<br>

#### Caution:
- Don't forget to save the blacklist and whitelist as csv **before** re-running the selection cell (resets lists!)

- Always change the file name after saving a new set of lists.

- Always note the last counter value you checked when exiting, in order to know where to resume (best practice would be to change the start_nr parameter after each run).

- If you exit the loop, the last image shown will **not** yet be saved as ok nor faulty.

In [106]:
# Start selection
start_nr = 0 # if you stop somewhere in between, enter here the last image nr. printed

# Result lists (re-running this cell resets them, so save them first)
blacklist = []
whitelist = []
trucklist = []

for i in range(start_nr, len(real_polys)):
    row = real_polys.iloc[i]

    # Create the map with boundaries and show it above the input prompt
    m, b = make_map(row.pt, TS, geometry=row.geometry, show_geometry=True, size=640)
    display(m)

    inp = input(f'Image Nr. {i} ok? Just enter. If truck, type "t". To exit type "exit"')

    if inp == "exit":
        # The image currently shown has not been classified yet
        print("Image to be checked next:", row["id"])
        break

    if inp == "t":
        # Useful when looking through car images and a truck image turns up
        target = trucklist
    elif inp == "":
        target = whitelist
    else:
        target = blacklist
    target.append(row["id"])

    print("Last image checked:", row["id"])
    print("")

Image Nr. 0 ok? Just enter. If truck, type "t". To exit type "exit" 


Last image checked: 36.5400085_-89.5958862_ts1



Image Nr. 1 ok? Just enter. If truck, type "t". To exit type "exit"  


Last image checked: 54.8565589_-2.875634_ts1



Image Nr. 2 ok? Just enter. If truck, type "t". To exit type "exit" exit


Image to be checked next: 38.2476037_-84.5447897_ts1


In [107]:
# Print blacklist
blacklist

['54.8565589_-2.875634_ts1']

In [108]:
# Print whitelist (to record which images have already been checked)
whitelist

['36.5400085_-89.5958862_ts1']

## Save blacklist and whitelist

In [109]:
# Save a list of ids as csv, refusing to overwrite an existing file
def save_file(list1, csv_name, PATH):
    """Write ``list1`` as a one-column csv (column ``id``) to ``PATH/csv_name.csv``.

    Parameters
    ----------
    list1 : list
        Values for the ``id`` column.
    csv_name : str
        File name without the ``.csv`` extension.
    PATH : str
        Target folder; it is created if it does not exist yet.

    Returns
    -------
    str
        A status message: either that the file was saved, or that a file of
        that name already exists (in which case nothing is written).
    """
    target = f"{PATH}/{csv_name}.csv"

    if os.path.isfile(target):
        return f"File \"{csv_name}\" already created! Change filename!"

    # Make sure the folder exists so the first save into a new folder works.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    pd.DataFrame({'id': list1}).to_csv(target)
    return f"File \"{csv_name}\" successfully saved!"

In [111]:
# Folder in which the lists are stored as csv:
path = "data/cars"

# Save each list under its own file name and report the outcome
for items, csv_name in ((whitelist, "whitelist1"), (blacklist, "blacklist1")):
    print(save_file(items, csv_name, path))
#print(save_file(trucklist, "trucklist1", path))

File "whitelist1" successfully saved!
File "blacklist1" successfully saved!


In [None]:
# Difficult: 
d = ["53.244_-3.191_ts2", "43.296_3.216_ts2", "41.501_-74.206_ts2", "42.571_-113.782_ts2", "41.247_-87.861_ts2", "49.96_7.951_ts2"]

In [None]:
# Example, truck parking lot not built yet: 51.963_6.029_ts2, -37.805_144.753_ts2, 59.303_15.261_ts2, 41.092_29.288_ts2, 51.196_13.739_ts2
# Not fully built yet? 42.898_-74.099_ts2
# Negative example: -29.487_152.335_ts2

In [None]:
# "The perfect" truck parking lot: 51.087_13.274_ts2

In [None]:
# Nice car parking lot with fields around: 45.0139306_0.1170134_ts2