## This script creates a table from coordinates to corresponding tif images where you can find those coordinates and will look for the catalog IDs of those which are missing that information

## Steps:
### 1. Loads the tomnod geojson file and tifRange file
### 2. Cleans the list of catalog IDs into a column called 'complete_catalog_id'
### 3. Creates a reference table for the damage points <-> tif file

In [2]:
import gdal
import geopandas as gpd
import os
import pandas as pd
import numpy as np

## load the tomnod geojson file, TOMNOD = GEOJSON & tifRange file

In [22]:
tomnod = gpd.read_file("/Users/tessaschneider/Projects/Dcubed/Indexgeojson/data/digitalglobe_crowdsourcing_hurricane_harvey_20170915.geojson")
tifRange = pd.read_csv("/Users/tessaschneider/Projects/Dcubed/Indexgeojson/data/tifRange-tiles-run-1.csv", header = None, names = ['tif_id', 'minxy','maxxy'])

In [23]:
tomnod

Unnamed: 0,id,tag_id,label,tagger_id,type_id,score,agreement,chip_size,chip_url,timestamp,overlay_id,acquisition_date,sensor,catalog_id,map_id,geometry
0,214149-59,59,Flooded / Damaged Building,30425,226,1.000000,32,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60044,2017-08-31T23:39:39,GE01,105001000B95E200,9029,POINT (-95.096374 29.84318)
1,214149-72,72,Flooded / Damaged Building,6294177,226,1.000000,24,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,33640,POINT (-95.50154999999999 29.760742)
2,214149-76,76,Flooded / Damaged Building,6294177,226,1.000000,20,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,28027,POINT (-95.50540700000001 29.75222)
3,214149-108,108,Flooded / Damaged Building,6294177,226,1.000000,28,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32835,POINT (-95.521941 29.748386)
4,214149-111,111,Flooded / Damaged Building,6294177,226,1.000000,25,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32835,POINT (-95.52256800000001 29.748596)
5,214149-113,113,Flooded / Damaged Building,6294177,226,1.000000,14,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,22893,POINT (-95.522836 29.749514)
6,214149-115,115,Flooded / Damaged Building,6294177,226,1.000000,19,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:56,60043,2017-08-31T23:39:05,GE01,105001000B95E100,35089,POINT (-95.526785 29.746132)
7,214149-124,124,Flooded / Damaged Building,6294177,226,0.985248,10,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:56,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32561,POINT (-95.530035 29.747711)
8,214149-134,134,Flooded / Damaged Building,6294177,226,1.000000,18,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:56,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32561,POINT (-95.53206900000001 29.74719)
9,214149-141,141,Flooded / Damaged Building,6294177,226,0.999556,11,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,31827,POINT (-95.533699 29.747637)


## splitting coordinates to different callable variables

In [24]:
tomnod_x = tomnod['geometry'].x
tomnod_y = tomnod['geometry'].y
tomnod['tomnod_x'] = tomnod_x
tomnod['tomnod_y'] = tomnod_y

## convert the lat lng tuple into individual floats

In [25]:
def process_tup(tup):
    return [float(ele) for ele in (tup.strip('()').split(','))]

## get lat lng range of catalog_id (corner points), iterate over the tifs in order to get the catalog's range

In [26]:
tifRange['tif_id']

0        20170831_103001006D44B500_3002011_jpeg_compres...
1        20170902_105001000B9D7E00_3030122_jpeg_compres...
2        20170831_103001006D44B500_3000222_jpeg_compres...
3        20170830_103001006F018C00_2111333_jpeg_compres...
4        20170830_103001006FAD6200_2131131_jpeg_compres...
5        20170830_103001006FCDF300_2113011_jpeg_compres...
6        20170902_103001006F3C8400_3021302_jpeg_compres...
7        20170830_103001006F018C00_2111321_jpeg_compres...
8        20170829_1040010032658F00_2110302_jpeg_compres...
9        20170830_103001006EB42900_3020032_jpeg_compres...
10       20170830_103001006F018C00_2113301_jpeg_compres...
11       20170902_1030010071D49400_3200110_jpeg_compres...
12       20170830_103001006F018C00_2131103_jpeg_compres...
13       20170829_1040010032658F00_2112101_jpeg_compres...
14       20170829_10400100325CA800_2110201_jpeg_compres...
15       20170902_103001006F3C8400_3023122_jpeg_compres...
16       20170831_103001006D44B500_3002130_jpeg_compres.

## known catalogs that exist in the data set

In [27]:
POST_EVENT_CATALOG = ['105001000B95E200', '105001000B95E100', '1040010032211E00']

In [28]:
tifRange.iloc[3]['tif_id']

'20170830_103001006F018C00_2111333_jpeg_compressed_05_03.tif'

## 

In [29]:
tomnod['tif_id'] = ""

for index_tomnod, row_tomnod in tomnod.iterrows():
    if index_tomnod % 1 == 0:
        print('tomnod row: ',index_tomnod)
    for index_tif, row_tif in tifRange.iterrows():
#         print(row_tif.loc['tif_id'])
#     for file in os.listdir('image_tiles/'):
#         if file.endswith('.tif'):
#             minmax = get_range_tif('image_tiles/' + file)
#             minxy = minmax[0]
#             maxxy = minmax[1]
        minxy = process_tup(row_tif["minxy"])
        maxxy = process_tup(row_tif["maxxy"])
        
        if minxy[0] <= row_tomnod['tomnod_x'] <= maxxy[0] \
                and minxy[1] <= row_tomnod['tomnod_y'] <= maxxy[1]:
            if tomnod.at[index_tomnod,'tif_id'] == "":
                tomnod.at[index_tomnod,'tif_id'] = row_tif["tif_id"]
                print ('yaaas')
            elif tomnod.at[index_tomnod,'tif_id'] != "":
                tomnod = tomnod.append(tomnod.iloc[index_tomnod], ignore_index=True)
                tomnod.at[index_tomnod,'tif_id'] = row_tif["tif_id"]

tomnod row:  0
yaaas
tomnod row:  1
yaaas


KeyboardInterrupt: 

In [None]:
# tomnod.sort_values('tif_id').head(10)
# tomnod[-tomnod['tif_id'].isnull()]
# type(tomnod.loc[10,'tif_id']) == float
tomnod[tomnod.id == '214149-59']

## to add a small sample set of the tomnod geojson for testing

In [12]:
tomnod[tomnod["tif_id"] != ""].to_file('coordinateandtif.geojson', driver="GeoJSON")
tomnod[tomnod["tif_id"] != ""]["tif_id"].to_csv("list.txt")

ValueError: Geometry column cannot contain mutiple geometry types when writing to file.

## to remove the index values in the txt file list of small sample set

In [13]:
len(tomnod[tomnod["tif_id"] != ""]["tif_id"].unique())

0

## define where to save output list of sample set by tif_id 

In [426]:
np.savetxt("list.txt", tomnod[tomnod["tif_id"] != ""]["tif_id"].unique(), fmt = "%s")

## 

In [31]:
tomnod.append(tomnod[tomnod.id == '214149-59'], ignore_index=True)
#tomnod.iloc[0]

Unnamed: 0,id,tag_id,label,tagger_id,type_id,score,agreement,chip_size,chip_url,timestamp,overlay_id,acquisition_date,sensor,catalog_id,map_id,geometry,tomnod_x,tomnod_y,tif_id
0,214149-59,59,Flooded / Damaged Building,30425,226,1.000000,32,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60044,2017-08-31T23:39:39,GE01,105001000B95E200,9029,POINT (-95.096374 29.84318),-95.096374,29.843180,20170830_105001000B920800_3020132_jpeg_compres...
1,214149-72,72,Flooded / Damaged Building,6294177,226,1.000000,24,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,33640,POINT (-95.50154999999999 29.760742),-95.501550,29.760742,20170830_103001006FAD6200_3020021_jpeg_compres...
2,214149-76,76,Flooded / Damaged Building,6294177,226,1.000000,20,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,28027,POINT (-95.50540700000001 29.75222),-95.505407,29.752220,
3,214149-108,108,Flooded / Damaged Building,6294177,226,1.000000,28,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32835,POINT (-95.521941 29.748386),-95.521941,29.748386,
4,214149-111,111,Flooded / Damaged Building,6294177,226,1.000000,25,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32835,POINT (-95.52256800000001 29.748596),-95.522568,29.748596,
5,214149-113,113,Flooded / Damaged Building,6294177,226,1.000000,14,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,22893,POINT (-95.522836 29.749514),-95.522836,29.749514,
6,214149-115,115,Flooded / Damaged Building,6294177,226,1.000000,19,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:56,60043,2017-08-31T23:39:05,GE01,105001000B95E100,35089,POINT (-95.526785 29.746132),-95.526785,29.746132,
7,214149-124,124,Flooded / Damaged Building,6294177,226,0.985248,10,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:56,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32561,POINT (-95.530035 29.747711),-95.530035,29.747711,
8,214149-134,134,Flooded / Damaged Building,6294177,226,1.000000,18,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:56,60043,2017-08-31T23:39:05,GE01,105001000B95E100,32561,POINT (-95.53206900000001 29.74719),-95.532069,29.747190,
9,214149-141,141,Flooded / Damaged Building,6294177,226,0.999556,11,,https://s3.amazonaws.com/explorationlab/chips/...,2017-09-15T22:01:52,60043,2017-08-31T23:39:05,GE01,105001000B95E100,31827,POINT (-95.533699 29.747637),-95.533699,29.747637,


In [397]:
# tomnod.to_csv('coordinateAndTif.csv', encoding='utf-8')
tomnod.to_file('coordinateandtif.geojson', driver="GeoJSON")

DriverIOError: GeoJSON driver doesn't support creating a layer on a read-only datasource

## check output file

In [385]:
tomnod = gpd.read_file("/Users/tessaschneider/Projects/Dcubed/Indexgeojson/notebooks/coordinateandtif.geojson")

## add bounding boxes to geojson using shp_buffer in utils folder buffer size = .00015 