In [9]:
import pandas as pd
from osgeo import gdal
import math
import os
import glob
from PIL import Image # PIL library supports only TIF format but not newer TIFF
import numpy as np

In [10]:
# Root folders, given relative to the directory the notebook is run from.
data_directory = '../data/'
results_directory = '../results/'

# Numbered sub-folders of the data root used by this notebook.
asf_data_directory = f'{data_directory}3_processed_ASF_data/'
interpolated_AIS_data_directory = f'{data_directory}5_interpolated_AIS_data/'
cutted_asf_data_directory = f'{data_directory}6_cutted_ASF_data/'

# Disabled folder definitions, kept for reference only:
#annotated_asf_data_directory = data_directory + '6_annotated_ASF_data/'
#sliced_asf_data_directory = data_directory +'7_sliced_ASF_data/'

In [11]:
# Per-image metadata table (one row per ASF image, with its matching AIS file);
# the first CSV column is used as the DataFrame index.
full_data_csv_path = results_directory + "2022-11-19_18-44-27_full_data_df.csv"
full_data_df = pd.read_csv(full_data_csv_path, index_col=0)
full_data_df

Unnamed: 0,asf_file,image_datetime,date,minLON,minLAT,maxLON,maxLAT,widthpx,heightpx,datetime_lower,datetime_upper,ais_file
0,S1A_IW_GRDH_1SDV_20210204T015812_20210204T0158...,2021-02-04 01:58:41,2021-02-04,-118.496252,33.197789,-117.964359,33.855266,5921,7319,2021-02-04 01:48:41,2021-02-04 02:08:41,AIS_2021_02_04.csv
1,S1B_IW_GRDH_1SDV_20210105T015737_20210105T0158...,2021-01-05 01:58:06,2021-01-05,-118.575269,33.251929,-117.946359,33.930247,7001,7551,2021-01-05 01:48:06,2021-01-05 02:08:06,AIS_2021_01_05.csv
2,S1B_IW_GRDH_1SDV_20210306T015735_20210306T0158...,2021-03-06 01:58:04,2021-03-06,-118.482835,33.199482,-117.980677,33.82974,5590,7016,2021-03-06 01:48:04,2021-03-06 02:08:04,AIS_2021_03_06.csv


### 1. Slice picture into pieces

In [24]:
# Edge length (in px) of the square tiles cut out of every source image.
CHOSEN_PICTURE_SIZE = 2000

# Cut every image listed in full_data_df into a grid of square tiles and store
# them as <cutted_asf_data_directory>/<image name>/subset<N>.tif.
# Tiles are numbered column by column: the outer loop walks x, the inner loop y.
for index, row in full_data_df.iterrows():
    print(f"File: {index}")
    file_name = row['asf_file']
    print(f"File name: {file_name}")
    # str.strip('.tif') would remove any of the characters '.', 't', 'i', 'f'
    # from BOTH ends of the name; splitext drops only the extension.
    file_name_no_extension = os.path.splitext(file_name)[0]
    print(file_name_no_extension)

    # One output folder per source image. join() avoids a doubled '/', the empty
    # last component keeps the trailing separator the tile paths rely on.
    working_dir = os.path.join(cutted_asf_data_directory, file_name_no_extension, '')
    # makedirs also creates a missing parent folder and tolerates re-runs.
    os.makedirs(working_dir, exist_ok=True)

    in_ds = gdal.Open(asf_data_directory + file_name)
    if in_ds is None:
        # Without GDAL exceptions enabled, a failed open returns None.
        print(f"Could not open {asf_data_directory + file_name}, skipping")
        print()
        continue

    image_height = row['heightpx']
    image_width = row['widthpx']
    print(f"Image_size: {image_width}x{image_height} px")

    # Number of tiles needed to cover the image along each axis.
    nr_x_edges = math.ceil(image_width / CHOSEN_PICTURE_SIZE)
    nr_y_edges = math.ceil(image_height / CHOSEN_PICTURE_SIZE)
    print(f"X_edges: {nr_x_edges}, Y_edges: {nr_y_edges}")
    print(f"All_edges = x_edges*y_edges = {nr_x_edges*nr_y_edges}")

    # cut the squares
    subset_number = 0
    for i in range(nr_x_edges):
        for j in range(nr_y_edges):
            subset_number += 1
            # srcWin = [x offset, y offset, width, height] in source pixels.
            # Tiles on the right/bottom border extend past the image; they are
            # still requested at full size, as in the original implementation.
            window = [i*CHOSEN_PICTURE_SIZE, j*CHOSEN_PICTURE_SIZE, CHOSEN_PICTURE_SIZE, CHOSEN_PICTURE_SIZE]
            out_ds = gdal.Translate(working_dir + f'subset{subset_number}.tif', in_ds, srcWin=window)
            if out_ds is None:
                print(f"Could not write subset {subset_number}, skipping")
                continue
            out_width = out_ds.RasterXSize
            out_height = out_ds.RasterYSize
            print(f"Out picture {subset_number} width and height: {out_width}x{out_height} px")
            # Dropping the reference closes the dataset and flushes the tile to disk.
            out_ds = None

    # Release the source dataset before moving on to the next image.
    in_ds = None
    print()

File: 0
File name: S1A_IW_GRDH_1SDV_20210204T015812_20210204T015841_036434_0446E1_2157_TC.tif
S1A_IW_GRDH_1SDV_20210204T015812_20210204T015841_036434_0446E1_2157_TC
Image_size: 5921x7319 px
X_edges: 3, Y_edges: 4
All_edges = x_edges*y_edges = 12
Out picture 1 width and height: 2000x2000 px
Out picture 2 width and height: 2000x2000 px
Out picture 3 width and height: 2000x2000 px
Out picture 4 width and height: 2000x2000 px
Out picture 5 width and height: 2000x2000 px
Out picture 6 width and height: 2000x2000 px
Out picture 7 width and height: 2000x2000 px
Out picture 8 width and height: 2000x2000 px
Out picture 9 width and height: 2000x2000 px
Out picture 10 width and height: 2000x2000 px
Out picture 11 width and height: 2000x2000 px
Out picture 12 width and height: 2000x2000 px

File: 1
File name: S1B_IW_GRDH_1SDV_20210105T015737_20210105T015806_025013_02FA23_627F_TC.tif
S1B_IW_GRDH_1SDV_20210105T015737_20210105T015806_025013_02FA23_627F_TC
Image_size: 7001x7551 px
X_edges: 4, Y_edges:

### 2. Tag pictures

In [21]:
def calculate_px_from_lon2(row, xOrigin, pixelWidth):
    """Convert the longitude of one record into a pixel column.

    Parameters:
        row: record supporting ``row['LON']`` lookup.
        xOrigin: longitude of the image's left edge.
        pixelWidth: width of one pixel, in the same units as the longitude.

    Returns:
        The horizontal offset from ``xOrigin`` expressed in pixels, rounded
        with the built-in ``round``.
    """
    lon_offset = row['LON'] - xOrigin
    return round(lon_offset / pixelWidth)

In [22]:
def calculate_px_from_lat2(row, yOrigin, pixelHeight):
    """Convert the latitude of one record into a pixel row.

    Parameters:
        row: record supporting ``row['LAT']`` lookup.
        yOrigin: latitude of the image's top edge.
        pixelHeight: height of one pixel, in the same units as the latitude.

    Returns:
        The distance below ``yOrigin`` expressed in pixels, rounded with the
        built-in ``round``.
    """
    lat_offset = yOrigin - row['LAT']
    return round(lat_offset / pixelHeight)

In [25]:
# Fixed bounding-box size (in px) written for every AIS position, and the
# class id used as the first field of each label line.
SHIP_WIDTH_PX = 50
SHIP_HEIGHT_PX = 50
OBJECT_CLASS = 0

# For every tile of every image, write a label file <tile name>.txt next to the
# tile with one line per AIS position that falls inside the tile:
#   "<class> <x_center> <y_center> <width> <height>", all normalised by tile size.
# Tiles without any AIS position get no label file.
for index, row in full_data_df.iterrows():
    print(f"File: {index}")
    # str.strip('.tif') would remove any of the characters '.', 't', 'i', 'f'
    # from BOTH ends of the name; splitext drops only the extension.
    asf_file_name = os.path.splitext(row['asf_file'])[0]
    print(f"ASF file name: {asf_file_name}")
    ais_file_name = 'interpolated_processed_' + row['ais_file']
    print(f"AIS file name: {ais_file_name}")
    ais_file_df = pd.read_csv(interpolated_AIS_data_directory + ais_file_name, index_col=0)

    # Folder holding the tiles that were cut from this image.
    directory = cutted_asf_data_directory + asf_file_name
    print(f'directory: {directory}')
    if not os.path.exists(directory):
        print('ERROR!! no picture data')
        print('TRY ANNOTATION AGAIN')
        continue

    pictures = [entry for entry in os.listdir(directory) if entry.endswith(".tif")]
    print(pictures)
    for picture in pictures:
        print(f'picture: {picture}')
        picture_name = os.path.splitext(picture)[0]
        print(f'picture_name: {picture_name}')
        in_ds = gdal.Open(directory + '/' + picture)
        if in_ds is None:
            # Without GDAL exceptions enabled, a failed open returns None.
            print(f"Could not open {directory + '/' + picture}, skipping")
            print()
            continue
        image_width = in_ds.RasterXSize
        image_height = in_ds.RasterYSize
        geo_transform = in_ds.GetGeoTransform()
        # Everything needed has been read; release the dataset.
        in_ds = None

        # Bounding box of the tile derived from its geotransform.
        minx = geo_transform[0] #minLON
        miny = geo_transform[3] + image_width*geo_transform[4] + image_height*geo_transform[5] #minLAT
        maxx = geo_transform[0] + image_width*geo_transform[1] + image_height*geo_transform[2] #maxLON
        maxy = geo_transform[3] #maxLAT

        # Top-left corner and pixel size; geo_transform[5] is negated so that
        # pixelHeight is positive when rows run from north to south.
        xOrigin = geo_transform[0]
        yOrigin = geo_transform[3]
        pixelWidth = geo_transform[1]
        pixelHeight = -geo_transform[5]
        print(f"image_width: {image_width}, image_height: {image_height}")
        print(f"xOrigin: {xOrigin}, yOrigin: {yOrigin}")

        # Keep only AIS positions strictly inside the tile's bounding box.
        inside_lat = (ais_file_df.LAT > miny) & (ais_file_df.LAT < maxy)
        inside_lon = (ais_file_df.LON > minx) & (ais_file_df.LON < maxx)
        target_area = ais_file_df[inside_lat & inside_lon].copy().reset_index(drop=True)

        current_file = directory + '/' + picture_name + '.txt'
        # Always drop a label file left over from an earlier run, so a tile
        # that no longer contains ships does not keep outdated labels.
        if os.path.exists(current_file):
            os.remove(current_file)

        if len(target_area) == 0:
            # No label file is written for tiles without ships.
            print('Sorry, no ships on the selected area')
            print()
            continue

        # Pixel coordinates of every position, computed once for the whole
        # frame (previously both columns were recomputed once per row).
        target_area['LONpx_X2'] = target_area.apply(
            lambda ais_row: calculate_px_from_lon2(ais_row, xOrigin, pixelWidth), axis=1)
        target_area['LATpx_Y2'] = target_area.apply(
            lambda ais_row: calculate_px_from_lat2(ais_row, yOrigin, pixelHeight), axis=1)

        # The box size is the same for every ship in this tile.
        SHIP_WIDTH_normalized = SHIP_WIDTH_PX / image_width
        SHIP_HEIGHT_normalized = SHIP_HEIGHT_PX / image_height

        # Open the label file once; the context manager closes it even on error.
        with open(current_file, 'w') as file_object:
            for x_px, y_px in zip(target_area['LONpx_X2'], target_area['LATpx_Y2']):
                X_CENTER_AXIS_VALUE = x_px / image_width
                Y_CENTER_AXIS_VALUE = y_px / image_height
                string_to_write = f"{OBJECT_CLASS} {X_CENTER_AXIS_VALUE} {Y_CENTER_AXIS_VALUE} {SHIP_WIDTH_normalized} {SHIP_HEIGHT_normalized}\n"
                file_object.write(string_to_write)
        print()

File: 0
ASF file name: S1A_IW_GRDH_1SDV_20210204T015812_20210204T015841_036434_0446E1_2157_TC
AIS file name: interpolated_processed_AIS_2021_02_04.csv
directory: ../data/6_cutted_ASF_data/S1A_IW_GRDH_1SDV_20210204T015812_20210204T015841_036434_0446E1_2157_TC
['subset1.tif', 'subset10.tif', 'subset11.tif', 'subset12.tif', 'subset2.tif', 'subset3.tif', 'subset4.tif', 'subset5.tif', 'subset6.tif', 'subset7.tif', 'subset8.tif', 'subset9.tif']
picture: subset1.tif
picture_name: subset1
image_width: 2000, image_height: 2000
xOrigin: -118.4962518528152, yOrigin: 33.855265740629456

picture: subset10.tif
picture_name: subset10
image_width: 2000, image_height: 2000
xOrigin: -118.1369257391674, yOrigin: 33.67560268380555

picture: subset11.tif
picture_name: subset11
image_width: 2000, image_height: 2000
xOrigin: -118.1369257391674, yOrigin: 33.495939626981645
Sorry, no ships on the selected area

picture: subset12.tif
picture_name: subset12
image_width: 2000, image_height: 2000
xOrigin: -118.136