Extract the training data for the map.

***Import necessary libraries***

In [18]:
import os
from rasterio.plot import show
import numpy as np
import rasterio
import rasterio.features
import pandas as pd
import time
import tifffile as tiff
import random
import feather

Let's define the input file path variables.

In [19]:
# Paths to the contained-segment input files.
# Forward slashes are used throughout: a backslash followed by "c" in a
# non-raw string literal is an invalid escape sequence (newer Python versions
# warn about it), and backslash-separated paths only resolve on Windows,
# whereas forward slashes work on every platform.
contained_file_50 = 'data/contained_segments/contained_segments_50.txt'
contained_file_0001 = 'data/contained_segments/contained_segments_0001.txt'
contained_file_100 = 'data/contained_segments/contained_segments_100.txt'
contained_all_segments = 'data/contained_all_segments.txt'

# Input and Output dataframe

Create the function to load the segments.

In [20]:
def load_data_from_file(contained_file):
    """Parse a contained-segments text file into a DataFrame.

    Every non-blank line is expected to hold four ``key: value`` fields
    separated by commas, the fourth value being a nested list of
    ``[row, col]`` integer pairs, e.g.::

        <key>: 364025, <key>: 3.0, <key>: 1.0, <key>: [[1133, 2933], [1133, 2934]]

    Args:
        contained_file: Path of the text file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per parsed line and the columns
        ``segment_id``, ``polygon_id``, ``class_id`` (all kept as the stripped
        strings found in the file) and ``pixels`` (a list of ``(int, int)``
        tuples).

    Raises:
        IndexError: If a non-blank line does not contain the expected fields.
        ValueError: If a pixel coordinate is not an integer.
    """
    segment_id = []
    polygon_id = []
    class_id = []
    pixels = []

    with open(contained_file) as f:
        # Iterate the file lazily rather than loading every line at once.
        for raw_line in f:
            # Remove the line terminator before parsing so the result does not
            # depend on whether the last line of the file ends with a newline.
            line = raw_line.strip()
            if not line:
                # Blank lines carry no segment; skip them instead of failing.
                continue

            fields = line.split(',')
            # The first three comma-separated fields are "key: value" pairs.
            segment_id.append(fields[0].split(':')[1].strip())
            polygon_id.append(fields[1].split(':')[1].strip())
            class_id.append(fields[2].split(':')[1].strip())

            # Everything after the fourth colon is the nested pixel list.
            # Dropping the outer "[[" and "]]" leaves "r, c], [r, c".
            pixel_list_str = line.split(':')[4].strip()[2:-2]
            if pixel_list_str:
                pairs = (pair.split(', ') for pair in pixel_list_str.split('], ['))
                pixels.append([(int(pair[0]), int(pair[1])) for pair in pairs])
            else:
                # An empty list in the file yields an empty pixel list.
                pixels.append([])

    # Build the frame in one step; dict order fixes the column order.
    return pd.DataFrame({
        'segment_id': segment_id,
        'polygon_id': polygon_id,
        'class_id': class_id,
        'pixels': pixels,
    })

Load the segment dataframe.

In [21]:
# Parse the file referenced by contained_file_100 into the working DataFrame.
dataframe_segment = load_data_from_file(contained_file_100)
# Alternative input (disabled): the file referenced by contained_all_segments.
#dataframe_segment =  load_data_from_file(contained_all_segments)

In [22]:
# Preview the first rows (segment_id, polygon_id, class_id, pixels).
dataframe_segment.head()

Unnamed: 0,segment_id,polygon_id,class_id,pixels
0,364025,3.0,1.0,"[(1133, 2933), (1133, 2934), (1133, 2935), (11..."
1,367183,4.0,2.0,"[(1144, 2945), (1144, 2946), (1144, 2947), (11..."
2,369134,4.0,2.0,"[(1149, 2941), (1149, 2942), (1149, 2943), (11..."
3,369135,4.0,2.0,"[(1149, 2945), (1149, 2946), (1149, 2947), (11..."
4,370405,4.0,2.0,"[(1153, 2941), (1153, 2942), (1153, 2943), (11..."


In [13]:
# Save the segment table as a tab-separated text file, without the index column.
# NOTE(review): output_file is an absolute path on drive D: - adjust it to the
# local environment before running.
output_file = 'D:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb/thibault_test_extraction/processed_data.txt'
dataframe_segment.to_csv(output_file, sep='\t', index=False)

In [14]:
# Relative path for the processed data.
# NOTE(review): not referenced by the cells visible in this section - confirm it is used later.
new_file = "data/extraction_train/processed_data.txt"

## Create the perimeter

First get the perimeter

In [23]:
def get_perimeter_pixels(pixels):
    """Return the leftmost and rightmost pixel of every row in *pixels*.

    Args:
        pixels: Iterable of ``(row, col)`` pairs.

    Returns:
        A list of ``(row, col)`` tuples. Rows appear in the order they are
        first encountered in *pixels*; each row contributes its minimum-column
        pixel followed by its maximum-column pixel (the same pixel twice when
        the row holds a single column).
    """
    # Group the column indices by row, keeping first-seen row order.
    cols_by_row = {}
    for r, c in pixels:
        cols_by_row.setdefault(r, []).append(c)

    # Emit the two horizontal extremes of each row.
    outline = []
    for r in cols_by_row:
        row_cols = cols_by_row[r]
        outline.extend([(r, min(row_cols)), (r, max(row_cols))])
    return outline

In [25]:
# Derive the per-row extreme pixels of every segment and store them in a new
# 'Perimeter Pixels' column.
dataframe_segment['Perimeter Pixels'] = dataframe_segment['pixels'].apply(get_perimeter_pixels)


In [27]:
# Preview the first rows, now including the 'Perimeter Pixels' column.
dataframe_segment.head()

Unnamed: 0,segment_id,polygon_id,class_id,pixels,Perimeter Pixels
0,364025,3.0,1.0,"[(1133, 2933), (1133, 2934), (1133, 2935), (11...","[(1133, 2933), (1133, 2936), (1134, 2933), (11..."
1,367183,4.0,2.0,"[(1144, 2945), (1144, 2946), (1144, 2947), (11...","[(1144, 2945), (1144, 2947), (1145, 2945), (11..."
2,369134,4.0,2.0,"[(1149, 2941), (1149, 2942), (1149, 2943), (11...","[(1149, 2941), (1149, 2944), (1150, 2941), (11..."
3,369135,4.0,2.0,"[(1149, 2945), (1149, 2946), (1149, 2947), (11...","[(1149, 2945), (1149, 2948), (1150, 2945), (11..."
4,370405,4.0,2.0,"[(1153, 2941), (1153, 2942), (1153, 2943), (11...","[(1153, 2941), (1153, 2944), (1154, 2941), (11..."


Then order the perimeter clockwise

In [28]:
def order_pixels_clockwise(pixels):
    """Sort pixels by their angle around the centroid of the set.

    The sort key is ``arctan2(col - centroid_col, row - centroid_row)`` in
    ascending order. Whether that traversal appears clockwise depends on how
    the row/column axes are drawn; the function name is kept for callers.

    Args:
        pixels: Sequence of ``(row, col)`` pairs (a list of tuples or a 2-D
            NumPy array).

    Returns:
        A list of ``(row, col)`` tuples in angular order. NumPy scalars are
        converted to native Python numbers so that the tuples print and
        serialise as plain numbers. An empty input yields an empty list.
    """
    # len() works for both lists and arrays; an empty set has no centroid, and
    # dividing by its length would raise ZeroDivisionError.
    if len(pixels) == 0:
        return []

    # Calculate the centroid of the pixels.
    count = len(pixels)
    centroid_row = sum(pixel[0] for pixel in pixels) / count
    centroid_col = sum(pixel[1] for pixel in pixels) / count

    def angle(pixel):
        # Angle of the pixel relative to the centroid.
        return np.arctan2(pixel[1] - centroid_col, pixel[0] - centroid_row)

    sorted_pixels = sorted(pixels, key=angle)

    # Convert the result back to a list of tuples, unwrapping NumPy scalars:
    # under NumPy 2 their repr is e.g. "np.int64(5)", which would leak into
    # any text representation of the result.
    return [
        tuple(v.item() if isinstance(v, np.generic) else v for v in pixel)
        for pixel in sorted_pixels
    ]

In [29]:
# Order each perimeter list by angle around its centroid.
# The lists are passed as-is: wrapping them in np.array() is unnecessary and
# makes the resulting tuples hold NumPy scalars, whose repr under NumPy 2
# ("np.int64(...)") would end up in the text file written from this column.
dataframe_segment['Perimeter Pixels'] = dataframe_segment['Perimeter Pixels'].apply(order_pixels_clockwise)

In [30]:
# Preview the first rows after the perimeter pixels have been reordered.
dataframe_segment.head()

Unnamed: 0,segment_id,polygon_id,class_id,pixels,Perimeter Pixels
0,364025,3.0,1.0,"[(1133, 2933), (1133, 2934), (1133, 2935), (11...","[(1133, 2933), (1134, 2933), (1135, 2933), (11..."
1,367183,4.0,2.0,"[(1144, 2945), (1144, 2946), (1144, 2947), (11...","[(1144, 2945), (1145, 2945), (1146, 2945), (11..."
2,369134,4.0,2.0,"[(1149, 2941), (1149, 2942), (1149, 2943), (11...","[(1149, 2941), (1150, 2941), (1151, 2941), (11..."
3,369135,4.0,2.0,"[(1149, 2945), (1149, 2946), (1149, 2947), (11...","[(1149, 2945), (1150, 2945), (1151, 2945), (11..."
4,370405,4.0,2.0,"[(1153, 2941), (1153, 2942), (1153, 2943), (11...","[(1153, 2941), (1154, 2941), (1155, 2941), (11..."


In [33]:
# Save the table with ordered perimeters as a tab-separated text file,
# without the index column.
# NOTE(review): output_file is an absolute path on drive D: - adjust it to the
# local environment before running.
output_file = 'D:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb/thibault_test_extraction/processed_data_perimeter_ordered.txt'
dataframe_segment.to_csv(output_file, sep='\t', index=False)

In [34]:
# Load the output file back into a DataFrame (tab-separated).
# NOTE(review): no converters are passed, so the 'pixels' and
# 'Perimeter Pixels' columns presumably come back as strings rather than
# lists of tuples - confirm and parse them if later cells need real lists.
dataframe_segment = pd.read_csv(output_file, sep='\t')

In [35]:
# Preview the first rows of the table reloaded from disk.
dataframe_segment.head()

Unnamed: 0,segment_id,polygon_id,class_id,pixels,Perimeter Pixels
0,364025,3.0,1.0,"[(1133, 2933), (1133, 2934), (1133, 2935), (11...","[(1133, 2933), (1134, 2933), (1135, 2933), (11..."
1,367183,4.0,2.0,"[(1144, 2945), (1144, 2946), (1144, 2947), (11...","[(1144, 2945), (1145, 2945), (1146, 2945), (11..."
2,369134,4.0,2.0,"[(1149, 2941), (1149, 2942), (1149, 2943), (11...","[(1149, 2941), (1150, 2941), (1151, 2941), (11..."
3,369135,4.0,2.0,"[(1149, 2945), (1149, 2946), (1149, 2947), (11...","[(1149, 2945), (1150, 2945), (1151, 2945), (11..."
4,370405,4.0,2.0,"[(1153, 2941), (1153, 2942), (1153, 2943), (11...","[(1153, 2941), (1154, 2941), (1155, 2941), (11..."
