## crop the images

In [22]:
import os
from osgeo import gdal, ogr
from shapely.geometry import box , Point
import geopandas as gpd
import rasterio
from rasterio.mask import mask

from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### split tiff

In [23]:
def split_tiff(tif_path, out_folder, tile_size=(250, 250)):
    """Cut a raster into a grid of tiles and write each tile into ``out_folder``.

    Tiles are written as ``tile_tiff_{row}_{col}.tif``. Tiles in the last
    row/column are smaller than ``tile_size`` when the raster dimensions are
    not an exact multiple of it.

    Args:
        tif_path: Path of the raster to split.
        out_folder: Directory that receives the tiles; created when missing.
        tile_size: ``(width, height)`` of one tile, in pixels.

    Returns:
        None. When the input raster cannot be opened a message is printed and
        nothing is written.

    Raises:
        RuntimeError: If GDAL fails to write one of the tiles.
    """
    ds = gdal.Open(tif_path)
    if ds is None:
        print(f"Could not open input TIFF file: {tif_path}")
        return

    # gdal.Translate does not create missing directories, so make sure the
    # target exists before the first tile is written.
    if out_folder:
        os.makedirs(out_folder, exist_ok=True)

    tile_w, tile_h = tile_size

    #get raster width and height
    width = ds.RasterXSize
    height = ds.RasterYSize

    #calc. the number of rows and columns (ceiling division)
    num_cols = (width + tile_w - 1) // tile_w
    num_rows = (height + tile_h - 1) // tile_h

    try:
        with tqdm(total=num_rows * num_cols, desc='splitting tiffs') as pbar:
            for i in range(num_rows):
                ymin = i * tile_h
                win_h = min(tile_h, height - ymin)
                for j in range(num_cols):
                    xmin = j * tile_w
                    win_w = min(tile_w, width - xmin)

                    #crop & save
                    output_tif = os.path.join(out_folder, f'tile_tiff_{i}_{j}.tif')
                    tile_ds = gdal.Translate(output_tif, ds, srcWin=[xmin, ymin, win_w, win_h])
                    if tile_ds is None:
                        # without gdal.UseExceptions() a failure is only signalled by None
                        raise RuntimeError(f"Could not write tile: {output_tif}")
                    tile_ds = None  # drop the reference so the tile is flushed to disk
                    pbar.update(1)
    finally:
        ds = None  # release the source dataset
    
#out_folder = '../data/tiles'
#tile_size = (250, 250)  #tile size in pixels
#split_tiff(tif_path, out_folder, tile_size)


### splitting the shapefile by the extents of the split tiff tiles

In [24]:
def split_shp(bigger_shapefile_path, tiff_file_path, output_folder):
    """Write the features of a shapefile that intersect one raster tile.

    The tile is expected to be named ``tile_tiff_{row}_{col}.tif``; the result
    is written to ``output_folder`` as ``tile_shp_{row}_{col}.shp``. Features
    are selected (not geometrically clipped) with ``intersects`` against the
    bounding box of the tile.

    Args:
        bigger_shapefile_path: Path of the shapefile covering the whole area.
        tiff_file_path: Path of the tile whose extent selects the features.
        output_folder: Directory that receives the per-tile shapefile.

    Raises:
        ValueError: If the tile file name does not contain the row and column
            parts needed to build the output name.
    """
    #read the shp
    gdf_bigger = gpd.read_file(bigger_shapefile_path)

    #read the tiff and get the bbox
    with rasterio.open(tiff_file_path) as src:
        bbox = box(*src.bounds)
        raster_crs = src.crs

    # src.bounds is expressed in the raster CRS. Comparing it with geometries
    # stored in a different CRS would silently select the wrong features, so
    # bring the box into the shapefile CRS when both CRSs are known and differ.
    if raster_crs and gdf_bigger.crs is not None:
        bbox_series = gpd.GeoSeries([bbox], crs=raster_crs.to_wkt())
        if bbox_series.crs != gdf_bigger.crs:
            bbox = bbox_series.to_crs(gdf_bigger.crs).iloc[0]

    #keep the features of the bigger shapefile that touch the extent of the TIFF file
    gdf_intersection = gdf_bigger[gdf_bigger.intersects(bbox)]

    tile_stem = os.path.splitext(os.path.basename(tiff_file_path))[0]
    name_parts = tile_stem.split('_')
    if len(name_parts) < 4:
        raise ValueError(
            f"Tile name does not match 'tile_tiff_<row>_<col>.tif': {tiff_file_path}")
    out_path = os.path.join(output_folder,
                            f'tile_shp_{name_parts[2]}_{name_parts[3]}.shp')
    gdf_intersection.to_file(out_path)

#tif_path = '../data/actual_data/2023-02-23-bakonyszucs_im_cropped.tif'
#shp_path = '../data/actual_data/Pont.shp'
#out_folder ='../data/distributed'
#split_shp(shp_path, 'output_tiles/tile_tiff_0_0.tif', '../data/tiles')


## concat tiff & shp split

In [26]:
def split(tiff_path, shp_path, output_folder, tile_size=(250, 250)):
    """Split a raster into tiles and write a matching shapefile for every tile.

    First calls ``split_tiff`` to write the tiles into ``output_folder``, then
    calls ``split_shp`` once per tile found in that folder.

    Args:
        tiff_path: Path of the raster to split.
        shp_path: Path of the shapefile covering the whole raster.
        output_folder: Directory that receives both the tiles and the
            per-tile shapefiles.
        tile_size: ``(width, height)`` of one tile, in pixels.
    """
    split_tiff(tiff_path, output_folder, tile_size)

    # Only pick up files that follow the tile naming scheme: any other .tif in
    # the folder would make split_shp fail while parsing the row/column parts.
    tile_paths = sorted(
        os.path.join(output_folder, name)
        for name in os.listdir(output_folder)
        if name.startswith('tile_tiff_') and name.endswith('.tif')
    )

    # a distinct loop name keeps the `tiff_path` argument intact
    for tile_path in tqdm(tile_paths, desc='splitting shps'):
        split_shp(shp_path, tile_path, output_folder)
    print('success')

In [None]:
def get_tiff_size(tiff_file_path):
    """Return the ``(width, height)`` reported by rasterio for a raster file."""
    with rasterio.open(tiff_file_path) as dataset:
        size = (dataset.width, dataset.height)
    return size


print(get_tiff_size('../data/tiles/tile_tiff_1_1.tif'))

## convert SHP to PNG

In [None]:
#matplotlib version

import os
import geopandas as gpd
import matplotlib.pyplot as plt

def convert_SHPtoPNG(shp_path, png_path, point_size=1, dpi=300, bg_color='black', fg_color='white', fig_size=(50, 50)):
    '''
    Scatter-plot the points of a shapefile with matplotlib and save it as PNG.

    Args:
        shp_path: Path of the point shapefile to draw.
        png_path: Path of the PNG file to write.
        point_size: Marker size passed to ``scatter``.
        dpi: Resolution passed to ``savefig``.
        bg_color: Colour of the axes and of the saved figure background.
        fg_color: Colour of the markers.
        fig_size: Figure size passed to ``plt.subplots``.

    Returns:
        List of ``(x, y)`` tuples, one per geometry of the shapefile.
    '''
    gdf = gpd.read_file(shp_path)
    print(gdf.total_bounds)

    #plot points as fg_color markers on a bg_color background
    fig, ax = plt.subplots(figsize=fig_size)
    try:
        # NOTE(review): the axes are fixed to 0..250 while the coordinates come
        # straight from the shapefile; points outside that window are not drawn.
        # Presumably gdf.total_bounds was meant here - confirm before relying on it.
        ax.set_xlim(0, 250)
        ax.set_ylim(0, 250)

        ax.set_facecolor(bg_color)
        ax.scatter(gdf.geometry.x, gdf.geometry.y, s=point_size, color=fg_color) #point size

        #save the plot as a PNG image; the figure background follows bg_color too
        fig.savefig(png_path, bbox_inches='tight', pad_inches=0, facecolor=bg_color, dpi=dpi)
    finally:
        # close this specific figure even when plotting or saving fails
        plt.close(fig)

    return list(zip(gdf.geometry.x, gdf.geometry.y)) #points (X, Y)

shp_path = '../data/tiles/tile_shp_0_0.shp'
out_path = '../data/tile_png_0_0_250.png'
points = convert_SHPtoPNG(shp_path, out_path, point_size=5, fig_size=(10, 10))
print(points)

In [8]:
#pillow version (working) 

from PIL import Image, ImageDraw
import rasterio
from rasterio.warp import transform_bounds
from pyproj import Transformer

def convert_SHPtoPNG(tiff_path, shp_path, png_path, point_size=1, bg_color='black', fg_color='white',
                     img_size=(250, 250), target_epsg=23700):
    '''
    Draw the points of a shapefile into a PNG that covers the extent of a tile.

    The bounds of the raster are transformed to ``EPSG:{target_epsg}`` and each
    point is mapped linearly from those bounds to pixel coordinates (the Y axis
    is flipped because image rows grow downwards).

    NOTE(review): the point coordinates are used as read from the shapefile, so
    this assumes the shapefile is stored in ``target_epsg`` - confirm.

    Args:
        tiff_path: Raster tile whose bounds define the image extent.
        shp_path: Point shapefile to draw.
        png_path: Path of the PNG file to write; its directory is created
            when missing.
        point_size: Half-size, in pixels, of the box around each drawn dot.
        bg_color: Background colour of the image.
        fg_color: Fill and outline colour of the dots.
        img_size: ``(width, height)`` of the image in pixels.
        target_epsg: EPSG code the raster bounds are transformed to.

    Returns:
        List of ``(x, y)`` tuples, one per geometry of the shapefile.
    '''
    gdf = gpd.read_file(shp_path)

    with rasterio.open(tiff_path) as src:
        # Transform the bounding box coordinates to the target EPSG code
        xmin, ymin, xmax, ymax = transform_bounds(src.crs, f'EPSG:{target_epsg}', *src.bounds)

    img_width, img_height = img_size
    x_span = xmax - xmin
    y_span = ymax - ymin

    #new blank image
    img = Image.new('RGB', (img_width, img_height), color=bg_color)
    draw = ImageDraw.Draw(img)

    for geom in gdf.geometry:
        x = int((geom.x - xmin) / x_span * img_width)
        y = int((ymax - geom.y) / y_span * img_height)  # Invert
        draw.ellipse([x - point_size, y - point_size, x + point_size, y + point_size],
                     fill=fg_color, outline=fg_color)

    # Image.save does not create missing directories
    png_dir = os.path.dirname(png_path)
    if png_dir:
        os.makedirs(png_dir, exist_ok=True)
    img.save(png_path)
    img.close()

    #extract points
    points = list(zip(gdf.geometry.x, gdf.geometry.y))  # List of tuples (X, Y)
    return points

In [None]:
#get points with ogr

from osgeo import ogr

shp_path = "../data/tile_shp_0_0_copy.shp"
input_ds = ogr.Open(shp_path)

if input_ds is None:
    # raise instead of exit(): exit() would shut the notebook kernel down
    raise FileNotFoundError(f"Could not open input shapefile: {shp_path}")

# Get the input layer
input_layer = input_ds.GetLayer()

# List to store points
points_list = []

# Loop through features to extract points
for feature in input_layer:
    geom = feature.GetGeometryRef()
    if geom is not None:
        for i in range(geom.GetPointCount()):
            point = geom.GetPoint(i)
            points_list.append(point)

# Close the shapefile; drop the layer first so no reference outlives its data source
input_layer = None
input_ds = None

# Print the list of points
print(points_list)


In [None]:
# Bounding box of the shapefile currently referenced by `shp_path`
gdf = gpd.read_file(shp_path)
(min_x, min_y, max_x, max_y) = gdf.total_bounds

# Width and height of that bounding box (max minus min along each axis)
x_resolution, y_resolution = max_x - min_x, max_y - min_y
print(x_resolution, y_resolution)

### copy shp param.

In [None]:
output_shp = '../data/tile_shp_0_0_copy.shp' #'../data/actual_data/output_points.shp'
input_shp = '../data/tiles/tile_shp_0_0.shp'
# Open the declared input, not whatever `shp_path` was left at by another cell
input_ds = ogr.Open(input_shp)

if input_ds is None:
    # raise instead of exit(): exit() would shut the notebook kernel down
    raise FileNotFoundError(f"Could not open input shapefile: {input_shp}")

# Get the input layer
input_layer = input_ds.GetLayer()

# Create a new shapefile for writing
driver = ogr.GetDriverByName('ESRI Shapefile')
if os.path.exists(output_shp):
    driver.DeleteDataSource(output_shp)
output_ds = driver.CreateDataSource(output_shp)
if output_ds is None:
    raise RuntimeError(f"Could not create output shapefile: {output_shp}")
output_layer = output_ds.CreateLayer(input_layer.GetName(), input_layer.GetSpatialRef(), input_layer.GetGeomType())

# Define the fields and add them to the output layer
field_def_list = []
for i in range(input_layer.GetLayerDefn().GetFieldCount()):
    field_def = input_layer.GetLayerDefn().GetFieldDefn(i)
    field_def_list.append(field_def)
    output_layer.CreateField(field_def)
print(field_def_list, len(field_def_list))
# Copy features from input layer to output layer with modified geometry (e.g., move features)
for feature in input_layer:
    geom = feature.GetGeometryRef()
    # Modify the geometry here (e.g., move the feature)
    # Example: geom.SetPoint(0, NEW_X, NEW_Y)  # Move the first point of the geometry
    new_feature = ogr.Feature(output_layer.GetLayerDefn())
    if geom is not None:
        new_feature.SetGeometry(geom)
        print(geom.GetPoint())
    # fields are copied by position, in the order they were created above
    for i in range(len(field_def_list)):
        new_feature.SetField(i, feature.GetField(i))
    output_layer.CreateFeature(new_feature)
    new_feature = None

# Close the shapefiles; drop the layers first so no reference outlives its data source
input_layer = None
output_layer = None
input_ds = None
output_ds = None

## get the center points of the white dots in a png image

In [None]:
#very slow compared to opencv
from PIL import Image
import numpy as np
from scipy import ndimage
def getPoints_fromPNG(image_path):
    """Return the centroids of the white blobs of an image.

    The image is converted to grayscale, pixels brighter than 254 are treated
    as white, connected white regions are labelled with ``ndimage.label`` and
    the centroid of every region is computed.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        List of ``(x, y)`` float tuples in pixel coordinates (column, row),
        one per connected white region, in label order. Empty when the image
        has no white pixel.
    """
    # read the pixels and release the file handle right away
    with Image.open(image_path) as img:
        #grayscale
        img_gray = np.array(img.convert('L'))

    threshold = 254
    img_binary = img_gray > threshold

    #find connected components in the binary mask
    labeled_img, num_features = ndimage.label(img_binary)
    if num_features == 0:
        return []

    # One pass over the image for all labels; scanning the whole image once
    # per label is what made the per-label np.where version slow.
    centers_row_col = ndimage.center_of_mass(
        img_binary.astype(float), labeled_img, np.arange(1, num_features + 1))

    # center_of_mass yields (row, col); callers expect (x, y) = (col, row)
    return [(float(col), float(row)) for row, col in centers_row_col]

# `image_path` was not assigned anywhere before this call, so the cell raised a
# NameError on a fresh kernel.
# NOTE(review): path taken from the commented-out example of the OpenCV variant - confirm.
image_path = '../data/tile_png_0_0.png'
white_dot_centers = getPoints_fromPNG(image_path)
print(white_dot_centers)
white_dot_centers.count((1071.0,3.0))
len(white_dot_centers)

In [10]:
#fastest way
import cv2
def getPoints_fromPNG(image_path):
    """Return the integer centroids of the bright blobs of an image (OpenCV).

    The image is converted to grayscale, thresholded at 200 and the external
    contours of the resulting binary mask are located. The centroid of each
    contour is computed from its moments.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        List of ``(x, y)`` integer tuples in pixel coordinates, one per contour.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # imread signals a missing/unreadable file with None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")

    #grayscale
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    #threshold
    _, img_binary = cv2.threshold(img_gray, 200, 255, cv2.THRESH_BINARY)

    #find contours; [-2] works whether findContours returns 2 or 3 values
    contours = cv2.findContours(img_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    white_dot_centers = []
    for contour in contours:
        #calculate the moments
        M = cv2.moments(contour)

        #calculate centroid coordinates
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            # Zero-area contour (a single pixel or a straight run of pixels):
            # fall back to the mean of the contour points so the dot is kept.
            mean_x, mean_y = contour.reshape(-1, 2).mean(axis=0)
            cX = int(mean_x)
            cY = int(mean_y)
        white_dot_centers.append((cX, cY))

    return white_dot_centers

#image_path = '../data/tile_png_0_0.png'
#white_dot_centers = getPoints_fromPNG(image_path)
#print(white_dot_centers)
#white_dot_centers.count((1071, 3))
#len(white_dot_centers)

## create shp from points

In [None]:
import os
from osgeo import ogr, osr

In [None]:
def create_shapefile(output_shp, points, epsg=23700):
    """Write a list of coordinates as a point shapefile.

    An existing file at ``output_shp`` is deleted first. The layer is named
    ``points`` and carries one integer field, ``ID``, numbered from 1 in the
    order of ``points``.

    Args:
        output_shp: Path of the shapefile to create.
        points: Iterable of coordinate sequences; the first two items of each
            are used as x and y (extra items such as z are ignored).
        epsg: EPSG code of the spatial reference assigned to the layer.

    Raises:
        RuntimeError: If the output data source cannot be created.
    """
    driver = ogr.GetDriverByName('ESRI Shapefile')
    if os.path.exists(output_shp):
        driver.DeleteDataSource(output_shp)
    output_ds = driver.CreateDataSource(output_shp)
    if output_ds is None:
        raise RuntimeError(f"Could not create output shapefile: {output_shp}")

    spatial_ref = osr.SpatialReference()
    spatial_ref.ImportFromEPSG(epsg)

    #new layer
    output_layer = output_ds.CreateLayer("points", spatial_ref, ogr.wkbPoint)

    #define a field for the point ID
    output_layer.CreateField(ogr.FieldDefn("ID", ogr.OFTInteger))
    layer_defn = output_layer.GetLayerDefn()

    #create points and add them to the layer
    for point_id, coords in enumerate(points, start=1):
        point = ogr.Geometry(ogr.wkbPoint)
        # AddPoint() would add z=0 and turn the geometry into a 2.5D point,
        # which does not match the 2D wkbPoint layer type.
        point.AddPoint_2D(float(coords[0]), float(coords[1]))

        feature = ogr.Feature(layer_defn)
        feature.SetGeometry(point)
        feature.SetField("ID", point_id)
        output_layer.CreateFeature(feature)

        feature = None

    # drop the layer before the data source so the file is flushed and closed
    output_layer = None
    output_ds = None


output_shp = '../data/frompng_tile_shp_0_0.shp'
#points = white_dot_centers#[(10, 20), (30, 40), (50, 60)]  # Example list of coordinates

create_shapefile(output_shp, points)


## get tiff information

In [None]:
import rasterio
from rasterio.warp import transform_bounds
from pyproj import Transformer

In [None]:
def get_tiff_location(tiff_path, target_epsg):
    """Print and return the bounds of a raster transformed to another CRS.

    Args:
        tiff_path: Path of the raster.
        target_epsg: EPSG code of the CRS the bounds are transformed to.

    Returns:
        The value returned by ``transform_bounds`` for the raster bounds.
    """
    with rasterio.open(tiff_path) as src:
        # Transform the bounding box coordinates to the target EPSG code
        bbox_transformed = transform_bounds(src.crs, f'EPSG:{target_epsg}', *src.bounds)
    print(f"Bounding box coordinates in EPSG:{target_epsg}: {bbox_transformed}")
    return bbox_transformed

# Example usage:
tiff_path = "../data/tiles/tile_tiff_0_0.tif"
target_epsg = 23700  # EPSG:23700 is HD72 / EOV (Hungarian national grid), not WGS 84
tile_bbox = get_tiff_location(tiff_path, target_epsg)


In [None]:
# get tif info.
tiff_path = '../data/tiles/original/tile_tiff_0_0.tif'
# Keep the georeferencing of the tile in notebook-level variables
with rasterio.open(tiff_path) as src:
    transform, crs = src.transform, src.crs  #affine transformation object, coordinate reference system
print("Affine transformation:", transform)
print("Coordinate Reference System (CRS):", crs)

## pipeline

In [None]:
import rasterio
from rasterio.transform import from_origin
from rasterio.warp import reproject, Resampling
import geopandas as gpd
from shapely.geometry import Point

In [27]:
# 1. data prep.
#   - splitting geo. images
# Paths of the single tile that is pushed through the pipeline
tile_tiff_path = '../data/tiles/original/tile_tiff_0_0.tif'
tile_shp_path = '../data/tiles/original/tile_shp_0_0.shp'
tile_png_path = '../data/tiles/formatted/shp_0_0_png.png'
result_shp_path = "../results/predicted_points_0_0.shp"

# 1. data prep.
#   - splitting geo. images
split('../data/actual_data/2023-02-23-bakonyszucs_im_cropped.tif',
      '../data/actual_data/Pont.shp',
      '../data/tiles/original')

#   - convert to .png for the model
os.makedirs(os.path.dirname(tile_png_path), exist_ok=True)
convert_SHPtoPNG(tile_tiff_path,
                 tile_shp_path,
                 tile_png_path,
                 3)
# 2. prediction

# ****
# model pred.
# ****

# 3. get the predicted points's image coordinates
image_coords = getPoints_fromPNG(tile_png_path)

# 4. creating the shapefile
#   - read the georeferencing of this very tile here instead of relying on
#     `transform` / `crs` left behind by another cell
with rasterio.open(tile_tiff_path) as src:
    transform = src.transform
    crs = src.crs

#   - img_coords -> geo_coords.
geo_coords = [transform * (x, y) for x, y in image_coords]

#   - geo_coords -> Points (obj.)
point_geoms = [Point(coord) for coord in geo_coords]

#   - create a GeoDataFrame with the Points
gdf = gpd.GeoDataFrame(geometry=point_geoms, crs=crs)  # Adjust CRS as needed

#   - save the GeoDataFrame as a shapefile
os.makedirs(os.path.dirname(result_shp_path), exist_ok=True)
gdf.to_file(result_shp_path)

splitting tiffs: 100%|██████████| 15/15 [00:00<00:00, 107.80it/s]
100%|██████████| 15/15 [00:00<00:00, 200364.84it/s]
  _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs)
splitting shps: 100%|██████████| 15/15 [00:00<00:00, 15.99it/s]


success
