In [None]:
import os
import tempfile

import cv2
import easyocr
import fiona
import geopandas as gpd
import numpy as np
from osgeo import gdal, ogr
from shapely.geometry import Point, shape, Polygon, LineString
from shapely.ops import polygonize, unary_union
from shapely import LineString, MultiLineString, MultiPolygon
from sklearn.cluster import KMeans

def process_image(image_path: str, upper_left: tuple, upper_right: tuple, lower_left: tuple, lower_right: tuple):
    try:
        # Step 1: Load and blur the image
        img = cv2.imread(image_path)  # Read the image from the specified path
        if img is None:
            raise FileNotFoundError(f"Image not found: {image_path}")  # Raise an error if the image is not found
        #img = cv2.resize(img,(0,0),fx=0.3,fy=0.3)
        blurred_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Apply Gaussian blur to the image to reduce noise
        

        blurred_img = cv2.GaussianBlur(blurred_img, (9, 9), cv2.BORDER_DEFAULT)
        # Convert the blurred image from BGR to RGB color space
        


        # Convert image to pixel list and cluster using KMeans
        pixels = blurred_img.reshape(-1, 3)  # Reshape the image to a 2D array of pixels
        kmeans = KMeans(n_clusters=5)  # Initialize KMeans with 5 clusters
        kmeans.fit(pixels)  # Fit the KMeans model to the pixel data
        # Create a segmented image based on the cluster centers
        segmented_img = kmeans.cluster_centers_[kmeans.labels_].reshape(blurred_img.shape).astype(np.uint8)

        # Save the segmented image to a file
        cv2.imwrite('segmented_image.tif', cv2.cvtColor(segmented_img, cv2.COLOR_RGB2BGR))
        # Calculate width and height for GeoTIFF creation
        width = upper_right[0] - upper_left[0]
        height = upper_left[1] - lower_left[1]

        # Create a GeoTIFF for the segmented image
        driver = gdal.GetDriverByName('GTiff')  # Get the GeoTIFF driver
        out_raster = driver.Create('segmented_image.tif', segmented_img.shape[1], segmented_img.shape[0], 3, gdal.GDT_Byte)
        # Set the geographical transformation for the raster
        out_raster.SetGeoTransform((upper_left[0], width / segmented_img.shape[1], 0, upper_left[1], 0, -height / segmented_img.shape[0]))

        # Write each color band to the GeoTIFF
        for i in range(3):
            out_raster.GetRasterBand(i + 1).WriteArray(cv2.cvtColor(segmented_img, cv2.COLOR_RGB2BGR)[:, :, i])
        out_raster.FlushCache()  # Save changes to the file
        out_raster = None  # Close the raster dataset

        # Rasterize the segmented image to vector format
        raster_ds = gdal.Open('segmented_image.tif')  # Open the segmented raster dataset
        if not raster_ds:
            raise Exception("Failed to open raster dataset.")  # Raise an error if opening fails

        # Create a GeoJSON data source for the vector output
        vector_ds = ogr.GetDriverByName('GeoJSON').CreateDataSource('file.geojson')
        layer = vector_ds.CreateLayer('polygons', geom_type=ogr.wkbPolygon)  # Create a layer for polygons
        # Polygonize the raster data into vector format
        gdal.Polygonize(raster_ds.GetRasterBand(1), None, layer, -1, [], callback=None)
        vector_ds = None  # Close the vector dataset
        raster_ds = None  # Close the raster dataset

        # Step 2: Text recognition
        # Initialize the EasyOCR reader for Russian language
        reader = easyocr.Reader(['ru'])
        results = reader.readtext(img)

        height, width, _ = img.shape
        points = []

        for (bbox, text, prob) in results:
            top_left = bbox[0]  # Get the top-left corner of the bounding box
            x_ratio = top_left[0] / width  # Calculate the x ratio
            y_ratio = top_left[1] / height  # Calculate the y ratio
            geo_x = upper_left[0] + (upper_right[0] - upper_left[0]) * x_ratio
            geo_y = upper_left[1] + (lower_left[1] - upper_left[1]) * y_ratio
            point = Point(geo_x, geo_y)  # Create a Point object for the coordinates
            points.append({'geometry': point, 'name': text, 'probability': prob})  # Append point data to the list

        text_points_gdf = gpd.GeoDataFrame(points)
        text_points_gdf.set_crs(epsg=4326, inplace=True)

        def assign_landuse(text):
            if text in ["Т1Ж1", "Т2Ж1", "Т1Ж2-1", "Т1Ж2-2", "ТЖ-1", "ТЖ-1-2", "ТЖ-2", "ТЖ-2-1", "ТЖ-2-2", "ТЖ-2-3", "ТЖ-3", "ТЖ-4", "ТЖ-4-1", "ТЖ-5", "ТЖ-5-1", "ТЖ-6", "ТЖ-7"]:
                return "Residential"
            elif text in ["Т3Ж1", "Т3Ж2", "Т2ЖД2", "Т3ЖД3"]:
                return "Mixed-use"
            elif text in ["ТД1-3", "ТД2", "ТД3", "ТД-1", "ТД-1-1", "ТД-1-2", "ТД-4", "ТД-5", "ТД-6", "ТД-7"]:
                return "Business"
            elif text in ["ТР0-1", "ТР0-2", "ТР1", "ТР2", "ТР3-1", "ТР3-2", "ТР4", "ТР5-1", "ТР5-2", "ТР-1", "ТР-2", "ТР-3", "ТР-4", "ТР-5", "ТР-6", "ТР-7"]:
                return "Recreation"
            elif text in ["ТК1", "ТК2", "ТК3", "ТК", "ТК-1", "ТК-2", "ТЗН", "Т3Н-1"]:
                return "Special"
            elif text in ["ТП1", "ТП2", "ТП3", "ТП4", "ТПД1", "ТПД2", "ТП-1", "ТП-2", "ТП-3", "ТП-4", "ТП-5"]:
                return "Industrial"
            elif text in ["ТС1", "ТС-1"]:
                return "Agriculture"
            elif text in ["ТС1", "ТИ1-1", "ТИ1-2", "ТИ2", "ТИ3", "ТИ4", "ТУ", "ТИ-1", "ТИ-2", "ТТ-1", "ТТ-1-1", "ТТ-2", "ТТ-3"]:
                return "Transport"
            else:
                return None    

        text_points_gdf['landuse'] = text_points_gdf['name'].apply(assign_landuse)

        # Step 3: Process GeoJSON
        with fiona.open('file.geojson', 'r') as source:  # Open the GeoJSON file for reading
            geometries = []  # List to store geometries
            attributes = []  # List to store attributes

            # Iterate through each feature in the GeoJSON
            for feature in source:
                geom = shape(feature['geometry'])  # Convert geometry to a shapely object
                attrs = feature['properties']  # Extract properties (attributes)
                geometries.append(geom)  # Append geometry to the list
                attributes.append(attrs)  # Append attributes to the list

        # Create a GeoDataFrame from the attributes and geometries
        filtered_geodata = gpd.GeoDataFrame(attributes, geometry=geometries)
        filtered_geodata.set_crs(4326, inplace=True)  # Set the coordinate reference system to EPSG:4326
        filtered_geodata.to_crs(3857, inplace=True)  # Transform to EPSG:3857 for area calculations
        filtered_geodata.geometry = filtered_geodata.geometry.simplify(1)  # Simplify geometries
        filtered_geodata['area'] = filtered_geodata.geometry.area  # Calculate area of each geometry

        # Align the filter by area using the 25th percentile as a threshold
        threshold = filtered_geodata['area'].quantile(0.25)
        filtered_geodata = filtered_geodata[filtered_geodata['area'] > threshold]  # Filter geometries by area

        # Iterate through filtered_geodata and retain only exterior boundaries of polygons with interiors
        for index, row in filtered_geodata.iterrows():
            geom = row['geometry']
            if geom.interiors:  # Check if the geometry has interior boundaries
                filtered_geodata.at[index, 'geometry'] = Polygon(geom.exterior.coords)  # Keep only the exterior

        # Convert polygons to MultiLineString using _polygons_to_linestring function
        def _polygons_to_linestring(geom):
            def convert_polygon(polygon: Polygon):
                lines = [LineString(polygon.exterior.coords)]  # Create line from exterior
                lines.extend(LineString(p.coords) for p in polygon.interiors)  # Add lines from interiors
                return lines
            if geom.geom_type == "Polygon":
                return MultiLineString(convert_polygon(geom))  # Convert single polygon to MultiLineString
            if geom.geom_type == "MultiPolygon":
                return MultiLineString(sum([convert_polygon(p) for p in geom.geoms], []))  # Convert MultiPolygon
            return geom  # Return the geometry unchanged if not a polygon

        # Combine geometries into new polygons with _combine_geometry function
        def _combine_geometry(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
            crs = gdf.crs  # Get the coordinate reference system
            polygons = polygonize(gdf["geometry"].apply(_polygons_to_linestring).unary_union)  # Combine geometries
            return gpd.GeoDataFrame(geometry=list(polygons), crs=crs)  # Return new GeoDataFrame

        # Transform the GeoDataFrame back to the EPSG:4326 coordinate system
        filtered_geodata = _combine_geometry(filtered_geodata)
        filtered_geodata.to_crs(4326, inplace=True)

        # Merge the filtered GeoDataFrame with another GeoDataFrame by intersection
        merged_gdf = gpd.sjoin(filtered_geodata, text_points_gdf, how="left", predicate="intersects")
        merged_gdf = merged_gdf.reset_index(drop=True)  # Reset index after merging

        return filtered_geodata  
    
    finally:
        # Clean up temporary files
        if os.path.exists('file.geojson'):
            os.remove('file.geojson')

        if os.path.exists('segmented_image.tif'):
            os.remove('segmented_image.tif')

In [15]:
pzz = process_image('155519.png', 
                    (30.492358021,59.877292545), #59.877292545,30.492358021
                    (30.594947179,59.875805700), #59.875805700,30.594947179
                    (30.487330294,59.830103750), #59.830103750,30.487330294
                    (30.591116544 ,59.830741884)) #59.830741884,30.591116544

  super()._check_params_vs_input(X, default_n_init=10)


In [None]:
pzz.to_file('NOVO.geojson')

: 

In [8]:
pzz

AttributeError: 'NoneType' object has no attribute 'copy'

Unnamed: 0,geometry
0,"POLYGON ((30.52777 59.87007, 30.52777 59.86987..."
1,"POLYGON ((30.52777 59.86987, 30.52777 59.87007..."
2,"POLYGON ((30.52519 59.86975, 30.52539 59.86975..."
3,"POLYGON ((30.55217 59.86873, 30.55246 59.86873..."
4,"POLYGON ((30.53435 59.86841, 30.53444 59.86841..."
...,...
654,"POLYGON ((30.53425 59.83231, 30.53415 59.83231..."
655,"POLYGON ((30.52138 59.83868, 30.52129 59.83868..."
656,"POLYGON ((30.59230 59.84797, 30.59230 59.84804..."
657,"POLYGON ((30.58563 59.82308, 30.58563 59.82314..."


In [14]:
pzz.to_file('ffg.geojson')