In [35]:
from shapely import Polygon, MultiPolygon
import math
import os
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
import requests
import osm2geojson
from shapely.ops import unary_union


def get_boundary(osm_id: int):
    """Download one OSM relation, with inline geometry, from the Overpass API.

    Args:
        osm_id: Numeric id of the OSM relation to fetch.

    Returns:
        The decoded JSON body of the Overpass response.

    Raises:
        requests.HTTPError: If Overpass answers with a 4xx/5xx status code.
    """
    overpass_url = "http://lz4.overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
            (
                relation({osm_id});
            );
    out geom;
    """
    result = requests.get(overpass_url, params={"data": overpass_query})
    # Fail with the HTTP status instead of an opaque JSON decode error when the
    # server returns an error page (rate limiting, timeouts, bad query).
    result.raise_for_status()
    return result.json()


def get_routes(osm_id: int, public_transport_type: str):
    """Fetch route relations of one transport type inside an OSM relation's area.

    Args:
        osm_id: Numeric id of the OSM relation that is converted to an area.
        public_transport_type: Value matched against the ``route`` tag.

    Returns:
        A DataFrame with one row per element of the Overpass response.

    Raises:
        requests.HTTPError: If Overpass answers with a 4xx/5xx status code.
    """
    overpass_url = "http://lz4.overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
            (
                relation({osm_id});
            );map_to_area;
            (
                relation(area)['route'='{public_transport_type}'];
            );
    out geom;
    """
    result = requests.get(overpass_url, params={"data": overpass_query})
    # Fail with the HTTP status instead of an opaque JSON decode error when the
    # server returns an error page.
    result.raise_for_status()
    json_result = result.json()["elements"]

    return pd.DataFrame(json_result)


def get_routes_by_poly(polygon: Polygon, public_transport_type: str):
    """Fetch route relations of one transport type that fall inside a polygon.

    Args:
        polygon: Search area; only its exterior ring is sent to Overpass.
        public_transport_type: Value matched against the ``route`` tag.

    Returns:
        A DataFrame with one row per element of the Overpass response.

    Raises:
        Exception: If the HTTP status code is anything other than 200.
    """
    # The closing vertex repeats the first one, so it is dropped. Each vertex is
    # written as "y x" (latitude first when x is longitude).
    ring = polygon.exterior.coords[:-1]
    polygon_coords = " ".join(f"{lat} {lon}" for lon, lat in ring)

    endpoint = "http://lz4.overpass-api.de/api/interpreter"
    query = f"""
    [out:json];
            (
                relation(poly:"{polygon_coords}")['route'='{public_transport_type}'];
            );
    out geom;
    """
    response = requests.post(endpoint, data={"data": query})
    if response.status_code != 200:
        raise Exception(f"Request failed with status code {response.status_code}")

    return pd.DataFrame(response.json()["elements"])


# Download the boundary of one OSM relation and the routes of `transport_type` inside it.
transport_type = "ferry"
osm_id =3767134
# Overpass JSON -> GeoJSON features -> GeoDataFrame labelled as EPSG:4326.
boundary = osm2geojson.json2geojson(get_boundary(osm_id))
boundary = gpd.GeoDataFrame.from_features(boundary["features"]).set_crs(4326)
# Merge every boundary feature into a single geometry.
poly = unary_union(boundary.geometry)
# NOTE(review): for a multi-part boundary only the first part is kept, so routes
# in the remaining parts are not queried — confirm this is intended.
if isinstance(poly,MultiPolygon):
    poly=poly.geoms[0]
data_osm = get_routes_by_poly(poly, transport_type)


In [36]:
from pyproj import CRS
from pyproj.aoi import AreaOfInterest
from pyproj.database import query_utm_crs_info


def find_bounds(bounds):
    """Compute the bounding box that encloses a collection of bounding boxes.

    Args:
        bounds: Records with ``minlat``, ``minlon``, ``maxlat`` and ``maxlon``
            keys, in any form accepted by ``pd.json_normalize``.

    Returns:
        A ``(min_lat, min_lon, max_lat, max_lon)`` tuple.
    """
    frame = pd.json_normalize(bounds)
    lower = [frame[key].min() for key in ('minlat', 'minlon')]
    upper = [frame[key].max() for key in ('maxlat', 'maxlon')]
    return (*lower, *upper)


def estimate_crs_for_overpass(bounds):
    """Pick a WGS 84 UTM CRS covering the combined extent of the given boxes.

    Args:
        bounds: Bounding-box records as accepted by ``find_bounds``.

    Returns:
        The CRS built from the EPSG code of the first candidate returned by
        ``query_utm_crs_info``.
    """
    south, west, north, east = find_bounds(bounds)
    area = AreaOfInterest(
        west_lon_degree=west,
        south_lat_degree=south,
        east_lon_degree=east,
        north_lat_degree=north,
    )
    candidates = query_utm_crs_info(datum_name="WGS 84", area_of_interest=area)
    best = candidates[0]
    return CRS.from_epsg(best.code)


# UTM CRS estimated from the bounding boxes of all downloaded routes; passed to the parsing step.
local_crs = estimate_crs_for_overpass(data_osm['bounds'])

In [37]:
from scipy.spatial.distance import cdist
from shapely.geometry import Point
from pyproj import Transformer
from shapely.geometry import LineString


def _link_unconnected(connected_ways, threshold):
    """Chain disjoint polylines into one coordinate list by joining nearest endpoints.

    Args:
        connected_ways: List of polylines, each a list of coordinate pairs.
        threshold: Largest endpoint-to-endpoint gap that may be bridged, in the
            same units as the coordinates.

    Returns:
        A single list of coordinates made of the input polylines placed end to
        end, each reversed when needed. If the closest remaining pair of
        endpoints is farther apart than ``threshold``, the function restarts
        with only the polylines linked so far.
    """
    # Compute the links between the lines.
    # Endpoint list: index 2*k is the first point of way k, index 2*k + 1 its last point.
    connect_points = [point for coords in connected_ways for point in (coords[0], coords[-1])]
    distances = cdist(connect_points, connect_points)
    n = distances.shape[0]
    # Forbid pairing an endpoint with itself or with the other end of the same way.
    mask = (np.arange(n)[:, None] // 2) == (np.arange(n) // 2)
    distances[mask] = np.inf
    indexes = []
    for i in range(len(connected_ways) - 1):
        # Globally closest pair of still-unused endpoints.
        min_index = np.unravel_index(np.argmin(distances), distances.shape)
        if (distances[min_index] > threshold):
            # Gap too large: keep only the ways that already take part in a link and retry.
            # NOTE(review): if no link has been recorded yet, the retry receives an
            # empty list — confirm how that case should behave.
            way_inds = [way_ind for x in indexes for way_ind in (x[0] // 2, x[1] // 2)]
            return _link_unconnected([way for i, way in enumerate(connected_ways) if i in way_inds], threshold)
        # Each endpoint can be used in one link only: blank out its row and column.
        distances[min_index[0], :] = np.inf
        distances[min_index[1], :] = np.inf
        distances[:, min_index[0]] = np.inf
        distances[:, min_index[1]] = np.inf
        indexes.append(min_index)

    # Determine the "starting line": a way that appears in exactly one link.
    way_inds = [way_ind for x in indexes for way_ind in (x[0] // 2, x[1] // 2)]
    # NOTE(review): next() has no default, so StopIteration is raised if every way
    # appears in two links — confirm whether that can happen.
    first = next((elem for elem in way_inds if way_inds.count(elem) == 1))

    # Both endpoint indices of the way currently at the tail of the chain.
    find_ind = first * 2, first * 2 + 1
    new_connected_ways = None

    # Join everything into a single line.
    for i in range(len(connected_ways) - 1):
        # Link that touches the current tail way.
        index = next((i for i, t in enumerate(indexes) if (find_ind[0] in t or find_ind[1] in t)))
        connection = indexes.pop(index)
        if new_connected_ways is None:
            # Orient the first way so that its linked endpoint ends up last.
            first = next((x for x in connection if x in find_ind))
            if first % 2 != 0:
                # This is the same list object as the input way, so the += below extends it in place.
                new_connected_ways = connected_ways[first // 2]
            else:
                new_connected_ways = connected_ways[first // 2][::-1]

        # Endpoint on the other side of the link, and the way it belongs to.
        next_line = next(x for x in connection if x not in find_ind)
        find_ind = (next_line, next_line + 1) if next_line % 2 == 0 else (next_line, next_line - 1)

        # Even index = linked at its first point, append as is; odd = linked at its last point, append reversed.
        if next_line % 2 == 0:
            new_connected_ways += connected_ways[next_line // 2]
        else:

            new_connected_ways += connected_ways[next_line // 2][::-1]
    return new_connected_ways


# Relation member roles treated as platforms when parsing a route.
PLATFORM_ROLES = ['platform_entry_only', 'platform', 'platform_exit_only']

# Relation member roles treated as stop positions when parsing a route.
STOPS_ROLES = ['stop', 'stop_exit_only', 'stop_entry_only']


def parse_overpass_route_response(loc: dict, crs: CRS):
    """Extract path, platforms, stops and name from one Overpass route relation.

    Args:
        loc: One route element (mapping or DataFrame row) with ``tags`` and
            ``members`` entries as returned by Overpass with ``out geom``.
        crs: Target CRS; every coordinate is transformed from EPSG:4326 into it.

    Returns:
        A Series with:
            path: list of (x, y) tuples of the joined route line, or None when
                the relation has no usable way members.
            platforms: list of (x, y) tuples, or None when there are none.
            stops: list of (x, y) tuples, or None when there are none.
            route: the ``ref`` tag, else the ``name`` tag, else None.
    """
    transformer = Transformer.from_crs('EPSG:4326', crs, always_xy=True)

    def transform_geometry(loc):
        # Members with a coordinate list are reduced to the centroid of their line;
        # everything else is read as a node from its own lon/lat. Using .get() keeps
        # this working when no member of the relation carries a 'geometry' field at
        # all, in which case the members table has no such column.
        geometry = loc.get('geometry')
        if isinstance(geometry, list):
            p = LineString([transformer.transform(coords["lon"], coords["lat"]) for coords in geometry]).centroid
            return p.x, p.y
        return transformer.transform(loc["lon"], loc["lat"])

    def process_roles(route, roles):
        filtered = route[route['role'].isin(roles)]
        if len(filtered) == 0:
            return None
        return filtered.apply(transform_geometry, axis=1).tolist()

    tags = loc['tags']
    transport_name = tags['ref'] if 'ref' in tags else tags.get('name')

    route = pd.DataFrame(loc["members"])

    platforms = process_roles(route, PLATFORM_ROLES)
    stops = process_roles(route, STOPS_ROLES)

    # Only way members with an empty role form the route line.
    ways = route[(route["type"] == "way") & (route["role"] == '')]

    if len(ways) > 0:
        ways = ways["geometry"].reset_index(drop=True)
        ways = ways.apply(lambda x: ([transformer.transform(coords["lon"], coords["lat"]) for coords in x])).tolist()

        connected_ways = [[]]
        cur_way = 0
        for coords in ways:
            # Join consecutive ways while they share an endpoint with the current segment.
            if not connected_ways[cur_way]:
                connected_ways[cur_way] += coords
                continue

            # Closed ways (roundabouts, loops) are skipped rather than merged.
            if coords[0] == coords[-1]:
                continue
            if connected_ways[cur_way][-1] == coords[0]:
                connected_ways[cur_way] += coords[1:]
            elif connected_ways[cur_way][-1] == coords[-1]:
                connected_ways[cur_way] += coords[::-1][1:]
            elif connected_ways[cur_way][0] == coords[0]:
                connected_ways[cur_way] = coords[1:][::-1] + connected_ways[cur_way]
            elif connected_ways[cur_way][0] == coords[-1]:
                connected_ways[cur_way] = coords + connected_ways[cur_way][1:]
            else:
                # No shared endpoint: start a new disjoint segment.
                connected_ways += [coords]
                cur_way += 1

        # With several segments, drop the ones that close on themselves, then
        # bridge the remaining ones by their nearest endpoints.
        if len(connected_ways) > 1:
            connected_ways = [way for way in connected_ways if way[0] != way[-1]]
        if len(connected_ways) > 1:
            connected_ways = _link_unconnected(connected_ways, threshold=500)
        elif connected_ways:
            connected_ways = connected_ways[0]
        else:
            # Every segment was a loop and got removed: there is no usable path.
            connected_ways = None

    else:
        connected_ways = None

    return pd.Series({"path": connected_ways, "platforms": platforms, 'stops': stops, 'route': transport_name})


# Parse every downloaded route into path / platforms / stops / route columns, in local_crs coordinates.
data_geom = data_osm.apply(parse_overpass_route_response, crs=local_crs, axis=1, result_type='expand')

In [48]:
from shapely.ops import substring
from networkx.classes import DiGraph
from scipy.spatial import cKDTree


def geometry_to_graph(loc):
    """Turn one parsed route into a flat table of graph node and edge records.

    Args:
        loc: Row with ``route``, ``platforms``, ``stops`` and ``path`` fields;
            ``loc.name`` (the row label) is used to namespace node ids.

    Returns:
        A DataFrame in which each row is either a node record (``node_id``,
        ``point``, ``desc``, ``route`` and optionally ``type``) or an edge
        record (``u``, ``v``, ``geometry``, ``desc``, ``route`` and optionally
        ``type``). Returns None when the route has no path.
    """
    graph_data = []
    node_id = 0
    name = loc.route
    last_dist = None
    last_projected_stop_id = None
    platforms = loc.platforms
    stops = loc.stops
    path = loc.path
    # NOTE(review): hard-coded and independent of the transport type that was
    # downloaded — confirm this is the intended 'type' attribute.
    transport_type = 'bus'

    def add_node(desc, x, y, transport=None):
        # Node ids are (row label, running counter) so they stay unique across routes.
        record = {'node_id': (loc.name, node_id), 'point': (x, y), 'desc': desc, 'route': name}
        if transport:
            record['type'] = transport_type
        graph_data.append(record)

    def add_edge(u, v, geometry=None, desc=None, transport=None):
        record = {'u': (loc.name, u), 'v': (loc.name, v), 'geometry': geometry, 'desc': desc, 'route': name}
        if transport:
            record['type'] = transport_type
        graph_data.append(record)

    def offset_direction(point):
        # Side of the path the point lies on: 1 = left, 0 = right.
        # Returns None implicitly when the point is exactly on the local direction line.
        dist = path.project(point)

        d1 = dist - 1 if dist - 1 > 0 else 0
        d2 = dist + 1 if dist + 1 < path.length else path.length
        line = substring(path, d1, d2)
        x1, y1 = line.coords[0]
        x2, y2 = line.coords[-1]
        x, y = point.coords[0]

        cross_product = (x2 - x1) * (y - y1) - (y2 - y1) * (x - x1)
        if cross_product > 0:
            return 1
        elif cross_product < 0:
            return 0

    def offset_point(point, direction, distance=5):
        # Place a platform `distance` away from the path, on the requested side,
        # next to the projection of `point` onto the path.
        dist = path.project(point)
        d1 = dist - 1 if dist - 1 > 0 else 0
        d2 = dist + 1.1 if dist + 1.1 < path.length else path.length
        nearest_pt_on_line = path.interpolate(dist)
        line = substring(path, d1, d2)

        x1, y1 = line.coords[0]
        x2, y2 = line.coords[-1]

        # Unit vector along the local direction of the path.
        dx, dy = x2 - x1, y2 - y1
        length = math.sqrt(dx ** 2 + dy ** 2)
        dx, dy = dx / length, dy / length

        if direction == 0:  # to the right
            nx, ny = dy, -dx
        else:  # to the left (also used when direction is None)
            nx, ny = -dy, dx

        # The shifted point.
        offset_x = nearest_pt_on_line.x + nx * distance
        offset_y = nearest_pt_on_line.y + ny * distance
        return offset_x, offset_y

    def process_platform(platform):  # TODO: if the path consists of two stops, the edge is not added
        nonlocal node_id, last_dist, last_projected_stop_id

        dist = path.project(platform)
        projected_stop = path.interpolate(dist)
        platform_to_stop = LineString([platform, projected_stop])

        # Stop node on the path, linked to the previous stop along the path.
        add_node('stop', projected_stop.x, projected_stop.y, transport=True)
        if last_dist is not None:
            cur_path = substring(path, last_dist, dist)
            add_edge(last_projected_stop_id, node_id, desc='routing', geometry=cur_path, transport=True)
        last_projected_stop_id = node_id

        # Platform node, linked to its stop in both directions.
        node_id += 1
        add_node('platform', platform.x, platform.y)
        add_edge(node_id - 1, node_id, desc='boarding', geometry=platform_to_stop)
        add_edge(node_id, node_id - 1, desc='boarding', geometry=platform_to_stop)
        node_id += 1
        return dist, last_projected_stop_id, node_id

    if not path:
        print("no path")
        return None
    path = LineString(path)
    # No platforms at all.
    if not platforms:
        if not stops:  # build the route from the start and the end of the path
            platforms = [offset_point(path.interpolate(0), 1, 7), offset_point(path.interpolate(path.length), 1, 7)]
        else:  # only stops are known: turn them into platforms
            platforms = [offset_point(Point(stop), 1, 7) for stop in stops]
            stops = None

    stops = [] if not stops else stops

    # More stops than platforms: find the stops without a platform and synthesise platforms for them.
    if len(stops) > len(platforms):
        stop_tree = cKDTree(stops)
        _, indices = stop_tree.query(platforms)
        # (platform coords, index of its nearest stop); -1 marks a stop with no platform.
        connection = [(platforms[platform], stop) for platform, stop in enumerate(indices)]
        connection += [(-1, stop) for stop in set(range(len(stops))) ^ set(indices)]
        connection.sort(key=lambda x: x[1])
        # Synthetic platforms go on the same side of the path as the middle platform.
        direction = offset_direction(Point(platforms[len(platforms) // 2]))
        stops_to_platforms = {stop: offset_point(Point(stops[stop]), direction, 7) for stop in
                              set(range(len(stops))) ^ set(indices)}
        platforms = [coord if (coord != -1) else (stops_to_platforms.get(ind)) for coord, ind in connection]
        stops = []

    # Only one platform is left.
    if len(platforms) == 1:
        platform = Point(platforms[0])
        dist = path.project(platform)
        if dist == path.length or dist == 0:  # the platform is a terminus
            platforms = [offset_point(path.interpolate(0), 1, 7), offset_point(path.interpolate(path.length), 1, 7)]
        else:  # the platform is somewhere in the middle
            platforms = [offset_point(path.interpolate(0), 1, 7), platform,
                         offset_point(path.interpolate(path.length), 1, 7)]
    platforms = [Point(coords) for coords in platforms]
    if len(platforms) >= len(stops):
        for platform in platforms:
            # Compare with None explicitly: a first platform projecting to distance
            # 0.0 is falsy, and a truthiness test would treat the next platform as
            # the first one again and could flip the path in the middle of the route.
            is_first = last_dist is None
            last_dist, last_projected_stop_id, node_id = process_platform(platform)
            if is_first and last_dist > path.length / 2:
                # The first platform sits in the second half of the path: flip the
                # path so that distances grow in the order the platforms are visited.
                path = path.reverse()
                last_dist = path.project(platform)

    to_return = pd.DataFrame(graph_data)
    return to_return

# Build the node/edge records of every route, then stack them into one table.
# geometry_to_graph returns None for routes without a path; pd.concat skips None
# entries unless all of them are None.
res = data_geom.apply(geometry_to_graph, axis=1)
graphs = pd.concat(res.tolist(), ignore_index=True)

In [49]:

# Platforms that share exactly the same point are merged into one node that
# remembers every (route row, counter) id and every route name it came from.
platforms = graphs[graphs['desc'] == 'platform']
platforms = platforms.groupby('point').agg({'node_id': list, 'route': list, 'desc': 'first'}).reset_index()

# Stops are kept one per record. The explicit copy makes the column assignments
# below act on an independent frame instead of a slice of `graphs`.
stops = graphs[graphs['desc'] == 'stop'][['point', 'node_id', 'route', 'desc']].copy()
stops['node_id'] = stops['node_id'].apply(lambda x: [x])
stops['route'] = stops['route'].apply(lambda x: [x])
all_nodes = pd.concat([platforms, stops], ignore_index=True)

# Old per-route node id -> row position in `all_nodes`, which becomes the graph node id.
mapping = {}
for index, row in all_nodes.iterrows():
    for node_id in row['node_id']:
        mapping[node_id] = index


def replace_with_mapping(value):
    """Return the merged node id for an old node id, or None if it is unknown."""
    return mapping.get(value)


# Apply the replacement to every id column of the records table.
graphs['u'] = graphs['u'].apply(replace_with_mapping)
graphs['v'] = graphs['v'].apply(replace_with_mapping)
graphs['node_id'] = graphs['node_id'].apply(replace_with_mapping)

edges = graphs[(graphs['desc'] == 'boarding') | (graphs['desc'] == 'routing')]

graph = nx.DiGraph()
for i, node in all_nodes.iterrows():
    # Merged nodes list every distinct route name once, comma-separated.
    route = ','.join(map(str, set(node['route'])))
    graph.add_node(i, x=node['point'][0], y=node['point'][1], desc=node['desc'], route=route)
for i, edge in edges.iterrows():
    # Geometry is stored as its string form so the graph can be written to text formats.
    graph.add_edge(edge['u'], edge['v'], desc=edge['desc'], route=edge['route'], geometry=str(edge['geometry']))

In [None]:

# Persist the assembled graph to a GML file in the working directory.
nx.write_gml(graph, 'test1.gml')

In [50]:
from shapely import from_wkt
import osmnx as ox

def nx_to_gdf(
    graph: nx.MultiDiGraph, nodes: bool = False, edges: bool = False, node_geometry=False, fill_edge_geometry=False
) -> gpd.GeoDataFrame | tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """Convert a graph to GeoDataFrames of its nodes and/or edges via osmnx.

    Edge ``geometry`` attributes are parsed from their string form with
    ``from_wkt`` first; this rewrites the attributes of ``graph`` in place.

    Args:
        graph: Graph to convert.
        nodes: Whether to return the node table.
        edges: Whether to return the edge table (returned with a reset index).
        node_geometry: Forwarded to ``ox.graph_to_gdfs``.
        fill_edge_geometry: Forwarded to ``ox.graph_to_gdfs``.

    Returns:
        ``(nodes, edges)`` when both are requested, otherwise the single
        requested table.

    Raises:
        ValueError: If neither ``nodes`` nor ``edges`` is requested.
    """
    for *_, attrs in graph.edges(data=True):
        if "geometry" not in attrs:
            continue
        attrs["geometry"] = from_wkt(str(attrs["geometry"]))

    if not (nodes or edges):
        raise ValueError("You must specify either nodes or edges as True.")

    if not edges:
        # Nodes only.
        return ox.graph_to_gdfs(graph, nodes=True, edges=False, node_geometry=node_geometry)

    if not nodes:
        # Edges only.
        edge_table = ox.graph_to_gdfs(graph, nodes=False, edges=True, fill_edge_geometry=fill_edge_geometry)
        return edge_table.reset_index()

    node_table, edge_table = ox.graph_to_gdfs(
        graph, nodes=True, edges=True, node_geometry=node_geometry, fill_edge_geometry=fill_edge_geometry
    )
    return node_table, edge_table.reset_index()
# Attach the CRS as a graph attribute before the osmnx conversion, then convert a MultiDiGraph copy.
graph.graph['crs']=local_crs
nodes,edges = nx_to_gdf(nx.MultiDiGraph(graph),nodes=True,edges=True,node_geometry=True)

In [51]:
# Interactive map of the edges coloured by route, saved as a standalone HTML file.
m1=edges.explore(column='route',tiles='CartoDB positron')
m1.save('rotes.html')

In [None]:
# Scratch: pair the platforms of one parsed route with their nearest stops.
# NOTE(review): this cell rebinds the global names `platforms` and `stops`, and it
# assumes t['platforms'] / t['stops'] hold geometry objects — confirm against the
# data_geom that is current when the cell is run.
t = data_geom.iloc[3]
platforms = gpd.GeoDataFrame(geometry=t['platforms'], crs=local_crs)
stops = gpd.GeoDataFrame(geometry=t['stops'], crs=local_crs)

connection = gpd.sjoin_nearest(platforms, stops)
connection

In [None]:
from shapely.ops import substring

# Scratch: experiment with shapely.ops.substring on a small polyline.
line = LineString([(0, 0), (2, 2), (5, 5), (10, 0)])

# NOTE(review): these two values are never used; the call below passes 4 and 10 directly.
start_distance = 4
end_distance = 6

sub_line = substring(line, 4, 10)
# The cell displays the original line, not sub_line.
line

In [None]:
# Scratch: draw one parsed route path and overlay another geometry set in red.
# NOTE(review): `graph_gdf` is not defined anywhere in the visible notebook, so
# this cell presumably relies on leftover kernel state.
m1 = gpd.GeoDataFrame(geometry=[LineString(x) for x in data_geom['path'].iloc[2:3]], crs=local_crs).explore()
gpd.GeoDataFrame(geometry=graph_gdf.iloc[2], crs=local_crs).explore(m=m1, color='red')

In [None]:
from shapely import LineString
import geopandas as gpd

# Interactive map of every parsed route path, coloured by row index.
# NOTE(review): LineString(x) presumably fails for rows whose path is None — confirm.
m1 = gpd.GeoDataFrame(geometry=[LineString(x) for x in data_geom['path']], crs=local_crs).reset_index().explore(
    column='index', tiles='CartoDB positron')
# gpd.GeoDataFrame(data_geom.iloc[1]['platforms'], crs=4326).reset_index().explore(m=m1, color='red')
m1

In [None]:
t = [(1, 6), (0, 3), (2, 5)]
for i in range(3):
    # The loop body was never written; the placeholder keeps the cell parseable
    # so that a full notebook run does not stop here with a SyntaxError.
    pass


In [None]:
# Scratch: show the platform nodes of one raw route relation on a map.
test = pd.DataFrame(data_osm.iloc[2]['members'])
# Keep node members whose role is one of the platform roles.
test = test[(test['type'] == 'node') & (test['role'].isin(['platform_entry_only', 'platform', 'platform_exit_only']))]
test['geometry'] = test.apply(lambda x: (Point(x["lon"], x["lat"])), axis=1)
test.reset_index(drop=True, inplace=True)
gdf = gpd.GeoDataFrame(test.reset_index(), crs=4326)
# Buffer radius is in CRS units (degrees here), only to make the points visible.
gdf.geometry = gdf.geometry.buffer(0.0001)
gdf.explore()

In [None]:
# Scratch: list the (lon, lat) coordinate tuples of every way member of one raw route.
test = pd.DataFrame(data_osm.iloc[1]['members'])
test = test[test['type'] == 'way']
test = test["geometry"]
test = test.apply(lambda x: ([(coords["lon"], coords["lat"]) for coords in x]))
# gdf = gpd.GeoDataFrame(test.reset_index(),geometry='geometry',crs=4326)
# gdf.geometry = gdf.geometry.buffer(0.0001)
# gdf.explore()
test

In [None]:
# Scratch: display the current value of `test` again.
test

#### ПОПЫТКА СОЕДИНЯТЬ ДОРОГИ 

In [None]:
from scipy.spatial import cKDTree
from shapely import LineString, Point
import geopandas as gpd

# NOTE(review): rebinding this name to an empty list replaces the populated
# PLATFORM_ROLES defined earlier in the notebook for any code run afterwards.
PLATFORM_ROLES = []


def parse_overpass_route_response(loc: dict):
    """Earlier attempt at parsing a route relation: join its ways in listed order.

    NOTE: this definition shadows the two-argument
    ``parse_overpass_route_response(loc, crs)`` defined earlier in the notebook.

    Args:
        loc: One route element with ``tags`` and ``members`` entries.

    Returns:
        A Series with:
            way: list of (lon, lat) tuples of the joined line, or None when the
                relation has no way members.
            platforms: Series of Points built from every node member, or None
                when there are no node members.
            name: the ``ref`` tag, else the ``name`` tag, else None.
    """
    tags = loc['tags']
    # Fall back to None instead of raising when neither tag is present.
    transport_name = tags['ref'] if 'ref' in tags else tags.get('name')
    route = pd.DataFrame(loc["members"])

    ways = route[route["type"] == "way"]

    if len(ways) > 0:
        ways = ways["geometry"].reset_index(drop=True)
        ways = ways.apply(lambda x: ([(coords["lon"], coords["lat"]) for coords in x])).tolist()
        first = ways[0]
        # Orient the first way so that its end touches the second way. With a single
        # way there is nothing to compare against, so it is kept as is.
        if len(ways) > 1 and first[0] in (ways[1][0], ways[1][-1]):
            complete_line = first[::-1]
        else:
            complete_line = list(first)
        for coords in ways[1:]:
            if complete_line[-1] == coords[0]:
                complete_line += coords[1:]
            elif complete_line[-1] == coords[-1]:
                complete_line += coords[::-1][1:]
            elif coords[0] == coords[-1]:
                # A closed way that does not touch the line is skipped.
                continue
            else:
                # Gap: append the way starting from whichever of its ends is
                # closer to the current end of the line.
                tree = cKDTree([coords[0], coords[-1]])
                _, ind = tree.query(complete_line[-1])
                if ind == 1:
                    coords = coords[::-1]
                complete_line += coords

    else:
        complete_line = None
    if "node" in route["type"].unique():
        platforms = route[(route["type"] == "node")].apply(lambda x: Point(x["lon"], x["lat"]), axis=1)
    else:
        platforms = None

    return pd.Series({"way": complete_line, "platforms": platforms, 'name': transport_name})


# NOTE(review): this rebinds `data_geom` with the output of the older parser
# (columns way / platforms / name), replacing the earlier path / platforms / stops / route table.
data_geom = data_osm.apply(parse_overpass_route_response, axis=1, result_type='expand')
data_geom

In [None]:
# Scratch: two hard-coded polylines that start at the same coordinate, plus that
# coordinate as a point, shown together on a map.
test = [LineString(
    [(46.0046822, 51.5426408), (46.0046214, 51.5425402), (46.0042506, 51.541918), (46.0038134, 51.5411845),
     (46.0036437, 51.5408997), (46.0035859, 51.5408011), (46.0035352, 51.5407158), (46.0031053, 51.5399929),
     (46.0028598, 51.5395801), (46.0026303, 51.5391941), (46.0025706, 51.5390938), (46.0023997, 51.5391331),
     (46.00211, 51.5391985), (46.0019414, 51.5392372), (46.0008809, 51.5394906), (46.00081, 51.5395332),
     (46.000665, 51.5396234), (46.0006024, 51.5396705), (46.0005255, 51.5397869), (46.000459, 51.5399285),
     (46.0005332, 51.54006), (46.0007185, 51.5403886), (46.0008003, 51.5405164), (46.0008441, 51.5405848)]), LineString(
    [(46.0046822, 51.5426408), (46.0048226, 51.5426046), (46.0048853, 51.5425884), (46.0065015, 51.5422214),
     (46.0067008, 51.5421762), (46.007038, 51.5420996), (46.0071807, 51.5420672)]), Point(46.0046822, 51.5426408)]

gpd.GeoDataFrame(geometry=test, crs=4326).explore()

In [None]:
# Map every joined way, coloured by row index.
# NOTE(review): presumably fails for rows whose 'way' is None — confirm.
gpd.GeoDataFrame(geometry=data_geom['way'].apply(LineString), crs=4326).reset_index().explore(column='index')

In [None]:
# Print the joined line of the second route in its text form.
print(data_geom['way'].apply(LineString).iloc[1])

In [None]:
# Display the raw coordinate list of the route at position 16.
data_geom.iloc[16]['way']

In [None]:
# Scratch: join the ways of one route by hand, reporting segments that do not connect.
# NOTE(review): the next two lines index `ways` with "geometry" and call
# reset_index, i.e. they expect a table of way members; the 'way' column built by
# the parser above holds a plain coordinate list — this cell presumably predates it.
ways = data_geom.iloc[16]['way']
ways = ways["geometry"].reset_index(drop=True)
lines = ways.apply(lambda x: ([(coords["lon"], coords["lat"]) for coords in x])).tolist()
# Flip the first way when it starts where the second one starts.
if lines[0][0] == lines[1][0]:
    complete_line = lines[0][::-1]
else:
    complete_line = lines[0]
for coords in lines[1:]:
    if complete_line[-1] == coords[0]:
        complete_line += coords[1:]
    elif complete_line[-1] == coords[-1]:
        complete_line += coords[::-1][1:]
    else:
        # The way shares no endpoint with the current end of the line; it is not appended.
        print('\n\nWTF\n\n')



In [None]:
# Map the hand-joined line built in the previous cell.
gpd.GeoDataFrame(geometry=[LineString(complete_line)], crs=4326).explore()

In [None]:

from shapely import MultiPoint

# Map the platforms of every route as one MultiPoint per route, coloured by row label.
# NOTE(review): presumably fails for rows whose 'platforms' is None — confirm.
gpd.GeoDataFrame(data=[{'kek': i} for i in data_geom.index.tolist()],
                 geometry=data_geom['platforms'].apply(lambda x: MultiPoint(x)), crs=4326).explore(column='kek')

In [None]:

# Display the row labels of data_geom.
data_geom.index.tolist()

## Download from Microsoft ML Building Footprints

"""
This snippet demonstrates how to access and convert the buildings
data from .csv.gz to geojson for use in common GIS tools. You will
need to install pandas, geopandas, and shapely.
"""

import pandas as pd
import geopandas as gpd
from shapely.geometry import shape

# Region whose building-footprint tiles are downloaded.
location = 'Russia'

# Index of all published tiles; filtered down to the rows of the chosen location.
dataset_links = pd.read_csv("https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv")
# NOTE(review): the variable name says "greece", but the rows are those of `location`.
greece_links = dataset_links[dataset_links.Location == location]


def convert_to_bytes(size):
    """Convert a human-readable size such as ``'93.3KB'`` to a number of bytes.

    Args:
        size: A number, or text made of a number optionally followed by one of
            the units B, KB, MB, GB, TB (1 KB = 1024 B). Case and surrounding
            whitespace are ignored.

    Returns:
        The size in bytes as a float.

    Raises:
        ValueError: If the numeric part cannot be parsed.
    """
    text = str(size).strip().upper()
    # Longest suffixes first, so that 'GB' is not mistaken for a bare 'B'.
    units = (('TB', 1024 ** 4), ('GB', 1024 ** 3), ('MB', 1024 ** 2), ('KB', 1024), ('B', 1))
    for suffix, factor in units:
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * factor
    return float(text)


# Keep the ten largest tiles. assign() returns a new frame, so nothing is written
# into a filtered slice of `dataset_links`.
greece_links = (
    greece_links
    .assign(Size_in_bytes=greece_links['Size'].apply(convert_to_bytes))
    .sort_values(by='Size_in_bytes', ascending=False)
    .head(10)
)

# Download every selected tile (line-delimited JSON) and save it as GeoJSON named after its quad key.
for _, row in greece_links.iterrows():
    df = pd.read_json(row.Url, lines=True)
    df['geometry'] = df['geometry'].apply(shape)
    gdf = gpd.GeoDataFrame(df, crs=4326)
    gdf.to_file(f"{row.QuadKey}.geojson", driver="GeoJSON")



# NOTE(review): this repeats the tile-index download and filtering done earlier in the notebook.
location = 'Russia'

dataset_links = pd.read_csv("https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv")
# NOTE(review): the variable name says "greece", but the rows are those of `location`.
greece_links = dataset_links[dataset_links.Location == location]


def convert_to_bytes(size):
    """Convert a human-readable size such as ``'93.3KB'`` to a number of bytes.

    Args:
        size: A number, or text made of a number optionally followed by one of
            the units B, KB, MB, GB, TB (1 KB = 1024 B). Case and surrounding
            whitespace are ignored.

    Returns:
        The size in bytes as a float.

    Raises:
        ValueError: If the numeric part cannot be parsed.
    """
    text = str(size).strip().upper()
    # Longest suffixes first, so that 'GB' is not mistaken for a bare 'B'.
    units = (('TB', 1024 ** 4), ('GB', 1024 ** 3), ('MB', 1024 ** 2), ('KB', 1024), ('B', 1))
    for suffix, factor in units:
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * factor
    return float(text)


# Order the tiles from largest to smallest. assign() returns a new frame, so
# nothing is written into a filtered slice of `dataset_links`.
greece_links = (
    greece_links
    .assign(Size_in_bytes=greece_links['Size'].apply(convert_to_bytes))
    .sort_values(by='Size_in_bytes', ascending=False)
)

## Для вшэ временно тут
import math

import geopandas as gpd
import pandas as pd
import math

# Visibility-zone layers; both files are read, enriched and then overwritten in place.
f1 = 'Зоны видимости школ (без деревьев).geojson'
f2 = 'Зоны видимости школ (с деревьями).geojson'

# School polygons in EPSG:32636, with their own area and a copy of the geometry
# kept under a separate name so that it survives the merge below.
schools = gpd.read_file('Полигоны школ.geojson').to_crs(32636)
schools['ID школы'] = schools['ID школы'].astype(int)
schools['Площадь школы(м2)'] = round(schools.geometry.area,2)
schools['school_geom'] = schools.geometry


def _load_visibility_zones(path):
    """Read one visibility layer and return (zones, zones enriched with school metrics)."""
    zones = gpd.read_file(path).to_crs(32636).drop(columns=['area', 'perimeter'])
    enriched = pd.merge(zones, schools[['Площадь школы(м2)', 'ID школы', 'school_geom']], on='ID школы')
    enriched['Площадь видимости(м2)'] = round(enriched.geometry.area, 2)
    # Share of the 1000-unit buffer around the school polygon covered by the visibility zone.
    enriched['Коэффициент видимости'] = enriched.apply(
        lambda x: round(x.geometry.area / x['school_geom'].buffer(1000).area, 5), axis=1)
    return zones, enriched


d1, d1_ = _load_visibility_zones(f1)
d2, d2_ = _load_visibility_zones(f2)

# The helper geometry column is dropped before writing.
d1_.drop(columns='school_geom').to_file(f1)
d2_.drop(columns='school_geom').to_file(f2)