In [None]:
pip install -r requirements.txt

In [233]:
import pyarrow.parquet as pq
import geopandas as gpd
import pandas as pd
import jupyter_black
import re

In [28]:
# Activate jupyter_black for this session
# (presumably it auto-formats executed cells with Black — confirm against the package docs).
jupyter_black.load()

# Объединяем все данные в один большой shapefile

In [None]:
import geopandas as gpd
import os


def merge_shapefiles(input_folder, output_file):
    """
    Merge the shapefiles found in a folder into a single output file.

    Files are processed in sorted name order. The geometry type of the first
    file that contains features (taken from its first feature) becomes the
    reference type; files whose first feature has a different geometry type
    are skipped with a message, as are files without any features.

    Parameters:
    - input_folder (str): Path to the folder containing the shapefiles.
    - output_file (str): Path of the merged file to write.

    Returns:
    - None. Progress and skip messages are printed.
    """
    # os.listdir returns names in arbitrary order; sorting makes the choice of
    # the reference geometry type reproducible between runs and machines.
    shapefiles = sorted(
        os.path.join(input_folder, file)
        for file in os.listdir(input_folder)
        if file.endswith(".shp")
    )

    # Frames are collected and concatenated once at the end instead of
    # re-concatenating the growing result on every iteration.
    matching_frames = []
    geometry_type = None

    for shapefile in shapefiles:
        gdf = gpd.read_file(shapefile)

        # A layer without features has no first geometry to inspect
        # (`.iloc[0]` would raise IndexError), so it is skipped.
        if gdf.empty:
            print(f"Пропущен файл {shapefile}: нет объектов")
            continue

        file_geometry_type = gdf.geometry.geom_type.iloc[0]

        # The first usable file defines the geometry type for the whole merge.
        if not matching_frames:
            geometry_type = file_geometry_type

        if file_geometry_type == geometry_type:
            matching_frames.append(gdf)
        else:
            print(
                f"Пропущен файл {shapefile}: тип геометрии {file_geometry_type} не совпадает с {geometry_type}"
            )

    # Nothing usable was found: report it and leave the file system untouched.
    if not matching_frames:
        print("Нет данных для объединения.")
        return

    merged_gdf = gpd.GeoDataFrame(pd.concat(matching_frames, ignore_index=True))

    # Create the destination folder if needed so the write does not fail on a
    # fresh checkout where it does not exist yet.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    merged_gdf.to_file(output_file)
    print(f"Объединенные Shape-файлы сохранены в {output_file}")


# Example run: merge the shapefiles of the Kaliningrad folder into one file
input_folder = "src/kaliningrad-latest-free/"  # folder holding the source shapefiles
output_file = (
    "src/Kaliningrad/kaliningrad-latest.shp"  # destination of the merged shapefile
)

merge_shapefiles(input_folder=input_folder, output_file=output_file)

# Считывание файла региона

In [None]:
# Load the region's OSM extract; no layer is named here
# (NOTE(review): the reader then picks its default layer — confirm that is the intended one).
data = gpd.read_file(filename="src/north-caucasus-fed-district-latest.osm.pbf")

In [13]:
# Show every column and never truncate cell text when frames are displayed.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [100]:
# Preview the first two rows of the loaded data.
data.head(2)

Unnamed: 0,osm_id,name,barrier,highway,ref,address,is_in,place,man_made,other_tags,geometry,population
0,34395335,Беслан,,,,,"Северная Осетия, Россия",town,,"""addr:country""=>""RU"",""addr:district""=>""Правобережный район"",""int_name""=>""Beslan"",""name:az""=>""Beslan"",""name:ca""=>""Beslan"",""name:ce""=>""Беслан"",""name:de""=>""Beslan"",""name:en""=>""Beslan"",""name:et""=>""Beslan"",""name:fr""=>""Beslan"",""name:hy""=>""Բեսլան"",""name:lt""=>""Beslanas"",""name:os""=>""Беслæн"",""name:ru""=>""Беслан"",""name:sv""=>""Beslan"",""name:tt""=>""Беслан"",""name:zh""=>""别斯兰"",""official_status""=>""ru:город"",""population""=>""37063"",""population:date""=>""2015-01-01"",""wikidata""=>""Q105035"",""wikipedia""=>""ru:Беслан""",POINT (44.53757 43.18756),37063
1,34395338,Назрань,,,,,,city,,"""addr:country""=>""RU"",""addr:region""=>""Ингушетия"",""int_name""=>""Nazran"",""name:ar""=>""نازران"",""name:bg""=>""Назран"",""name:ca""=>""Nazran"",""name:ce""=>""Несара"",""name:de""=>""Nasran"",""name:el""=>""Ναζράν"",""name:en""=>""Nazran"",""name:es""=>""Nazrán"",""name:et""=>""Nazran"",""name:eu""=>""Nazran"",""name:fr""=>""Nazran"",""name:he""=>""נזראן"",""name:hr""=>""Nazranj"",""name:inh""=>""Нана-Наьсаре"",""name:ja""=>""ナズラン"",""name:ka""=>""ნაზრანი"",""name:lt""=>""Nazranė"",""name:ml""=>""നസ്രാൻ"",""name:pl""=>""Nazrań"",""name:ro""=>""Nazran"",""name:ru""=>""Назрань"",""name:sk""=>""Nazraň"",""name:sr""=>""Назран"",""name:tr""=>""Nazran"",""name:uk""=>""Назрань"",""name:xmf""=>""ნაზრანი"",""name:zh-Hans""=>""纳兹兰"",""name:zh-Hant""=>""納茲蘭"",""official_status""=>""ru:город"",""population""=>""124169"",""population:date""=>""2021-01-01"",""start_date""=>""1781"",""wikidata""=>""Q163935"",""wikipedia""=>""ru:Назрань""",POINT (44.75401 43.23208),124169


In [96]:
def find_population(line: str) -> int:
    """
    Extract the `population` value from an OSM `other_tags` string.

    The string is expected to hold `"key"=>"value"` pairs, for example
    `"name:en"=>"Beslan","population"=>"37063"`. Only the exact `population`
    key is matched, so keys such as `population:date` are ignored.

    Parameters:
    - line (str): Raw tag string of one feature.

    Returns:
    - int: The population, or 0 when the tag is missing, empty or not a
      whole number.
    """
    match = re.search(r'"population"=>"(.*?)"', line)
    if match is None:
        return 0

    # Drop all whitespace so values written with digit grouping ("1 200") parse.
    digits = "".join(match.group(1).split())
    try:
        return int(digits)
    except ValueError:
        # Free-text values ("unknown", "~500", "") must not abort a whole
        # column-wide `.apply`; they are treated like a missing tag.
        return 0

In [98]:
# Derive a numeric population column from the raw tag string of every row;
# str() ensures the regex search always receives text, even for missing tags.
data["population"] = data["other_tags"].map(lambda tags: find_population(str(tags)))

In [105]:
# List the distinct values of the `place` column.
data["place"].unique()

array(['town', 'city', 'village', None, 'locality', 'hamlet',
       'neighbourhood', 'suburb', 'state', 'isolated_dwelling',
       'allotments', 'farm', 'quarter', 'square', 'yard', 'plot'],
      dtype=object)

In [109]:
# Keep only the rows whose `place` is town, city or village.
data.loc[data["place"].isin(["town", "city", "village"])]

Unnamed: 0,osm_id,name,barrier,highway,ref,address,is_in,place,man_made,other_tags,geometry,population
0,34395335,Беслан,,,,,"Северная Осетия, Россия",town,,"""addr:country""=>""RU"",""addr:district""=>""Правобережный район"",""int_name""=>""Beslan"",""name:az""=>""Beslan"",""name:ca""=>""Beslan"",""name:ce""=>""Беслан"",""name:de""=>""Beslan"",""name:en""=>""Beslan"",""name:et""=>""Beslan"",""name:fr""=>""Beslan"",""name:hy""=>""Բեսլան"",""name:lt""=>""Beslanas"",""name:os""=>""Беслæн"",""name:ru""=>""Беслан"",""name:sv""=>""Beslan"",""name:tt""=>""Беслан"",""name:zh""=>""别斯兰"",""official_status""=>""ru:город"",""population""=>""37063"",""population:date""=>""2015-01-01"",""wikidata""=>""Q105035"",""wikipedia""=>""ru:Беслан""",POINT (44.53757 43.18756),37063
1,34395338,Назрань,,,,,,city,,"""addr:country""=>""RU"",""addr:region""=>""Ингушетия"",""int_name""=>""Nazran"",""name:ar""=>""نازران"",""name:bg""=>""Назран"",""name:ca""=>""Nazran"",""name:ce""=>""Несара"",""name:de""=>""Nasran"",""name:el""=>""Ναζράν"",""name:en""=>""Nazran"",""name:es""=>""Nazrán"",""name:et""=>""Nazran"",""name:eu""=>""Nazran"",""name:fr""=>""Nazran"",""name:he""=>""נזראן"",""name:hr""=>""Nazranj"",""name:inh""=>""Нана-Наьсаре"",""name:ja""=>""ナズラン"",""name:ka""=>""ნაზრანი"",""name:lt""=>""Nazranė"",""name:ml""=>""നസ്രാൻ"",""name:pl""=>""Nazrań"",""name:ro""=>""Nazran"",""name:ru""=>""Назрань"",""name:sk""=>""Nazraň"",""name:sr""=>""Назран"",""name:tr""=>""Nazran"",""name:uk""=>""Назрань"",""name:xmf""=>""ნაზრანი"",""name:zh-Hans""=>""纳兹兰"",""name:zh-Hant""=>""納茲蘭"",""official_status""=>""ru:город"",""population""=>""124169"",""population:date""=>""2021-01-01"",""start_date""=>""1781"",""wikidata""=>""Q163935"",""wikipedia""=>""ru:Назрань""",POINT (44.75401 43.23208),124169
2,34395341,Магас,,,,,,town,,"""addr:country""=>""RU"",""addr:district""=>""городской округ Магас"",""addr:region""=>""Ингушетия"",""admin_level""=>""4"",""int_name""=>""Magas"",""name:az""=>""Maqas"",""name:ca""=>""Magàs"",""name:ce""=>""МагӀас"",""name:de""=>""Magas"",""name:en""=>""Magas"",""name:et""=>""Magass"",""name:fr""=>""Magas"",""name:hr""=>""Magas"",""name:hu""=>""Marac"",""name:ka""=>""მაგასი"",""name:kbd""=>""Мэгъэс"",""name:lt""=>""Magasas"",""name:ru""=>""Магас"",""name:sk""=>""Magas"",""name:sl""=>""Magas"",""name:sr""=>""Магас"",""name:zh""=>""马加斯"",""name:zh-Hans""=>""马加斯"",""name:zh-Hant""=>""馬加斯"",""official_status""=>""ru:город"",""population""=>""6880"",""population:date""=>""2016-01-01"",""wikidata""=>""Q5222"",""wikipedia""=>""ru:Магас""",POINT (44.80482 43.16665),6880
3,34395346,Тарское,,,,,,village,,"""addr:country""=>""RU"",""addr:district""=>""Пригородный район"",""int_name""=>""Tarskoye"",""name:ce""=>""Ангушт"",""name:de""=>""Tarskoje"",""name:en""=>""Tarskoye"",""name:inh""=>""Ангушт"",""name:os""=>""Тарскæй"",""name:ru""=>""Тарское"",""population""=>""4371"",""wikidata""=>""Q4452146"",""wikipedia""=>""ru:Тарское""",POINT (44.77362 42.96597),4371
4,36714827,Ставрополь,,,,,,city,,"""addr:country""=>""RU"",""addr:district""=>""городской округ Ставрополь"",""addr:postcode""=>""355000"",""addr:region""=>""Ставропольский край"",""admin_level""=>""4"",""capital""=>""4"",""contact:email""=>""ipriem@stavadm.ru"",""contact:phone""=>""+7 8652 264176"",""contact:website""=>""http://ставрополь.рф"",""int_name""=>""Stavropol"",""is_in:country""=>""Russia"",""name:ar""=>""ستافروبول"",""name:az""=>""Stavropol"",""name:ca""=>""Stàvropol"",""name:de""=>""Stawropol"",""name:el""=>""Σταυρούπολη"",""name:en""=>""Stavropol"",""name:et""=>""Stavropol"",""name:eu""=>""Stavropol"",""name:fr""=>""Stavropol"",""name:hr""=>""Stavropolj"",""name:hsb""=>""Stawropol"",""name:hu""=>""Stravlopol"",""name:hy""=>""Ստավրոպոլ"",""name:ka""=>""სტავროპოლი"",""name:kbd""=>""Шъэт-Къалэ"",""name:ko""=>""스타브로폴"",""name:lt""=>""Stavropolis"",""name:ml""=>""സ്റ്റാവ്രോപോൾ"",""name:nl""=>""Stavropol"",""name:os""=>""Стъарапол"",""name:pl""=>""Stawropol"",""name:ro""=>""Stavropol"",""name:ru""=>""Ставрополь"",""name:sk""=>""Stavropoľ"",""name:sl""=>""Stavropol"",""name:sr""=>""Ставропољ"",""name:tr""=>""Stavropol"",""name:uk""=>""Ставрополь"",""name:zh""=>""斯塔夫罗波尔"",""name:zh-Hans""=>""斯塔夫罗波尔"",""name:zh-Hant""=>""斯塔夫羅波爾"",""official_status""=>""ru:город"",""old_name""=>""Ставрополь-Кавказский; Ворошиловск;"",""population""=>""550147"",""population:date""=>""2023-01-01"",""start_date""=>""1777"",""wikidata""=>""Q5206"",""wikipedia""=>""ru:Ставрополь""",POINT (41.96909 45.04332),550147
...,...,...,...,...,...,...,...,...,...,...,...,...
139378,7506031649,Хапиль,,,,,,village,,"""wikidata""=>""Q20924819"",""wikipedia""=>""ru:Хапиль""",POINT (47.98123 41.98801),0
143680,7939337934,Верхний Каранай,,,,,,village,,"""wikidata""=>""Q13616256"",""wikipedia""=>""ru:Верхний Каранай""",POINT (46.90487 42.82566),0
165257,9720011403,Камышкутан,,,,,,village,,,POINT (46.62162 43.49607),0
172438,10281018880,Андрюшин,,,,,,village,,,POINT (43.24974 45.70659),0


In [112]:
# All features named "Назрань", reduced to name and geometry (one .loc call instead of chained indexing).
data.loc[data["name"] == "Назрань", ["name", "geometry"]]

Unnamed: 0,name,geometry
1,Назрань,POINT (44.75401 43.23208)
5919,Назрань,POINT (44.76932 43.22943)
99275,Назрань,POINT (44.76931 43.22946)
191328,Назрань,POINT (44.76933 43.22941)


In [172]:
# Load the merged Kaliningrad shapefile.
# `gpd` is the alias under which geopandas is imported in this notebook, and
# forward slashes keep the path portable (backslashes would be parsed as
# escape sequences and only resolve on Windows).
data = gpd.read_file("src/Kaliningrad/kaliningrad-latest.shp")

  data = geopandas.read_file("src\Kaliningrad\kaliningrad-latest.shp")


In [173]:
# List the distinct feature classes present in the shapefile.
data["fclass"].unique()

array(['building', 'scrub', 'forest', 'industrial', 'recreation_ground',
       'military', 'commercial', 'grass', 'park', 'cemetery', 'retail',
       'residential', 'allotments', 'quarry', 'farmland', 'orchard',
       'meadow', 'farmyard', 'nature_reserve', 'heath', 'vineyard',
       'beach', 'peak', 'town', 'island', 'village', 'locality', 'hamlet',
       'city', 'suburb', 'farm', 'christian_catholic', 'christian',
       'christian_orthodox', 'christian_protestant', 'christian_lutheran',
       'christian_evangelical', 'jewish', 'attraction', 'camp_site',
       'bar', 'theatre', 'supermarket', 'sports_centre', 'stadium',
       'pitch', 'graveyard', 'hospital', 'market_place', 'prison',
       'university', 'fountain', 'castle', 'school', 'college', 'florist',
       'car_dealership', 'museum', 'dentist', 'public_building',
       'doityourself', 'fort', 'embassy', 'mall', 'furniture_shop',
       'hotel', 'stationery', 'swimming_pool', 'police', 'ruins',
       'wastewater_pla

In [177]:
# Number of rows whose feature class is "city" (count the True values of the mask).
print((data["fclass"] == "city").sum())

7


In [179]:
# Number of rows with a positive population (count the True values of the mask).
print((data["population"] > 0).sum())

81


In [181]:
# Feature classes that occur among rows with a positive population.
data.loc[data["population"] > 0, "fclass"].unique()

array(['town', 'village', 'city', 'hamlet'], dtype=object)

In [None]:
# Cities that have both a name and a positive population.
data[data["fclass"].eq("city") & data["name"].notna() & data["population"].gt(0)]

In [209]:
# Keep named features of the settlement-like classes:
#   city    - city
#   town    - small town
#   village - villages
#   hamlet  - small settlements
#   suburb  - villages, garden communities, settlements
geo_df = data[
    data["fclass"].isin(["city", "town", "village", "hamlet", "suburb"])
    & data["name"].notna()
]

In [232]:
# Preview the first two rows of the filtered settlements.
geo_df.head(2)

Unnamed: 0,osm_id,code,fclass,name,type,population,geometry
153961,39382058,1002,town,Лесной,,344.0,"POLYGON ((20.60587 55.01235, 20.60734 55.01395, 20.61254 55.01677, 20.6127 55.01686, 20.61366 55.01663, 20.61659 55.01592, 20.61893 55.01607, 20.61916 55.01548, 20.61951 55.01527, 20.61997 55.01509, 20.62016 55.01394, 20.61997 55.01336, 20.61984 55.01292, 20.61966 55.01259, 20.61969 55.01227, 20.6197 55.01149, 20.62001 55.01122, 20.62034 55.01025, 20.62054 55.01017, 20.62048 55.00993, 20.62026 55.00982, 20.62006 55.00923, 20.62077 55.00901, 20.62083 55.00902, 20.62101 55.00897, 20.62075 55.0083, 20.61985 55.00774, 20.61919 55.00693, 20.61914 55.00691, 20.61891 55.00687, 20.61874 55.00687, 20.61808 55.0067, 20.61748 55.00705, 20.61714 55.00687, 20.6177 55.00647, 20.61726 55.00626, 20.61619 55.00616, 20.61602 55.00608, 20.616 55.00602, 20.61592 55.00601, 20.6159 55.00606, 20.61574 55.00608, 20.61564 55.00611, 20.61478 55.00607, 20.61393 55.00721, 20.60587 55.01235))"
153965,6411591,1002,town,Ясное,,1464.0,"POLYGON ((21.52625 55.16973, 21.5276 55.17334, 21.52801 55.17411, 21.53016 55.17531, 21.5304 55.17593, 21.53258 55.17736, 21.53559 55.1795, 21.53603 55.1798, 21.54709 55.18344, 21.55028 55.18318, 21.55178 55.18295, 21.55891 55.18038, 21.56178 55.1821, 21.56233 55.18251, 21.5631 55.18218, 21.56946 55.18284, 21.56977 55.18132, 21.56902 55.18121, 21.565 55.18069, 21.56477 55.17895, 21.56953 55.17864, 21.56946 55.17762, 21.56377 55.17229, 21.5636 55.17213, 21.56125 55.17097, 21.55075 55.17096, 21.54779 55.17096, 21.54738 55.17096, 21.54349 55.17095, 21.54087 55.1692, 21.53991 55.16939, 21.54005 55.16971, 21.53934 55.16983, 21.53577 55.1696, 21.53498 55.16856, 21.53405 55.16903, 21.53331 55.16923, 21.52954 55.1702, 21.52935 55.16985, 21.52917 55.16937, 21.5273 55.1696, 21.52625 55.16973))"


In [212]:
# Persist the filtered settlements as GeoJSON.
geo_df.to_file(filename="kaliningrad.geojson", driver="GeoJSON")

In [262]:
# Preview the first two rows of the filtered settlements.
geo_df.head(2)

Unnamed: 0,osm_id,code,fclass,name,type,population,geometry
153961,39382058,1002,town,Лесной,,344.0,"POLYGON ((20.60587 55.01235, 20.60734 55.01395, 20.61254 55.01677, 20.6127 55.01686, 20.61366 55.01663, 20.61659 55.01592, 20.61893 55.01607, 20.61916 55.01548, 20.61951 55.01527, 20.61997 55.01509, 20.62016 55.01394, 20.61997 55.01336, 20.61984 55.01292, 20.61966 55.01259, 20.61969 55.01227, 20.6197 55.01149, 20.62001 55.01122, 20.62034 55.01025, 20.62054 55.01017, 20.62048 55.00993, 20.62026 55.00982, 20.62006 55.00923, 20.62077 55.00901, 20.62083 55.00902, 20.62101 55.00897, 20.62075 55.0083, 20.61985 55.00774, 20.61919 55.00693, 20.61914 55.00691, 20.61891 55.00687, 20.61874 55.00687, 20.61808 55.0067, 20.61748 55.00705, 20.61714 55.00687, 20.6177 55.00647, 20.61726 55.00626, 20.61619 55.00616, 20.61602 55.00608, 20.616 55.00602, 20.61592 55.00601, 20.6159 55.00606, 20.61574 55.00608, 20.61564 55.00611, 20.61478 55.00607, 20.61393 55.00721, 20.60587 55.01235))"
153965,6411591,1002,town,Ясное,,1464.0,"POLYGON ((21.52625 55.16973, 21.5276 55.17334, 21.52801 55.17411, 21.53016 55.17531, 21.5304 55.17593, 21.53258 55.17736, 21.53559 55.1795, 21.53603 55.1798, 21.54709 55.18344, 21.55028 55.18318, 21.55178 55.18295, 21.55891 55.18038, 21.56178 55.1821, 21.56233 55.18251, 21.5631 55.18218, 21.56946 55.18284, 21.56977 55.18132, 21.56902 55.18121, 21.565 55.18069, 21.56477 55.17895, 21.56953 55.17864, 21.56946 55.17762, 21.56377 55.17229, 21.5636 55.17213, 21.56125 55.17097, 21.55075 55.17096, 21.54779 55.17096, 21.54738 55.17096, 21.54349 55.17095, 21.54087 55.1692, 21.53991 55.16939, 21.54005 55.16971, 21.53934 55.16983, 21.53577 55.1696, 21.53498 55.16856, 21.53405 55.16903, 21.53331 55.16923, 21.52954 55.1702, 21.52935 55.16985, 21.52917 55.16937, 21.5273 55.1696, 21.52625 55.16973))"


In [263]:
# List the distinct values of the `type` column.
pd.unique(geo_df["type"])

array([None], dtype=object)

In [224]:
# Wrap the filtered frame as a GeoDataFrame that uses the "geometry" column.
geo_gdf = gpd.GeoDataFrame(data=geo_df, geometry="geometry")

In [259]:
# Load the settlement polygons from the exported GeoJSON file.
geo_gdf = gpd.read_file(filename="kaliningrad.geojson")

In [230]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point


def spatial_join_with_location(df, polygons_gdf):
    """
    Attach polygon attributes to user locations with a spatial left join.

    Parameters:
    - df: DataFrame with the columns ['username', 'client', 'latitude', 'longitude'].
      Coordinates are treated as WGS84 (EPSG:4326). The frame is not modified.
    - polygons_gdf: GeoDataFrame with the polygons the locations are matched against.

    Returns:
    - GeoDataFrame with the result of the spatial join; every location is kept
      (left join), whether or not a polygon matched it.
    """
    # Build all points in one vectorised call on a copy, so the caller's frame
    # does not silently gain a "geometry" column.
    points_gdf = gpd.GeoDataFrame(
        df.copy(),
        geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
        crs="EPSG:4326",
    )

    # The join compares raw coordinates, so both sides must share one CRS.
    # Polygons stored in another CRS are reprojected; the input is left untouched.
    polygons = polygons_gdf
    if polygons.crs is not None and polygons.crs != points_gdf.crs:
        polygons = polygons.to_crs(points_gdf.crs)

    return gpd.sjoin(points_gdf, polygons, how="left")


# Sample input: the location of a single user
# NOTE(review): this rebinds `data` to a plain dict, although other cells use
# that name for a GeoDataFrame — re-running those cells afterwards will fail.
data = dict(
    username=["user1"],
    client=["client1"],
    latitude=[54.633186],
    longitude=[20.908047],
)
df = pd.DataFrame(data)

# Polygon data (e.g. cities or regions) can be loaded from files such as
# Shapefile or GeoJSON:
# polygons_gdf = gpd.read_file('path_to_polygon_shapefile.shp')

# Match the sample location against the polygons held in `geo_gdf`
result = spatial_join_with_location(df, geo_gdf)

# print(result)

In [231]:
# Display the joined row(s) for the sample location.
result

Unnamed: 0,username,client,latitude,longitude,geometry,index_right,osm_id,code,fclass,name,type,population
0,user1,client1,54.633186,20.908047,POINT (20.90805 54.63319),331,974610084,1003,village,Озерки,,0.0
