## Предсказание жилых зданий


In [1]:
from sklearn.ensemble import RandomForestClassifier

from osm_living_predictor.downloader import OSMDownloader
from osm_living_predictor.building_processor import BuildingProcessor
from osm_living_predictor.road_processor import RoadProcessor
from osm_living_predictor.amenity_processor import AmenityProcessor
from osm_living_predictor.feature_builder import FeatureBuilder
from osm_living_predictor.model_handler import ModelHandler


In [2]:
# Create the downloader for id 12030887 and load the boundary used by every
# processor below.
# NOTE(review): 12030887 is presumably the OSM id of the study area — confirm,
# and consider giving it a named constant.
downloader = OSMDownloader(12030887)
bounds = downloader.load_boundary()

In [3]:
# Processing
# Create the building processor for the loaded boundary and load the buildings.
buildings = BuildingProcessor(bounds)
buildings.load_buildings()

In [4]:
# Road processor: receives the boundary and the `buildings` attribute of the
# building processor, then loads the roads.
roads = RoadProcessor(bounds, buildings.buildings)
roads.load_roads()

In [5]:
# Amenity processor: receives the boundary and `roads.backup_data`, then loads
# the amenities.
# NOTE(review): confirm what `backup_data` holds and why the road processor's
# copy is the one passed here.
amenities = AmenityProcessor(bounds, roads.backup_data)
amenities.load_amenities()

In [6]:
# Features
# Combine the three processors; the result `data` is what ModelHandler receives
# as `df` in the cells below.
builder = FeatureBuilder(buildings, roads, amenities)
data = builder.build_features()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['landuse'].replace([


In [None]:
# Model creation

# Random forest with a fixed seed so repeated runs train the same model.
external_model = RandomForestClassifier(random_state=42)
# NOTE(review): the file is called "model_dt" although a random forest is
# plugged in — confirm the naming is intentional.
handler = ModelHandler("model/model_dt.pkl", df=data, target_col="is_living")
handler.set_model(external_model)

# Split the data through the handler and fit on the training part only.
X_train, X_test, y_train, y_test = handler.train_test_split()
handler.train_model(X_train, y_train)

# Predictions for the held-out part. `map_labels=True` presumably converts the
# raw class values into readable labels — confirm in ModelHandler.
predicted = handler.predict(X_test, map_labels=True)




[ModelHandler] Внешняя модель установлена.
[CV] Accuracy: mean=0.8293, std=0.0031
[ModelHandler] Модель сохранена: model_dt.pkl


In [7]:
# Load the model

# Re-create the handler for the same file, data and target column, then load
# the stored model instead of training a new one.
handler = ModelHandler("model/model_dt.pkl", df=data, target_col="is_living")
handler.load_model_from_file()

# Predict for the whole feature table with label mapping switched off.
# NOTE(review): `data` is passed as-is — presumably the handler drops the target
# column itself; confirm.
predicted = handler.predict(data, map_labels=False)

[ModelHandler] Модель успешно загружена из файла.


## Предсказание этажности


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from osm_height_predictor.geo import (BuildingPreprocessor, 
                                GeometryFeatureGenerator, 
                                SpatialStatisticsComputer, 
                                SpatialNeighborhoodAnalyzer, 
                                StoreyModelTrainer)

In [None]:
# Folder holding the pickled building tables used for training.
folder_path = "data/data_spb+towns/input_data_for_train"

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the concatenated frame (and anything derived from it) reproducible.
pkl_files = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if f.endswith(".pickle")
)
if not pkl_files:
    # pd.concat would fail with an opaque "No objects to concatenate" otherwise.
    raise ValueError(f"No .pickle files found in {folder_path!r}")

# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
# Every table is reprojected to EPSG:4326 before concatenation.
df_list = [pd.read_pickle(f).to_crs(4326) for f in pkl_files]
df_building = (
    pd.concat(df_list, ignore_index=True)
    .rename(columns={"building:levels": "storey"})
    .dropna(subset=["storey"])
)

df_building["storey"] = df_building["storey"].astype(int)

# Keep rows flagged is_living == 1 with more than two storeys. The index is
# reset after the last filter so the resulting frame has a contiguous index.
df_building = df_building[
    (df_building["is_living"] == 1) & (df_building["storey"] > 2)
].reset_index(drop=True)

print(f"Loaded {len(df_list)} files.")
# info() prints its report itself and returns None, so it is not wrapped in print().
df_building.info()

In [None]:
# 1. Preprocessing
# Wrap the loaded buildings, apply the residential filter and take the result.
# NOTE(review): `df` is rebound by each of the following steps, so the cells
# must be run in order.
prep = BuildingPreprocessor(df_building)
prep.filter_residential()
df = prep.get()

In [None]:
# 2. Geometric features
# `df` is replaced by the output of compute_geometry_features().
geo_gen = GeometryFeatureGenerator(df)
df = geo_gen.compute_geometry_features()

In [None]:
# 3. Spatial analysis
# Run the Moran/LISA computation on the "storey" column. The call returns three
# values: the updated frame plus, presumably, the global Moran statistic and
# the local (LISA) result — confirm in SpatialStatisticsComputer.
stats = SpatialStatisticsComputer(df)
df, global_moran, lisa = stats.compute_moran_and_lisa(col="storey")

In [None]:
# 4. Neighborhood features
# `df` is replaced by the output of compute_neighborhood_metrics().
analyzer = SpatialNeighborhoodAnalyzer(df)
df = analyzer.compute_neighborhood_metrics()

In [None]:
# 5. Model training
# Parameter candidates forwarded to train_rf: one value for n_estimators.
param_dist = {"n_estimators": [100]}

# Build the trainer on the feature frame, split the data and fit the forest on
# the training part.
trainer = StoreyModelTrainer(df)
X_train, X_test, y_train, y_test = trainer.prepare_data()
model = trainer.train_rf(X_train, y_train, param_dist=param_dist)

In [None]:
# Predict for the held-out split returned by trainer.prepare_data().
y_pred = model.predict(X_test)

def plot_target_vs_prediction(
    y_true, y_pred, bins=30, title="RF Target vs Prediction Histogram"
):
    """Overlay density-normalised histograms of true and predicted values.

    Both histograms are drawn over one common value range, so for an integer
    ``bins`` their bin edges coincide and the bars can be compared directly.

    Parameters
    ----------
    y_true : iterable of numbers
        Target values (expected to be one-dimensional).
    y_pred : iterable of numbers
        Predicted values (expected to be one-dimensional).
    bins : int or sequence, default 30
        Passed to ``Axes.hist``. When a sequence of edges is given, matplotlib
        ignores the range and uses the edges as they are.
    title : str
        Title of the figure.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Without a shared range each hist() call derives its own edges from its
    # own data, which misaligns the two overlaid histograms.
    combined = [*y_true, *y_pred]
    shared_range = (min(combined), max(combined)) if combined else None

    hist_style = {
        "bins": bins,
        "range": shared_range,
        "alpha": 0.6,
        "density": True,
    }

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(y_true, label="True (Target)", **hist_style)
    ax.hist(y_pred, label="Predicted", **hist_style)
    ax.set_xlabel("Value")
    ax.set_ylabel("Density")
    ax.set_title(title)
    ax.legend()
    ax.grid(alpha=0.3)
    fig.tight_layout()
    plt.show()


# Compare the distribution of the held-out targets with the model's predictions.
plot_target_vs_prediction(y_test, y_pred)