# Исследовательский анализ данных

In [None]:
import numpy as np
import pandas as pd
import cv2

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras

from pathlib import Path
from PIL import Image

from tensorflow.keras.preprocessing.image import load_img

Отображаем одно из изображений в качестве примера:

In [None]:
# Preview a single sample image from the train/origin folder.
path = "./data/images/train/origin"
name = "01-356.png"

# Joined with a plain "/" so load_img receives exactly the same string as
# a manual concatenation would produce.
fullname = "/".join((path, name))
load_img(fullname)

Узнаем уникальные размеры всех изображений:

In [None]:
# Root folder that holds the train/val/test splits.
ROOT: Path = Path("data") / "images"
# Name of the sub-folder where the source images are stored.
ORIGIN_SUBDIR: str = "origin"
# The "correct" resolution, written as (width, height).
STD_SIZE: tuple[int, int] = (31920, 1152)
# File extensions that are inspected (compared against the lower-cased suffix).
EXTS: tuple[str, ...] = (".png", ".jpg", ".jpeg")


def collect_sizes(root: Path):
    """Group image files found under *root* by their pixel dimensions.

    The tree is walked recursively. A file is inspected only when its
    lower-cased suffix is listed in EXTS and one of its path components
    equals ORIGIN_SUBDIR. Each such file is read with OpenCV; files that
    OpenCV cannot read are reported on stdout and skipped.

    Args:
        root: Directory to scan.

    Returns:
        A dict mapping ``(width, height)`` to the list of paths that have
        that size.
    """
    by_size: dict[tuple[int, int], list[Path]] = {}

    for candidate in root.rglob("*"):
        wanted = (
            candidate.suffix.lower() in EXTS
            and ORIGIN_SUBDIR in candidate.parts
        )
        if not wanted:
            continue

        pixels = cv2.imread(str(candidate), cv2.IMREAD_UNCHANGED)
        if pixels is None:
            print(f"⚠️  Не удалось открыть {candidate}")
            continue

        # The array shape starts with (rows, cols), i.e. (height, width).
        height, width = pixels.shape[:2]
        key = (width, height)
        if key not in by_size:
            by_size[key] = []
        by_size[key].append(candidate)

    return by_size


def main():
    """Print a report of every distinct image size found under ROOT.

    Sizes are listed from the most to the least frequent, ties broken by
    ascending (width, height). A size equal to STD_SIZE gets a check mark;
    any other size gets a warning mark followed by up to five example
    paths (relative to ROOT) and a count of the remaining ones.
    """
    size_map = collect_sizes(ROOT)
    ordered = sorted(
        size_map.items(),
        key=lambda item: (-len(item[1]), item[0]),
    )

    print("\nУникальные размеры:")
    for size, paths in ordered:
        w, h = size
        is_standard = size == STD_SIZE
        mark = "✅" if is_standard else "⚠️"
        print(f"{mark} {w}×{h} — {len(paths)} файл(ов)")
        if is_standard:
            continue

        # Show only a few examples of the non-standard files.
        for sample in paths[:5]:
            print(f"   • {sample.relative_to(ROOT)}")
        extra = len(paths) - 5
        if extra > 0:
            print(f"   … и ещё {extra} файлов")
    print("\nГотово.")


# Run the report when the file/cell is executed directly.
if __name__ == "__main__":
    main()


In [None]:
from shapely.geometry import Polygon, box   # pip install shapely

# Panorama width and height in pixels; annotation coordinates are scaled
# by these values when loaded.
PAN_W = 31920
PAN_H = 1152
# Width and height of a single tile in pixels.
TILE_W = 1140
TILE_H = 1140
# Y coordinate where every tile starts (original note: "if you crop").
CROP_TOP = 6
# Number of tiles cut along the panorama width (28 * 1140 == 31920).
N_TILES = 28

def load_polygons(txt_path):
    """Read polygon annotations from a text file and scale them to pixels.

    Every non-blank line is parsed as ``<class_id> x1 y1 x2 y2 ...``: the
    first token is converted to ``int``, the remaining tokens to ``float``.
    Each x value is multiplied by PAN_W and each y value by PAN_H
    (presumably the file stores coordinates normalised to the panorama
    size - TODO confirm against the annotation tool).

    Blank or whitespace-only lines, such as a trailing empty line at the
    end of the file, are skipped instead of raising ``IndexError``.

    Args:
        txt_path: Path of the annotation file.

    Returns:
        A list of ``(class_id, Polygon)`` tuples in file order.

    Raises:
        ValueError: If a token cannot be converted to a number.
        IndexError: If a line holds an odd number of coordinate values.
    """
    polys = []
    with open(txt_path) as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Nothing to parse on an empty line.
                continue
            cid = int(parts[0])
            coords = [float(token) for token in parts[1:]]
            # Coordinates come as a flat x, y, x, y, ... sequence.
            pts_abs = [(coords[i] * PAN_W, coords[i + 1] * PAN_H)
                       for i in range(0, len(coords), 2)]
            polys.append((cid, Polygon(pts_abs)))
    return polys

def save_tile_polys(tile_idx, tile_polys, out_dir):
    """Write the polygons of one tile to ``tile_XX.txt`` inside *out_dir*.

    One line is written per polygon: the class id followed by the
    exterior-ring coordinates as ``x1 y1 x2 y2 ...``, with x divided by
    TILE_W and y divided by TILE_H, each formatted with six decimals.

    Args:
        tile_idx: Tile index; zero-padded to two digits in the file name.
        tile_polys: Iterable of ``(class_id, polygon)`` pairs whose
            coordinates are already relative to the tile origin.
        out_dir: Directory (a ``Path``) that receives the file.
    """
    target = out_dir / f"tile_{tile_idx:02d}.txt"
    with open(target, 'w') as fh:
        for cid, poly in tile_polys:
            ring = np.array(poly.exterior.coords)
            # Normalise to the tile size and flatten to x1, y1, x2, y2, ...
            scaled = [v for x, y in ring for v in (x / TILE_W, y / TILE_H)]
            fields = [str(cid)]
            fields.extend(f"{v:.6f}" for v in scaled)
            fh.write(' '.join(fields) + '\n')

def _polygon_parts(geom):
    """Yield every Polygon with a positive area contained in *geom*.

    An intersection is not always a single Polygon: it can be a
    MultiPolygon or a GeometryCollection that mixes polygons with points
    and lines. Only plain polygons have an ``exterior`` ring, so the
    result is flattened here and everything else is dropped.
    """
    if geom.is_empty:
        return
    if geom.geom_type == "Polygon":
        if geom.area > 0:
            yield geom
        return
    # Multi-part geometries expose their members through ``geoms``;
    # points and lines have no such attribute and contribute nothing.
    for part in getattr(geom, "geoms", ()):
        yield from _polygon_parts(part)


def slice_annotations(pan_txt, out_dir):
    """Split panorama-level polygon annotations into per-tile files.

    The polygons are loaded with ``load_polygons``. For each of the
    N_TILES tiles a box spanning x in ``[idx * TILE_W, (idx + 1) * TILE_W]``
    and y in ``[CROP_TOP, CROP_TOP + TILE_H]`` is intersected with every
    polygon. Each polygonal piece of the intersection is shifted so that
    the tile's top-left corner becomes (0, 0) and is stored under the
    original class id. A polygon that the tile cuts into several pieces
    produces several entries. Only the exterior ring is kept, so holes are
    not preserved.

    A file is written through ``save_tile_polys`` only for tiles that
    contain at least one polygon.

    Args:
        pan_txt: Path of the panorama annotation file.
        out_dir: Output directory (a ``Path``); created if missing.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    polys = load_polygons(pan_txt)

    # The vertical band is the same for every tile.
    top, bottom = CROP_TOP, CROP_TOP + TILE_H

    for idx in range(N_TILES):
        left = idx * TILE_W
        right = left + TILE_W
        tile_box = box(left, top, right, bottom)

        tile_polys = []
        for cid, poly in polys:
            clipped = poly.intersection(tile_box)
            for part in _polygon_parts(clipped):
                # Move the coordinates to the tile's own (0, 0) origin.
                local = Polygon([(x - left, y - top)
                                 for x, y in part.exterior.coords])
                tile_polys.append((cid, local))

        if tile_polys:
            save_tile_polys(idx, tile_polys, out_dir)

if __name__ == '__main__':
    slice_annotations(Path('panorama.txt'), Path('tiles/ann'))
