In [None]:
import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urljoin
import os
import time
from pySmartDL import SmartDL

url         = "https://download.geofabrik.de/south-america/brazil.html"
coutry      = "brazil"
path_data   = "data"
path_output = "external"
path_module = "protobuf"
path_file   = "brazil-latest.osm.pbf"

# Destination folder for the downloaded extract: data/external/protobuf
path_folder = os.path.join(path_data, path_output, path_module)
os.makedirs(path_folder, exist_ok=True)

# Fetch the index page. The timeout keeps the cell from hanging forever and
# raise_for_status() stops an HTTP error page from being parsed as if it
# were the real index.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find the first link whose href looks like "<country>-latest*.osm" or
# "<country>-latest*.pbf". The country name is escaped so it is always
# matched literally inside the regular expression.
link_regex = re.compile(rf"{re.escape(coutry)}-latest.*\.(osm|pbf)$", flags=re.IGNORECASE)
download_protobuf = None
for a in soup.find_all("a", href=True):
    href = a["href"]
    if link_regex.search(href):
        # Build the absolute link from the page URL and the href attribute
        download_protobuf = urljoin(url, href)
        break

# Without this guard a page with no matching link would hand an empty value
# to SmartDL and fail with an unrelated error.
if download_protobuf is None:
    raise RuntimeError(f"no '{coutry}-latest' .osm/.pbf link found at {url}")

download_path = os.path.join(path_folder, path_file)
obj = SmartDL(download_protobuf, download_path, timeout=10)
obj.start(blocking=True)
print(f"Download concluído e salvo como {download_path}")

In [None]:
import os

# Move up to the parent folder, presumably the project root that holds the
# local `modules` package. The guard makes the cell idempotent: once
# `modules` is visible from the working directory, re-running the cell no
# longer climbs one more level.
if not os.path.isdir("modules"):
    os.chdir("../")

from modules.geofabrik import ProtobufDownloader

PBD = ProtobufDownloader()

In [None]:
import os

# Move up to the parent folder only when `modules` is not already visible,
# so running this cell after another cell that changed directory (or
# running it twice) does not climb an extra level.
if not os.path.isdir("modules"):
    os.chdir("../")

from modules.geofabrik import ProtobufDownloader, convert_pbf_to_osm

# Build the paths with os.path.join instead of hard-coded "\\" separators so
# the cell yields the same paths on Windows and valid paths elsewhere.
protobuf_dir = os.path.join("data", "external", "protobuf")
convert_pbf_to_osm(
    os.path.join(protobuf_dir, "brazil-latest.osm.pbf"),
    os.path.join(protobuf_dir, "brazil-latest.osm"),
)

In [None]:
"""
Reestruturar o arquivo pbf_to_osm.py para usar os binários da pasta bin e chamar o osmconvert do OpenStreetMap, por exemplo:

osmconvert64-large.exe brazil-latest.osm.pbf --drop-author --drop-version --verbose --complete-ways --complete-multipolygons --max-objects=500000000 --hash-memory=2048 -o=brazil-latest.osm_fix.pbf
"""

In [None]:
from glob import glob
import subprocess
import platform
import psutil
import time
import math
import os

class OSMConverter:
    """Build and run an ``osmconvert`` command line from validated options.

    The constructor selects a binary under
    ``modules/geofabrik/bin/<system>/osmconvert/<NN>bits`` according to the
    host platform and the amount of installed RAM. Every option is exposed as
    a property whose setter validates the type; only options that were
    explicitly assigned are passed to the tool by :meth:`run`.
    """

    # (private attribute, command-line switch) pairs for on/off options,
    # in the order they are appended to the command line.
    _SWITCHES = (
        ("_drop_author", "--drop-author"),
        ("_drop_version", "--drop-version"),
        ("_verbose", "--verbose"),
        ("_complete_ways", "--complete-ways"),
        ("_complete_multipolygons", "--complete-multipolygons"),
    )

    # (private attribute, option name) pairs for ``--name=value`` options.
    _VALUED = (
        ("_max_objects", "--max-objects"),
        ("_hash_memory", "--hash-memory"),
    )

    def __init__(self, minimal_ram: int = 4):
        """Locate the osmconvert binary and define the data folders.

        Args:
            minimal_ram: RAM threshold in GiB. On machines with this much
                memory or less, the "32bits" folder and a binary whose path
                contains "minimal" are preferred.

        Raises:
            FileNotFoundError: if no file starting with ``osmconvert`` exists
                in the selected binary folder.
        """
        # Base path of the module and of its bundled binaries
        self.base_name          = "osmconvert"
        self.base_path          = "modules"
        self.folder_module      = "geofabrik"
        self.folder_bin         = "bin"
        self.base_sys           = platform.system()
        # Keep only the digits of e.g. "64bit" and rebuild it as "64bits"
        self.folder_bits        = f"{''.join([a for a in platform.architecture()[0] if a.isdigit()])}bits"
        self.path_protobufs     = os.path.join("data", "external", "protobuf")
        self.path_bin           = os.path.join(
            self.base_path,
            self.folder_module,
            self.folder_bin,
            self.base_sys,
            self.base_name,
            self.folder_bits,
        )

        # Switch to the 32-bit binaries on low-memory machines so the
        # conversion does not exhaust the RAM. Total RAM is rounded up to GiB.
        self.ram_system         = math.ceil(psutil.virtual_memory().total / (1024**3))
        low_memory              = self.ram_system <= minimal_ram
        if low_memory:
            self.path_bin       = self.path_bin.replace("64bits", "32bits")

        # Choose the conversion binary
        self.file_bin           = ""
        self.files_bin          = glob(os.path.join(self.path_bin, self.base_name + "*"))
        if not self.files_bin:
            # Previously this surfaced as a bare IndexError with no context.
            raise FileNotFoundError(
                f"no '{self.base_name}' binary found in '{self.path_bin}'"
            )
        if len(self.files_bin) == 1:
            self.file_bin       = self.files_bin[0]
        else:
            # Low memory: prefer a "minimal" build; otherwise prefer a
            # non-minimal one. Fall back to the first file found.
            if low_memory:
                matching_bins   = [b for b in self.files_bin if "minimal" in b]
            else:
                matching_bins   = [b for b in self.files_bin if "minimal" not in b]
            self.file_bin       = matching_bins[0] if matching_bins else self.files_bin[0]

        # Base path of the data files
        self.base_data          = "data"
        self.protobufs          = "protobuf"

        # Folder the input files are read from
        self.external           = "external"
        self.folder_in_data     = os.path.join(self.base_data, self.external, self.protobufs)

        # Folder the output files are written to
        self.processed          = "processed"
        self.folder_out_data    = os.path.join(self.base_data, self.processed, self.protobufs)

    @staticmethod
    def _require_type(value, expected, message: str) -> None:
        """Raise ``TypeError(message)`` unless ``value`` is an ``expected`` instance."""
        if not isinstance(value, expected):
            raise TypeError(message)

    @property
    def input_file(self) -> str:
        """Path of the input file, joined onto ``folder_in_data``."""
        return self._input_file

    @input_file.setter
    def input_file(self, name: str) -> None:
        """Store ``folder_in_data/name``.

        Raises:
            TypeError: if ``name`` is not a string.
            FileExistsError: if the resulting path does not exist (type kept
                as-is so existing ``except`` clauses keep working).
        """
        self._require_type(name, str, "input_file must be a string")
        path = os.path.join(self.folder_in_data, name)
        if not os.path.exists(path):
            raise FileExistsError(f"input_file not exists in {self.folder_in_data}")
        self._input_file = path

    @property
    def drop_author(self) -> bool:
        """Whether ``--drop-author`` is passed to osmconvert."""
        return self._drop_author

    @drop_author.setter
    def drop_author(self, flag: bool) -> None:
        self._require_type(flag, bool, "drop_author must be a boolean")
        self._drop_author = flag

    @property
    def drop_version(self) -> bool:
        """Whether ``--drop-version`` is passed to osmconvert."""
        return self._drop_version

    @drop_version.setter
    def drop_version(self, flag: bool) -> None:
        self._require_type(flag, bool, "drop_version must be a boolean")
        self._drop_version = flag

    @property
    def verbose(self) -> bool:
        """Whether ``--verbose`` is passed to osmconvert."""
        return self._verbose

    @verbose.setter
    def verbose(self, flag: bool) -> None:
        self._require_type(flag, bool, "verbose must be a boolean")
        self._verbose = flag

    @property
    def complete_ways(self) -> bool:
        """Whether ``--complete-ways`` is passed to osmconvert."""
        return self._complete_ways

    @complete_ways.setter
    def complete_ways(self, flag: bool) -> None:
        self._require_type(flag, bool, "complete_ways must be a boolean")
        self._complete_ways = flag

    @property
    def complete_multipolygons(self) -> bool:
        """Whether ``--complete-multipolygons`` is passed to osmconvert."""
        return self._complete_multipolygons

    @complete_multipolygons.setter
    def complete_multipolygons(self, flag: bool) -> None:
        self._require_type(flag, bool, "complete_multipolygons must be a boolean")
        self._complete_multipolygons = flag

    @property
    def max_objects(self) -> int:
        """Value passed as ``--max-objects=<value>``."""
        return self._max_objects

    @max_objects.setter
    def max_objects(self, objects: int) -> None:
        self._require_type(objects, int, "max_objects must be an integer")
        self._max_objects = objects

    @property
    def hash_memory(self) -> int:
        """Value passed as ``--hash-memory=<value>``."""
        return self._hash_memory

    @hash_memory.setter
    def hash_memory(self, ram: int) -> None:
        self._require_type(ram, int, "hash_memory must be an integer")
        self._hash_memory = ram

    @property
    def output_file(self) -> str:
        """Path of the output file, joined onto ``folder_out_data``."""
        return self._output_file

    @output_file.setter
    def output_file(self, name: str) -> None:
        """Store ``folder_out_data/name``.

        Raises:
            TypeError: if ``name`` is not a string.
            FileExistsError: if ``folder_out_data`` does not exist (type kept
                as-is so existing ``except`` clauses keep working).
        """
        self._require_type(name, str, "output_file must be a string")
        if not os.path.exists(self.folder_out_data):
            raise FileExistsError(f"folder: '{self.folder_out_data}' not exists")
        self._output_file = os.path.join(self.folder_out_data, name)

    def run(self):
        """Run osmconvert with the configured options and print its output.

        Prints the captured stdout, then the captured stderr (when not
        empty), then the elapsed wall-clock time.

        Raises:
            AttributeError: if ``input_file`` or ``output_file`` was never set.
            subprocess.CalledProcessError: if the tool exits with a non-zero
                status (``check=True``).
        """
        # Fail with an explicit message instead of an opaque
        # "'OSMConverter' object has no attribute '_input_file'".
        for attr in ("_input_file", "_output_file"):
            if not hasattr(self, attr):
                raise AttributeError(f"{attr[1:]} must be set before calling run()")

        args = [f"./{self.file_bin}", self._input_file]

        # On/off options: included only when they were set and are True.
        args.extend(
            switch for attr, switch in self._SWITCHES if getattr(self, attr, False)
        )

        # Valued options: included whenever they were set.
        args.extend(
            f"{option}={getattr(self, attr)}"
            for attr, option in self._VALUED
            if hasattr(self, attr)
        )

        # Output file
        args.append(f"-o={self._output_file}")

        # Run the assembled command and time it
        t_start     = time.time()
        result      = subprocess.run(args, capture_output=True, text=True, check=True)
        t_current   = time.time() - t_start

        # stderr is captured too, so it must be printed explicitly; the old
        # code only appended a newline and silently dropped its content
        # (presumably where osmconvert writes its --verbose log).
        end_shell   = result.stdout
        if result.stderr != '':
            end_shell = f"{end_shell}\n{result.stderr}"
        end_shell   = f"{end_shell} \nTempo de Processamento: {t_current}s"
        print(end_shell)

In [76]:
OSMC = OSMConverter()

# The output_file setter raises when its folder is missing, so make sure
# it exists before configuring the converter.
os.makedirs(OSMC.folder_out_data, exist_ok=True)

# The input is read from folder_in_data and the output is written to
# folder_out_data, so using the same file name for both does not overwrite
# the source file.
OSMC.input_file             = 'brazil-latest.osm.pbf'
OSMC.drop_author            = True
OSMC.drop_version           = True
OSMC.verbose                = True
OSMC.complete_ways          = True
OSMC.complete_multipolygons = True
OSMC.max_objects            = 500000000
OSMC.hash_memory            = 4096
OSMC.output_file            = 'brazil-latest.osm.pbf'

OSMC.run()


osmconvert: Verbose mode.
osmconvert Parameter: --complete-ways
osmconvert Parameter: --complete-multipolygons
osmconvert Parameter: --max-objects=500000000
osmconvert Parameter: --hash-memory=4096
osmconvert Parameter: -o=brazil-latest.osm.pbf
osmconvert: File timestamp: 2025-05-01T20:21:34Z
osmconvert: Last processed: relation 19067599.



In [75]:
# Scratch arithmetic: 1024*4 = 4096, the same number assigned to OSMC.hash_memory
# (presumably 4 GiB expressed in MiB -- TODO confirm the unit osmconvert expects).
1024*4

4096

In [None]:
-o=brazil-latest.osm_fix.pbf

['modules\\geofabrik\\bin\\Windows\\osmconvert\\64bits\\osmconvert64-minimal.exe', 'modules\\geofabrik\\bin\\Windows\\osmconvert\\64bits\\osmconvert64.exe']


In [32]:
# Debug inspection of `pattern`. No visible cell defines this name, so the
# cell relies on leftover kernel state and fails on a fresh kernel.
pattern

'modules\\geofabrik\\bin\\Windows\\osmconvert\\64bits\\osmconvert?'