In [3]:
# Instalação das bibliotecas necessárias
# Execute apenas uma vez
!pip install icrawler opencv-python pandas

Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ---------------------------------------  10.7/11.0 MB 72.8 MB/s eta 0:00:01
   ---------------------------------------  10.7/11.0 MB 72.8 MB/s eta 0:00:01
   ---------------------------------------- 11.0/11.0 MB 26.8 MB/s eta 0:00:00
Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-2.3.3 pytz-2025.2



[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
import cv2
import pandas as pd
from icrawler.builtin import BingImageCrawler

# Baixando as imagens

In [5]:
# Root folder: holds one sub-folder per class and the generated CSV file.
base_dir = "dataset"

In [6]:
# Amount added to the cumulative download target for every search term.
IMAGENS_POR_TERMO = 50

# Search plan: destination folder (one per class) -> Bing queries used to fill it.
estrategia_busca = {
    # Class: severe accidents
    "dataset_severe_accident": [
        "totaled car accident street",    # totaled car
        "wrecked car crash road",         # wrecked car on the road
        "carro capotado rodovia",         # PT-BR query (overturned car on a highway)
        "severe car collision wreckage",  # wreckage from a severe collision
        "smashed car front view",         # smashed front end
    ],
    # Class: moderate accidents
    "dataset_moderate_accident": [
        "car bumper dent parking lot",    # dented bumper in a parking lot
        "fender bender accident",         # light collision (classic term)
        "batida leve transito",           # PT-BR query (intended to bring real news photos)
        "car door scratch dent",          # scratched / dented door
        "broken tail light car",          # broken tail light
        "minor car crash street",         # minor accident on the street
    ],
    # Class: no accident
    "dataset_no_accident": [
        "sedan car side view street",     # car seen from the side on the street
        "hatchback parked on road",       # parked car
        "carros no transito cidade",      # PT-BR query (city traffic, real-world scene)
        "suv driving on highway",         # moving car
        "common car front view street",   # front of an ordinary car
    ],
}

In [7]:
def executar_download_inteligente(estrategia=None, diretorio_base=None, imagens_por_termo=None):
    """Download images from Bing for every class folder and search term of a plan.

    One crawler is created per search term; all crawlers of a class save into
    the same folder. The ``max_num`` given to each crawl is a running total
    (``imagens_por_termo``, then twice that, three times that, ...) rather
    than a fixed per-term amount.

    Args:
        estrategia: Mapping ``folder name -> list of search terms``.
            Defaults to the notebook-level ``estrategia_busca``.
        diretorio_base: Folder inside which the class folders are created.
            Defaults to the notebook-level ``base_dir``.
        imagens_por_termo: Amount added to the running target for each term.
            Defaults to the notebook-level ``IMAGENS_POR_TERMO``.

    Returns:
        None. Progress is printed; an exception raised while crawling a term
        is printed and the loop moves on to the next term.
    """
    # Resolve defaults at call time so edits to the config cells are honoured.
    if estrategia is None:
        estrategia = estrategia_busca
    if diretorio_base is None:
        diretorio_base = base_dir
    if imagens_por_termo is None:
        imagens_por_termo = IMAGENS_POR_TERMO

    for nome_pasta, lista_termos in estrategia.items():
        diretorio_destino = os.path.join(diretorio_base, nome_pasta)

        # exist_ok=True replaces the check-then-create pair and is safe on re-runs.
        os.makedirs(diretorio_destino, exist_ok=True)

        print(f"\n>>> Iniciando downloads para a classe: {nome_pasta.upper()}")

        # Running download target for this class folder.
        # NOTE(review): presumably cumulative because every new crawler restarts
        # its file numbering in the same folder and skips names that already
        # exist — confirm against icrawler's downloader before changing this.
        meta_atual = 0

        for termo in lista_termos:
            meta_atual += imagens_por_termo

            print(f"   Termo: '{termo}' -> Buscando até completar {meta_atual} imagens na pasta...")

            try:
                bing_crawler = BingImageCrawler(
                    storage={'root_dir': diretorio_destino},
                    parser_threads=2,
                    downloader_threads=4,
                    log_level='ERROR'  # keep the cell output less noisy
                )

                # max_num is the cumulative target (50, 100, 150, ... by default)
                bing_crawler.crawl(keyword=termo, max_num=meta_atual)

            except Exception as e:
                # Best effort on purpose: report the failure and try the next term.
                print(f"   [!] Erro ao buscar '{termo}': {e}")

    print("\n" + "="*40)
    print("DOWNLOAD CONCLUÍDO!")
    print("Verifique as pastas. Agora você deve ter uma grande variedade de imagens.")

In [8]:
# Run the crawl for every class folder and search term configured in estrategia_busca.
executar_download_inteligente()


>>> Iniciando downloads para a classe: DATASET_SEVERE_ACCIDENT
   Termo: 'totaled car accident street' -> Buscando até completar 50 imagens na pasta...


2026-01-13 19:29:19,335 - ERROR - downloader - Response status code 403, file https://www.tuleylaw.com/wp-content/uploads/2017/08/bigstock-Two-Car-Crash-Closeup-1529636.jpg
2026-01-13 19:29:19,452 - ERROR - downloader - Response status code 403, file https://www.askadamskutner.com/wp-content/uploads/2023/01/vehicle-totaled-after-car-accident-opt.jpg
2026-01-13 19:29:21,258 - ERROR - downloader - Response status code 400, file https://media.istockphoto.com/id/1412101026/photo/totaled-car.jpg
2026-01-13 19:29:22,241 - ERROR - downloader - Response status code 403, file https://www.mg4law.com/wp-content/uploads/2017/08/car-wreck-blue-50-1.jpg
2026-01-13 19:29:22,671 - ERROR - downloader - Response status code 403, file https://www.mccoyandsparks.com/wp-content/uploads/2023/06/McCoy-HeroTemplate-1.jpg
2026-01-13 19:29:23,418 - ERROR - downloader - Response status code 400, file https://media.istockphoto.com/id/1474704322/photo/traffic-accident-city-street.jpg
2026-01-13 19:29:24,587 - ERRO

   Termo: 'wrecked car crash road' -> Buscando até completar 100 imagens na pasta...
   Termo: 'carro capotado rodovia' -> Buscando até completar 150 imagens na pasta...
   Termo: 'severe car collision wreckage' -> Buscando até completar 200 imagens na pasta...
   Termo: 'smashed car front view' -> Buscando até completar 250 imagens na pasta...

>>> Iniciando downloads para a classe: DATASET_MODERATE_ACCIDENT
   Termo: 'car bumper dent parking lot' -> Buscando até completar 50 imagens na pasta...


2026-01-13 19:30:05,963 - ERROR - downloader - Response status code 403, file https://preview.redd.it/front-car-bumper-dent-v0-sfk1ed5qxa5a1.jpg
2026-01-13 19:30:06,801 - ERROR - downloader - Response status code 403, file https://denttime.com/wp-content/uploads/2022/04/dent_bumper_web.jpg
2026-01-13 19:30:15,760 - ERROR - downloader - Response status code 403, file https://denttime.com/wp-content/uploads/2023/05/bumperrepairsandiego.jpg
2026-01-13 19:30:20,335 - ERROR - downloader - Response status code 403, file https://www.nicholllaw.com/assets/media/images/car-in-parking-lot-with-dent-social.jpg


   Termo: 'fender bender accident' -> Buscando até completar 100 imagens na pasta...


2026-01-13 19:30:32,272 - ERROR - downloader - Response status code 403, file https://preview.redd.it/today-this-idiot-caused-an-accident-over-a-fender-bender-v0-g8jcz1mzuyy91.jpg
2026-01-13 19:30:32,474 - ERROR - downloader - Response status code 403, file https://preview.redd.it/today-this-idiot-caused-an-accident-over-a-fender-bender-v0-kv0ue3mzuyy91.jpg
2026-01-13 19:30:32,754 - ERROR - downloader - Response status code 403, file https://preview.redd.it/today-this-idiot-caused-an-accident-over-a-fender-bender-v0-vbgmemlzuyy91.jpg
2026-01-13 19:30:32,865 - ERROR - downloader - Response status code 403, file https://preview.redd.it/today-this-idiot-caused-an-accident-over-a-fender-bender-v0-wcguallzuyy91.jpg
2026-01-13 19:30:33,148 - ERROR - downloader - Response status code 403, file https://www.nicholllaw.com/assets/media/images/dented-fender-on-honda-civic-social.jpg
2026-01-13 19:30:33,983 - ERROR - downloader - Response status code 403, file https://burch-george.com/wp-content/u

   Termo: 'batida leve transito' -> Buscando até completar 150 imagens na pasta...


2026-01-13 19:30:43,761 - ERROR - downloader - Response status code 403, file https://webstories.otempo.com.br/wp-content/uploads/2024/04/cropped-iStock-1301170956.jpg
2026-01-13 19:30:47,124 - ERROR - downloader - Response status code 404, file https://pedrobastoscarconsulting.pt/wp-content/uploads/2024/03/carros-veiculos-automoveis-transito-alegria-dicas-conducao-sem-stress-1.jpg


   Termo: 'car door scratch dent' -> Buscando até completar 200 imagens na pasta...
   Termo: 'broken tail light car' -> Buscando até completar 250 imagens na pasta...


2026-01-13 19:31:03,825 - ERROR - downloader - Response status code 403, file https://preview.redd.it/2f31719pokz81.jpg
2026-01-13 19:31:08,965 - ERROR - downloader - Exception caught when downloading file https://www.carparts.com/blog/wp-content/uploads/2022/08/damaged-tail-light.jpg, error: HTTPSConnectionPool(host='www.carparts.com', port=443): Read timed out. (read timeout=5), remaining retry times: 2
2026-01-13 19:31:14,300 - ERROR - downloader - Exception caught when downloading file https://www.carparts.com/blog/wp-content/uploads/2022/08/damaged-tail-light.jpg, error: HTTPSConnectionPool(host='www.carparts.com', port=443): Read timed out. (read timeout=5), remaining retry times: 1
2026-01-13 19:31:20,636 - ERROR - downloader - Exception caught when downloading file https://www.carparts.com/blog/wp-content/uploads/2022/08/damaged-tail-light.jpg, error: HTTPSConnectionPool(host='www.carparts.com', port=443): Read timed out. (read timeout=5), remaining retry times: 0


   Termo: 'minor car crash street' -> Buscando até completar 300 imagens na pasta...

>>> Iniciando downloads para a classe: DATASET_NO_ACCIDENT
   Termo: 'sedan car side view street' -> Buscando até completar 50 imagens na pasta...


2026-01-13 19:31:35,696 - ERROR - downloader - Response status code 400, file https://media.istockphoto.com/id/494093356/photo/generic-black-car-side-view.jpg
2026-01-13 19:31:39,026 - ERROR - downloader - Response status code 400, file https://media.istockphoto.com/id/465876806/photo/street-in-shanghai-lujiazui-china.jpg


   Termo: 'hatchback parked on road' -> Buscando até completar 100 imagens na pasta...
   Termo: 'carros no transito cidade' -> Buscando até completar 150 imagens na pasta...


2026-01-13 19:32:04,907 - ERROR - downloader - Exception caught when downloading file https://motorshow.com.br/wp-content/uploads/sites/2/2024/10/transito01.jpg, error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), remaining retry times: 2
2026-01-13 19:32:05,336 - ERROR - downloader - Exception caught when downloading file https://motorshow.com.br/wp-content/uploads/sites/2/2024/10/transito01.jpg, error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), remaining retry times: 1
2026-01-13 19:32:05,706 - ERROR - downloader - Exception caught when downloading file https://motorshow.com.br/wp-content/uploads/sites/2/2024/10/transito01.jpg, error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), remaining retry times: 0


   Termo: 'suv driving on highway' -> Buscando até completar 200 imagens na pasta...
   Termo: 'common car front view street' -> Buscando até completar 250 imagens na pasta...

DOWNLOAD CONCLUÍDO!
Verifique as pastas. Agora você deve ter uma grande variedade de imagens.


# Iniciando a criação do CSV

## Definição das classes e seus números (Rótulos)

In [9]:
# Numeric label per class folder, assigned by position:
# severe accident = 0, moderate accident = 1, no accident = 2.
classes_num = {
    nome_classe: rotulo
    for rotulo, nome_classe in enumerate(
        (
            "dataset_severe_accident",
            "dataset_moderate_accident",
            "dataset_no_accident",
        )
    )
}

# Side length, in pixels, of the square every image is resized to.
img_size = 64

# Accumulator: one row per image (flattened pixels followed by the label).
dados_lista = list()

## Processamento

In [10]:
# Start from an empty row list so that re-running this cell does not append
# the same images a second time.
dados_lista = []

# Turn every readable image of every class folder into one row:
# img_size * img_size grayscale pixel values followed by the class label.
for nome_pasta, rotulo in classes_num.items():

    count = 0      # images converted into rows for this folder
    ignoradas = 0  # entries cv2 could not decode (non-images, corrupt files, sub-folders)

    caminho_pasta = os.path.join(base_dir, nome_pasta)

    # isdir (not exists): a plain file with the class name would make listdir fail.
    if not os.path.isdir(caminho_pasta):
        print(f"ERRO: A pasta '{nome_pasta}' não foi encontrada em {base_dir}")
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the resulting table reproducible between runs.
    for nome_arquivo in sorted(os.listdir(caminho_pasta)):
        try:
            caminho_img = os.path.join(caminho_pasta, nome_arquivo)

            img_matriz = cv2.imread(caminho_img, cv2.IMREAD_GRAYSCALE)

            # cv2.imread signals "could not read" by returning None, not by raising.
            if img_matriz is None:
                ignoradas += 1
                continue

            img_resized = cv2.resize(img_matriz, (img_size, img_size))

            img_vectorized = img_resized.flatten()

            # tolist() yields plain Python ints instead of NumPy scalars.
            dados_imagem = img_vectorized.tolist()
            dados_imagem.append(rotulo)

            dados_lista.append(dados_imagem)
            count += 1

        except Exception as e:
            # Best effort: report the failing file and continue with the next one.
            print(f"Erro ao ler {nome_arquivo}: {e}")

    print(f"Processadas {count} imagens na pasta '{nome_pasta}'.")
    if ignoradas:
        print(f"  {ignoradas} arquivo(s) ignorado(s) em '{nome_pasta}' (não puderam ser lidos como imagem).")

Processadas 97 imagens na pasta 'dataset_severe_accident'.
Processadas 124 imagens na pasta 'dataset_moderate_accident'.
Processadas 141 imagens na pasta 'dataset_no_accident'.


## Salvando o Arquivo

In [11]:
# Header: one column per pixel of the flattened image, then the label column.
colunas = [*(f'pixel_{i}' for i in range(img_size ** 2)), 'rotulo (target)']

# Each entry of dados_lista becomes one row of the table.
df = pd.DataFrame(data=dados_lista, columns=colunas)

# The CSV is written next to the class folders, without the index column.
caminho_csv = os.path.join(base_dir, 'dados_acidentes.csv')
df.to_csv(path_or_buf=caminho_csv, index=False)

## Exibindo estatísticas

In [12]:
# Display the assembled table: one row per image, pixel columns plus the label column.
df

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_4087,pixel_4088,pixel_4089,pixel_4090,pixel_4091,pixel_4092,pixel_4093,pixel_4094,pixel_4095,rotulo (target)
0,133,185,245,127,28,48,76,66,23,26,...,17,28,6,5,8,7,25,44,9,0
1,142,143,145,147,149,151,152,153,155,154,...,19,19,15,21,18,20,20,20,17,0
2,69,77,80,76,70,125,77,73,64,88,...,11,18,13,71,71,123,144,142,144,0
3,16,19,19,27,22,16,18,16,16,16,...,139,17,25,16,23,23,90,113,50,0
4,249,246,252,252,252,252,252,252,252,252,...,204,204,205,203,201,195,179,191,181,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,176,110,133,174,82,115,184,65,56,45,...,45,54,57,83,79,47,50,50,70,2
358,42,32,24,28,42,36,25,34,33,24,...,27,22,19,19,19,58,21,15,40,2
359,150,151,152,152,153,154,154,154,154,154,...,61,55,128,156,166,148,164,158,144,2
360,89,94,103,106,111,113,116,120,123,127,...,12,16,13,14,13,12,12,13,11,2


In [13]:
# Number of rows (images) for each class label.
df.groupby('rotulo (target)').size()

rotulo (target)
0     97
1    124
2    141
dtype: int64