In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import kaggle as kg
import os
import shutil
from tqdm.notebook import tqdm

In [4]:
# Fetch the Kaggle dataset once; later runs reuse the local copy.
dataset_handle = "abdelghaniaaba/wildfire-prediction-dataset"
destination_path = "./datasets/wildfire-prediction"

os.makedirs(destination_path, exist_ok=True)

# Only hit the network while the destination is still empty, so the cell is
# safe under "Restart & Run All" without commenting code in and out.
# Any existing content (including the split folders left behind once their
# files have been moved elsewhere) counts as "already downloaded"; delete the
# folder to force a fresh download.
if os.listdir(destination_path):
    print(f"Dataset already present in '{destination_path}', skipping download.")
else:
    print(f"Downloading dataset to '{destination_path}'...")
    # Use the kaggle package's Python client (imported as `kg`) instead of
    # shelling out: a failed download raises here rather than being ignored.
    kg.api.dataset_download_files(
        dataset_handle, path=destination_path, unzip=True, quiet=False
    )
    print("Download Successful")


Download Successful


In [5]:
# Create one flat folder per class for the merged ("unsplit") dataset
unsplit_dataset = "./datasets/unsp_wildfire-prediction"
print(f"Unspliting the dataset in '{destination_path}'...")

final_wildfire_path, final_no_wildfire_path = (
    os.path.join(unsplit_dataset, class_dir)
    for class_dir in ('wildfire', 'nowildfire')
)

# exist_ok keeps the cell re-runnable when the folders are already there
for class_folder in (final_wildfire_path, final_no_wildfire_path):
    os.makedirs(class_folder, exist_ok=True)

print(f"Folders created in '{unsplit_dataset}'\n")

Unspliting the dataset in './datasets/wildfire-prediction'...
Folders created in './datasets/unsp_wildfire-prediction'



In [5]:
total_files_moved = 0
split_folders = ['train', 'test', 'valid']

# Every (split, class) combination, in the order train -> test -> valid and
# wildfire -> nowildfire within each split.
split_class_pairs = [
    (split, class_name)
    for split in split_folders
    for class_name in ['wildfire', 'nowildfire']
]

for split, class_name in split_class_pairs:
    source_folder = os.path.join(destination_path, split, class_name)
    destination_folder = os.path.join(unsplit_dataset, class_name)

    if os.path.isdir(source_folder):
        progress = tqdm(
            os.listdir(source_folder),
            desc=f"Moving from {split}/{class_name}",
        )
        for file_name in progress:
            # Prefix the file name with its split so files coming from
            # different splits get distinct names in the shared class folder.
            shutil.move(
                os.path.join(source_folder, file_name),
                os.path.join(destination_folder, f"{split}_{file_name}"),
            )
            total_files_moved += 1
    else:
        print(f"Warning: Source folder not found, skipping: {source_folder}")

print(f"Process done! Total of {total_files_moved} files moved.")
print(f"Unsplited dataset is at '{unsplit_dataset}'.")

Unspliting the dataset in './datasets/wildfire-prediction'...
Folders created in './datasets/unsp_wildfire-prediction'



Moving from train/wildfire: 100%|██████████████████████████████████████████████| 15750/15750 [00:04<00:00, 3250.90it/s]
Moving from train/nowildfire: 100%|████████████████████████████████████████████| 14500/14500 [00:04<00:00, 3206.65it/s]
Moving from test/wildfire: 100%|█████████████████████████████████████████████████| 3480/3480 [00:01<00:00, 3265.16it/s]
Moving from test/nowildfire: 100%|███████████████████████████████████████████████| 2820/2820 [00:00<00:00, 3209.79it/s]
Moving from valid/wildfire: 100%|████████████████████████████████████████████████| 3480/3480 [00:01<00:00, 3267.54it/s]
Moving from valid/nowildfire: 100%|██████████████████████████████████████████████| 2820/2820 [00:00<00:00, 3220.41it/s]

Process done! Total of 42850 files moved.
Unsplited dataset is at './datasets/unsp_wildfire-prediction'.





In [6]:
# Indo para o processo de extração de características
from desc_imagens.lbp import lbp
import cv2
from joblib import Parallel, delayed


# File names currently available for each class in the unsplit dataset
wildfire_files, no_wildfire_files = (
    os.listdir(class_dir)
    for class_dir in (final_wildfire_path, final_no_wildfire_path)
)

# Extracted features and labels are stored as .npy files under this folder
output_folder = './output/output_features'
os.makedirs(output_folder, exist_ok=True)

features_file, labels_file = (
    os.path.join(output_folder, npy_name)
    for npy_name in ('features_lbp.npy', 'labels_lbp.npy')
)


In [10]:
def process_image(img_path, label):
    """Read one image as grayscale and compute its LBP feature vector.

    Args:
        img_path: Path of the image file to read.
        label: Class label, passed through unchanged next to the features.

    Returns:
        ``(feature_vector, label)`` when the image was read, otherwise
        ``(None, None)`` so callers can filter out unreadable files.
    """
    grayscale = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # A None result means the image could not be read; report it as a
    # (None, None) pair instead of raising.
    if grayscale is None:
        return None, None

    return lbp(grayscale), label

In [15]:
# Extract LBP features for every image and save the features and their
# labels under './output/output_features'

tasks = []
classes = {
    'wildfire': 1,
    'nowildfire': 0
}


print('Preparing processing task list...')
for class_name, label in classes.items():
    class_path = os.path.join(unsplit_dataset, class_name)
    # os.listdir returns entries in arbitrary order; sort them so the row
    # order of the saved arrays is the same on every run and machine.
    image_files = sorted(os.listdir(class_path))
    for file_name in image_files:
        tasks.append((os.path.join(class_path, file_name), label))

print(f"Total of {len(tasks)} images to process.")

print("Starting feature extraction...")

# Use every CPU core. tqdm wraps the task iterable that joblib consumes, so
# the bar tracks tasks being handed out rather than tasks finishing.
results = Parallel(n_jobs=-1)(
    delayed(process_image)(path, lbl) for path, lbl in tqdm(tasks)
)

print("\nSpliting features and labels...")
features = []
labels = []

# process_image returns (None, None) for unreadable images; drop those pairs
for feature, label in results:
    if feature is not None:
        features.append(feature)
        labels.append(label)

# Make dropped images visible instead of silently shrinking the dataset
skipped = len(tasks) - len(features)
if skipped:
    print(f"Warning: {skipped} images could not be read and were skipped.")

# Convert the lists to numpy arrays
print("Converting to numpy array and saving...")

X = np.array(features)
y = np.array(labels)

print(f'Features array dimension (X): {X.shape}')
print(f'Labels array dimension (y): {y.shape}')

print(f"Saving the arrays in '{output_folder}'...")
np.save(features_file, X)
np.save(labels_file, y)
print(f"Files saved: \n- {features_file}\n- {labels_file}")

Preparing processing task list...
Total of 42850 images to process.
Starting feature extraction...


  0%|          | 0/42850 [00:00<?, ?it/s]


Spliting features and labels...
Converting to numpy array and saving...
Features array dimension (X): (42850, 59)
Labels array dimension (y): (42850,)
Saving the arrays in './output/output_features'...
Files saved: 
- ./output/output_features\features_lbp.npy
- ./output/output_features\labels_lbp.npy


In [7]:
# Reload the feature and label arrays previously saved to the output folder,
# so the cells below can run without repeating the extraction step.
print('Loading features and labels...')

# X: feature array read from features_file
X = np.load(features_file)
print(f'Features array dimension (X): {X.shape}')
# y: label array read from labels_file
y = np.load(labels_file)
print(f'Labels array dimension (y): {y.shape}')


Features array dimension (X): (42850, 59)
Labels array dimension (y): (42850,)


In [None]:
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.svm import SVC

# Hold out a test set (80% train, 20% test). The training portion is named
# X_temp / y_temp.
# A fixed seed makes the split reproducible across kernel restarts, and
# stratify=y keeps the class proportions the same in both parts.
RANDOM_STATE = 42
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)
# Report the sizes from the variables that actually exist (X_temp, not X_train)
print(f'Data split into {len(X_temp)} train samples and {len(X_test)} test samples.')

# Starting the hyperparameter optimization and evaluation process using cross-validation
