## Imports

In [2]:
import os
import shutil
import re
import cv2
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, f1_score

## Data Augmentation

In [None]:
# Offline data augmentation: mirror the folder layout of `orig_dir` into a
# fresh `aug_dir`, re-save every source image there and add `n_aug` randomly
# transformed variants per image (the count depends on split and class).
orig_dir = 'dataset_redimensionado'
aug_dir = 'dataset_aumentado'


def _ignore_files(folder, names):
    """Return the entries of *folder* that are not directories.

    Used as the ``ignore`` callback of ``shutil.copytree`` so that only the
    directory skeleton is replicated and no image is copied just to be
    deleted again.
    """
    return [name for name in names if not os.path.isdir(os.path.join(folder, name))]


# Always start from an empty output tree so stale files from a previous run
# cannot leak into the new dataset.
if os.path.exists(aug_dir):
    shutil.rmtree(aug_dir)
shutil.copytree(orig_dir, aug_dir, ignore=_ignore_files, dirs_exist_ok=True)

# Generator used for the training split.
datagen_train = ImageDataGenerator(
    zoom_range=0.1,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='constant',
    cval=0
)

# Generator used for the validation and test splits.
datagen_val_test = ImageDataGenerator(
    horizontal_flip=True,
    brightness_range=[0.9, 1.1],
    rescale=1./255
)

# Number of augmented copies generated per source image, by split and class.
n_aug_dict = {
    'train': {'poluido': 2, 'nao_poluido': 9},
    'val': {'poluido': 0, 'nao_poluido': 1},
    'test': {'poluido': 0, 'nao_poluido': 1}
}

for split in ['train', 'val', 'test']:
    # The generator depends only on the split, so pick it once per split.
    datagen = datagen_train if split == 'train' else datagen_val_test

    for classe in ['poluido', 'nao_poluido']:
        src_folder = os.path.join(orig_dir, split, classe)
        dst_folder = os.path.join(aug_dir, split, classe)
        os.makedirs(dst_folder, exist_ok=True)
        n_aug = n_aug_dict[split][classe]

        for fname in os.listdir(src_folder):
            if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img = load_img(os.path.join(src_folder, fname))
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)  # add the batch axis expected by flow()

            # Keep a copy of the source image next to its augmented variants.
            # NOTE(review): array_to_img defaults to scale=True, which per the
            # Keras docs stretches each image so its min/max become 0/255.
            # Every file written here is therefore contrast-stretched, and the
            # `rescale=1./255` of datagen_val_test presumably has no visible
            # effect on the saved files -- confirm this is intended.
            array_to_img(x[0]).save(os.path.join(dst_folder, fname))

            if n_aug <= 0:
                continue

            # flow() yields batches forever; zip() with a finite range stops
            # after exactly `n_aug` batches have been drawn.
            stem = os.path.splitext(fname)[0]
            for i, batch in zip(range(1, n_aug + 1), datagen.flow(x, batch_size=1)):
                new_fname = f"{stem}_aug{i}.jpg"
                array_to_img(batch[0]).save(os.path.join(dst_folder, new_fname))

print("✅ Augmentation finalizado. Imagens salvas em:", aug_dir)

## Treinamento com Transfer Learning e MobileNetV2

In [None]:
# Transfer learning with a frozen MobileNetV2 backbone: train the same
# architecture 20 times, log the metrics of every run to a text file, keep the
# model with the best F1 and store the training histories / confusion matrices.
from tensorflow.keras.backend import clear_session

train_dir = 'dataset_aumentado/train'
val_dir = 'dataset_aumentado/val'
test_dir = 'dataset_aumentado/test'

train_gen = ImageDataGenerator(rescale=1./255)
val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

train_data = train_gen.flow_from_directory(
    train_dir, target_size=(224, 224), batch_size=16, class_mode='binary'
)
val_data = val_gen.flow_from_directory(
    val_dir, target_size=(224, 224), batch_size=16, class_mode='binary'
)
# shuffle=False keeps the prediction order aligned with `test_data.classes`.
test_data = test_gen.flow_from_directory(
    test_dir, target_size=(224, 224), batch_size=16, class_mode='binary', shuffle=False
)

# Start every session with a fresh result file; runs are appended to it below.
result_file = 'resultados_20_execucoes2.txt'
if os.path.exists(result_file):
    os.remove(result_file)

# Class names ordered by the integer label the generator assigned to them, so
# the report rows always match the labels actually used for training.
target_names = [
    name for name, _ in sorted(test_data.class_indices.items(), key=lambda item: item[1])
]
label_ids = list(range(len(target_names)))

# Balanced class weights: each class contributes equally to the loss.
# They depend only on the training labels, so compute them once for all runs.
total_train = train_data.classes.shape[0]
class_0 = np.sum(train_data.classes == 0)
class_1 = np.sum(train_data.classes == 1)
weight_for_0 = (1 / class_0) * (total_train / 2.0)
weight_for_1 = (1 / class_1) * (total_train / 2.0)
class_weights = {0: weight_for_0, 1: weight_for_1}

best_f1 = 0

# Per-run training curves and confusion matrices, saved as .npy at the end.
all_acc = []
all_val_acc = []
all_loss = []
all_val_loss = []
all_conf_matrices = []

print("\n✅ Iniciando as 20 execuções...\n")

# ========================
# Runs (train 20 times)
# ========================
for exec_num in range(1, 21):
    print(f"\n========== Execução {exec_num} ==========\n")

    # Drop the graph/state of the previous run; otherwise every new model
    # built in this loop adds to Keras' global state and memory keeps growing.
    clear_session()

    # Frozen ImageNet backbone + small trainable classification head.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    preds = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=preds)
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

    history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=20,
        class_weight=class_weights,
        verbose=2
    )

    all_acc.append(history.history['accuracy'])
    all_val_acc.append(history.history['val_accuracy'])
    all_loss.append(history.history['loss'])
    all_val_loss.append(history.history['val_loss'])

    # Test-set evaluation; the test loss is not reported, only the accuracy.
    _, acc = model.evaluate(test_data, verbose=0)
    y_true = test_data.classes
    y_pred_probs = model.predict(test_data, verbose=0).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # Explicit `labels` keep the report and the matrix at a fixed size even if
    # one class never shows up, so the matrices can be stacked into one array.
    report = classification_report(y_true, y_pred, labels=label_ids, target_names=target_names)
    conf_matrix = confusion_matrix(y_true, y_pred, labels=label_ids)
    all_conf_matrices.append(conf_matrix)
    f1 = f1_score(y_true, y_pred)

    # The "<metric>: <value>" lines are written one per line so they can be
    # located later by their prefix.
    with open(result_file, 'a') as f:
        f.write(f'\n========== Execução {exec_num} ==========\n')
        f.write(f'Train Accuracy: {history.history["accuracy"][-1]:.4f}\n')
        f.write(f'Train Loss: {history.history["loss"][-1]:.4f}\n')
        f.write(f'Val Accuracy: {history.history["val_accuracy"][-1]:.4f}\n')
        f.write(f'Val Loss: {history.history["val_loss"][-1]:.4f}\n')
        f.write(f'Test Accuracy: {acc:.4f}\n')
        f.write(f'F1 no Teste: {f1:.4f}\n')
        f.write('\nClassification Report:\n')
        f.write(report)
        f.write('\nConfusion Matrix:\n')
        f.write(np.array2string(conf_matrix))
        f.write('\n\n')

    # NOTE(review): the best model is chosen by its F1 on the test set, so the
    # test metrics of the saved model are optimistically biased -- consider
    # selecting on the validation set instead.
    if f1 > best_f1:
        best_f1 = f1
        model.save('melhor_modelo.h5')
        print(f'✅ Novo melhor modelo salvo com F1: {best_f1:.4f}')

np.save('all_acc.npy', np.array(all_acc))
np.save('all_val_acc.npy', np.array(all_val_acc))
np.save('all_loss.npy', np.array(all_loss))
np.save('all_val_loss.npy', np.array(all_val_loss))
np.save('all_conf_matrices.npy', np.array(all_conf_matrices))

print("\n✅ As 20 execuções terminaram. Resultados salvos no arquivo:", result_file)
print("✅ Históricos de treino e matrizes de confusão salvos em arquivos .npy")

## Cálculos dos valores médios obtidos

In [None]:
# ==============================================
# Part 2 - Averages over the 20 runs
# ==============================================
# Collect every "<metric>: <value>" line from the result file, then write the
# mean and standard deviation of each metric to a summary file.
metrics_to_average = ['Train Accuracy', 'Train Loss', 'Val Accuracy', 'Val Loss', 'Test Accuracy', 'F1 no Teste']
results = {metric: [] for metric in metrics_to_average}

with open(result_file, 'r') as f:
    for line in f:  # stream the file instead of loading every line up front
        for metric in metrics_to_average:
            if line.startswith(metric):
                # The value is the text after the last colon. float() parses
                # it directly, keeps the sign and also accepts "nan"/"inf"
                # (e.g. a diverged loss) instead of failing on them.
                value = float(line.rsplit(':', 1)[-1].strip())
                results[metric].append(value)
                break  # a line belongs to at most one metric

summary_file = 'resumo_final.txt'
with open(summary_file, 'w') as f:
    f.write("Médias das 20 Execuções:\n\n")
    for metric in metrics_to_average:
        values = results[metric]
        mean = np.mean(values)
        std = np.std(values)
        f.write(f'{metric}: Média = {mean:.4f} | Desvio Padrão = {std:.4f}\n')

print("\n✅ Resumo final com as médias salvo em:", summary_file)

## BoxPlot

In [None]:
# Box plot of every collected metric across the runs; `showmeans=True` also
# marks the mean of each distribution.
plt.figure(figsize=(10, 6))
plt.boxplot([results[m] for m in metrics_to_average], showmeans=True)
# Label the boxes through xticks rather than boxplot's `labels=` keyword, which
# newer Matplotlib releases deprecate in favour of `tick_labels=`; boxes are
# drawn at positions 1..N by default, so this works on old and new versions.
plt.xticks(range(1, len(metrics_to_average) + 1), metrics_to_average)
plt.title('Distribuição das métricas nas 20 execuções')
plt.ylabel('Valor')
plt.grid(True)
plt.show()