In [159]:
import os
import pandas as pd

# Caminho para a pasta de imagens
images_path = '/teamspace/studios/this_studio/unsupervised_disaster/extracted_data/images'

# Função para processar os nomes das imagens
def process_image_metadata(images_folder):
    data = []

    # Iterar sobre todos os arquivos na pasta de imagens
    for filename in os.listdir(images_folder):
        if filename.endswith('.png') or filename.endswith('.jpg'):
            # Separar informações do nome da imagem
            parts = filename.split('_')
            disaster_type = parts[0]  # Exemplo: 'guatemala-volcano' ou 'socal-fire'
            disaster_status = "Com desastre" if "post" in parts[-2] else "Sem desastre"
            image_reference = os.path.splitext(filename)[0]  # Nome sem extensão
            
            # Adicionar informações à lista
            data.append({
                "Image Reference": image_reference,
                "Image Name": filename,
                "Disaster Type": disaster_type,
                "Disaster Status": disaster_status
            })
    
    # Criar DataFrame
    df = pd.DataFrame(data)
    return df

# Processar imagens e criar DataFrame
image_metadata_df = process_image_metadata(images_path)

In [160]:
image_metadata_df

Unnamed: 0,Image Reference,Image Name,Disaster Type,Disaster Status
0,hurricane-harvey_00000015_pre_disaster,hurricane-harvey_00000015_pre_disaster.png,hurricane-harvey,Sem desastre
1,hurricane-harvey_00000228_pre_disaster,hurricane-harvey_00000228_pre_disaster.png,hurricane-harvey,Sem desastre
2,hurricane-michael_00000261_pre_disaster,hurricane-michael_00000261_pre_disaster.png,hurricane-michael,Sem desastre
3,hurricane-matthew_00000103_post_disaster,hurricane-matthew_00000103_post_disaster.png,hurricane-matthew,Com desastre
4,hurricane-harvey_00000216_pre_disaster,hurricane-harvey_00000216_pre_disaster.png,hurricane-harvey,Sem desastre
...,...,...,...,...
5593,guatemala-volcano_00000008_pre_disaster,guatemala-volcano_00000008_pre_disaster.png,guatemala-volcano,Sem desastre
5594,palu-tsunami_00000056_post_disaster,palu-tsunami_00000056_post_disaster.png,palu-tsunami,Com desastre
5595,socal-fire_00001156_post_disaster,socal-fire_00001156_post_disaster.png,socal-fire,Com desastre
5596,socal-fire_00000537_pre_disaster,socal-fire_00000537_pre_disaster.png,socal-fire,Sem desastre


In [161]:
# Contagem por tipo de desastre natural
disaster_counts = image_metadata_df['Disaster Type'].value_counts()

# Contagem de imagens com desastre e sem desastre (total e por tipo de desastre)
status_counts_total = image_metadata_df['Disaster Status'].value_counts()
status_counts_by_disaster = image_metadata_df.groupby(['Disaster Type', 'Disaster Status']).size()

# Exibir os resultados
print("Contagem por tipo de desastre natural:")
print(disaster_counts)

print("\nContagem total de imagens com desastre e sem desastre:")
print(status_counts_total)

print("\nContagem de imagens com desastre e sem desastre por tipo de desastre:")
print(status_counts_by_disaster)

Contagem por tipo de desastre natural:
Disaster Type
socal-fire             1646
hurricane-michael       686
hurricane-harvey        638
hurricane-florence      638
midwest-flooding        558
hurricane-matthew       476
santa-rosa-wildfire     452
mexico-earthquake       242
palu-tsunami            226
guatemala-volcano        36
Name: count, dtype: int64

Contagem total de imagens com desastre e sem desastre:
Disaster Status
Sem desastre    2799
Com desastre    2799
Name: count, dtype: int64

Contagem de imagens com desastre e sem desastre por tipo de desastre:
Disaster Type        Disaster Status
guatemala-volcano    Com desastre        18
                     Sem desastre        18
hurricane-florence   Com desastre       319
                     Sem desastre       319
hurricane-harvey     Com desastre       319
                     Sem desastre       319
hurricane-matthew    Com desastre       238
                     Sem desastre       238
hurricane-michael    Com desastre       3

In [162]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
import torch.nn.functional as F


In [173]:
# Configurações gerais
IMG_HEIGHT, IMG_WIDTH = 224, 224
BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Caminhos
image_dir = '/Users/thiago/Desktop/dataset_AMNS/train/images'

# 1. Preparação dos dados
df = image_metadata_df.copy()
df['Image Path'] = df['Image Name'].apply(lambda x: os.path.join(image_dir, x))
df['Label'] = df['Disaster Status'].map({'Com desastre': 1, 'Sem desastre': 0})

# Divisão em treino e validação
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['Label'], random_state=42)

In [174]:
# Classe que o GPT criou para organizar o dataset
class DisasterDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['Image Path']
        label = self.dataframe.iloc[idx]['Label']
        image = Image.open(img_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)
        
        return image, torch.tensor(label, dtype=torch.float32)

In [175]:
# Transformações 
data_transforms = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [176]:
# Criar datasets e dataloaders
train_dataset = DisasterDataset(train_df, transform=data_transforms)
val_dataset = DisasterDataset(val_df, transform=data_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [177]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1)
        self.flatten_size = None  # Será calculado dinamicamente
        self.fc1 = nn.Linear(1, 120)  # Placeholder
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        if self.flatten_size is None:  # Calcular tamanho apenas uma vez
            self.flatten_size = x.numel() // x.size(0)
            self.fc1 = nn.Linear(self.flatten_size, 120).to(x.device)

        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [178]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = self.shortcut(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += residual  # Soma o atalho à saída convolucional
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, num_classes=1):
        super(ResNet, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.layer1 = ResidualBlock(64, 64, stride=1)
        self.layer2 = ResidualBlock(64, 128, stride=2)
        self.layer3 = ResidualBlock(128, 128, stride=2)
        self.layer4 = ResidualBlock(128, 256, stride=2)

        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


In [179]:
import torch
from torch import nn
torch.backends.nnpack.enabled = False

# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Instanciar o modelo
model = ResNet(num_classes=1).to(device)
#model = LeNet().to(device)
model = model.float()
print(model)

# altere a função de custo e o otimizador
#loss_fn = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

Using mps device
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (layer1): ResidualBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (shortcut): Sequential()
  )
  (layer2): ResidualBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 

In [180]:
def train(dataloader, model, loss_fn, optimizer):
    model.train()
    total_loss = 0
    correct = 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device).unsqueeze(1)  # Ajustar dimensões do rótulo
        X = X.to(torch.float32)
        #print(f"X dtype: {X.dtype}, X shape: {X.shape}")
        #print(f"y dtype: {y.dtype}, y shape: {y.shape}")
    
        # Forward pass
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        correct += ((torch.sigmoid(pred) > 0.5) == y).type(torch.float).sum().item()
    
    avg_loss = total_loss / len(dataloader)
    accuracy = correct / len(dataloader.dataset)
    return avg_loss, accuracy

# Função de validação
def validate(dataloader, model, loss_fn):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device).unsqueeze(1)
            pred = model(X)
            loss = loss_fn(pred, y)
            total_loss += loss.item()
            correct += ((torch.sigmoid(pred) > 0.5) == y).type(torch.float).sum().item()
    
    avg_loss = total_loss / len(dataloader)
    accuracy = correct / len(dataloader.dataset)
    return avg_loss, accuracy

In [181]:
EPOCHS = 5

In [182]:
from tqdm import tqdm, trange

train_loss_hist = []
val_loss_hist = []
train_acc_hist = []
val_acc_hist = []

for epoch in trange(EPOCHS, desc="Epochs", unit="epoch"):
    train_loss, train_acc = train(train_loader, model, loss_fn, optimizer)
    val_loss, val_acc = validate(val_loader, model, loss_fn)

    train_loss_hist.append(train_loss)
    val_loss_hist.append(val_loss)
    train_acc_hist.append(train_acc)
    val_acc_hist.append(val_acc)

    tqdm.write(f"Epoch {epoch+1}/{EPOCHS}")
    tqdm.write(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    tqdm.write(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

print("Treinamento concluído!")


Epochs:  20%|██        | 1/5 [08:01<32:06, 481.70s/epoch]

Epoch 1/5
Train Loss: 0.6298, Train Acc: 0.6447
Val Loss: 0.6118, Val Acc: 0.6616


Epochs:  40%|████      | 2/5 [17:31<26:40, 533.61s/epoch]

Epoch 2/5
Train Loss: 0.5715, Train Acc: 0.7057
Val Loss: 0.5923, Val Acc: 0.6580


Epochs:  60%|██████    | 3/5 [25:40<17:06, 513.11s/epoch]

Epoch 3/5
Train Loss: 0.5407, Train Acc: 0.7269
Val Loss: 0.4883, Val Acc: 0.7589


Epochs:  80%|████████  | 4/5 [33:40<08:20, 500.23s/epoch]

Epoch 4/5
Train Loss: 0.5028, Train Acc: 0.7544
Val Loss: 0.6071, Val Acc: 0.7054


Epochs: 100%|██████████| 5/5 [45:46<00:00, 549.21s/epoch]

Epoch 5/5
Train Loss: 0.4923, Train Acc: 0.7660
Val Loss: 0.4426, Val Acc: 0.7920
Treinamento concluído!



