In [71]:
import kagglehub
import os

# Download latest version
path = kagglehub.dataset_download("fmena14/volcanoesvenus")
files = os.listdir(path)

print("Path:", path)
print(files)

Path: /Users/gabrieledurante/.cache/kagglehub/datasets/fmena14/volcanoesvenus/versions/1
['Volcanoes_train', 'Volcanoes_test']


In [72]:
import numpy as np
import pandas as pd

X_train = pd.read_csv(os.path.join(path, 'volcanoes_train', 'train_images.csv'))
y_train = pd.read_csv(os.path.join(path, 'volcanoes_train', 'train_labels.csv'))
X_test = pd.read_csv(os.path.join(path, 'volcanoes_test', 'test_images.csv'))
y_test = pd.read_csv(os.path.join(path, 'volcanoes_test', 'test_labels.csv'))

In [73]:
def XFix(X):
    X.loc[-1] = X.columns.values.astype(float).astype(int)  # adding column names as a new row
    X.index = X.index + 1  # shifting index
    X.sort_index(inplace=True)  # sorting the index

XFix(X_train)
XFix(X_test)

In [74]:
print("X_train shape:\t",X_train.shape,"\ny_train shape:\t",y_train.shape,"\nX_test shape:\t",X_test.shape,"\ny_test shape:\t",y_test.shape)

X_train shape:	 (7000, 12100) 
y_train shape:	 (7000, 4) 
X_test shape:	 (2734, 12100) 
y_test shape:	 (2734, 4)


In [75]:
y_train = y_train["Volcano?"]
y_test = y_test["Volcano?"]

In [76]:
X_train = X_train.values.astype(np.float32)
y_train = y_train.values.astype(np.int64)
X_test = X_test.values.astype(np.float32)
y_test = y_test.values.astype(np.int64)

# Normalize pixel values to [0, 1]
X_train /= 255.0
X_test /= 255.0

In [77]:
from tensorflow.keras.utils import to_categorical # convert to one-hot-encoding
y_train = to_categorical(y_train, num_classes = 2)
y_test = to_categorical(y_test, num_classes = 2)

In [78]:
# import torchvision
# from torchvision import transforms
# in this case, we dont have to transform images in tensor because we have images in csv

import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader

class VolcanoDataset(Dataset):
   def __init__ (self, images, labels):
      self.images = torch.tensor(images, dtype = torch.float64)
      self.labels = torch.tensor(labels, dtype = torch.float64)
      
   def __len__(self):
      return len(self.images)
   
   def __getitem__(self, idx):
      return self.images[idx], self.labels[idx]

train_dataset = VolcanoDataset(X_train, y_train)
test_dataset = VolcanoDataset(X_test, y_test)

batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle= True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [79]:
# NN model
import torch.nn as nn

hidden_units = [64, 32]
input_size = X_train.shape[1] # 12100 (110x110)
output_size = len(np.unique(y_train))

all_layers = []
for hidden_unit in hidden_units:
   all_layers.append(nn.Linear(input_size, hidden_unit))
   all_layers.append(nn.ReLU())
   input_size = hidden_unit # Update input_size for the next layer
   
all_layers.append(nn.Linear(hidden_units[-1], output_size))  # Final layer

model = nn.Sequential(*all_layers)
print(model)

Sequential(
  (0): Linear(in_features=12100, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=32, bias=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=2, bias=True)
)


In [98]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
torch.manual_seed(1)

def train_model(model, train_loader, epochs=25, device=None):
    
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        correct_preds = 0
        total_preds = 0

        for images, labels in train_loader:
            images = images.to(device).float()  # Ensure images are float32 and on the correct device
            labels = labels.to(device).long()  # Ensure labels are long and on the correct device

            # Convert one-hot encoded labels to class indices
            if labels.dim() == 2:  # Check if labels are one-hot encoded
                labels = torch.argmax(labels, dim=1)

            optimizer.zero_grad()

            outputs = model(images)  # Forward pass
            loss = loss_fn(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()

            # Compute accuracy
            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = correct_preds / total_preds

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")
    
    accuracy = correct_preds / total_preds
    print(f'Accuracy: {accuracy:.4f}')

train_model(model, train_loader)

Epoch 1/25, Loss: 0.0886, Accuracy: 0.9807
Epoch 2/25, Loss: 0.0692, Accuracy: 0.9834
Epoch 3/25, Loss: 0.3414, Accuracy: 0.8969
Epoch 4/25, Loss: 0.4051, Accuracy: 0.8571
Epoch 5/25, Loss: 0.1739, Accuracy: 0.9444
Epoch 6/25, Loss: 0.1998, Accuracy: 0.9563
Epoch 7/25, Loss: 0.0940, Accuracy: 0.9789
Epoch 8/25, Loss: 0.0616, Accuracy: 0.9877
Epoch 9/25, Loss: 0.0657, Accuracy: 0.9844
Epoch 10/25, Loss: 0.0660, Accuracy: 0.9844
Epoch 11/25, Loss: 0.4084, Accuracy: 0.8764
Epoch 12/25, Loss: 0.0892, Accuracy: 0.9804
Epoch 13/25, Loss: 0.0542, Accuracy: 0.9899
Epoch 14/25, Loss: 0.0988, Accuracy: 0.9750
Epoch 15/25, Loss: 0.0558, Accuracy: 0.9884
Epoch 16/25, Loss: 0.2027, Accuracy: 0.9397
Epoch 17/25, Loss: 0.0769, Accuracy: 0.9823
Epoch 18/25, Loss: 0.0807, Accuracy: 0.9809
Epoch 19/25, Loss: 0.1513, Accuracy: 0.9616
Epoch 20/25, Loss: 0.0758, Accuracy: 0.9809
Epoch 21/25, Loss: 0.0533, Accuracy: 0.9884
Epoch 22/25, Loss: 0.2051, Accuracy: 0.9470
Epoch 23/25, Loss: 0.0725, Accuracy: 0.98

In [None]:
# BCEWithLogitsLoss for binary classification
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
torch.manual_seed(1)

def train_model(model, train_loader, epochs=25, device=None):
    
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        correct_preds = 0

        for images, labels in train_loader:
            images = images.to(device).float()
            labels = labels.to(device).float()

            optimizer.zero_grad()

            outputs = model(images)
            loss = loss_fn(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            predicted = (torch.sigmoid(outputs) > 0.5).float()  # Apply sigmoid and threshold at 0.5
            correct_preds += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = (correct_preds / len(train_loader.dataset)) -1   # Calculate accuracy as fraction of total dataset
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")
    
    accuracy = (correct_preds / len(train_loader.dataset)) - 1
    print(f'Accuracy: {accuracy:.4f}')

train_model(model, train_loader)

Epoch 1/25, Loss: 0.0820, Accuracy: 0.9533
Epoch 2/25, Loss: 0.0837, Accuracy: 0.9580
Epoch 3/25, Loss: 0.1341, Accuracy: 0.9206
Epoch 4/25, Loss: 0.0633, Accuracy: 0.9689
Epoch 5/25, Loss: 0.2527, Accuracy: 0.8491
Epoch 6/25, Loss: 0.1258, Accuracy: 0.9239
Epoch 7/25, Loss: 0.0821, Accuracy: 0.9526
Epoch 8/25, Loss: 0.0714, Accuracy: 0.9671
Epoch 9/25, Loss: 0.1414, Accuracy: 0.9203
Epoch 10/25, Loss: 0.0703, Accuracy: 0.9680
Epoch 11/25, Loss: 0.1549, Accuracy: 0.9239
Epoch 12/25, Loss: 0.0678, Accuracy: 0.9653
Epoch 13/25, Loss: 0.0588, Accuracy: 0.9739
Epoch 14/25, Loss: 0.0655, Accuracy: 0.9669
Epoch 15/25, Loss: 0.2891, Accuracy: 0.8246
Epoch 16/25, Loss: 0.3678, Accuracy: 0.7333
Epoch 17/25, Loss: 0.0997, Accuracy: 0.9516
Epoch 18/25, Loss: 0.1094, Accuracy: 0.9397
Epoch 19/25, Loss: 0.1648, Accuracy: 0.9069
Epoch 20/25, Loss: 0.0927, Accuracy: 0.9537
Epoch 21/25, Loss: 0.0633, Accuracy: 0.9704
Epoch 22/25, Loss: 0.2390, Accuracy: 0.8610
Epoch 23/25, Loss: 0.0888, Accuracy: 0.95