In [180]:
import torch as T
import torch.nn as nn

In [181]:
from sklearn.datasets import fetch_covtype
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [192]:
class CovClassificator(nn.Module):
    def __init__(self,hidden_units=128):
        super().__init__()
        self.layer1 = nn.Linear(54, hidden_units)
        self.act1 = nn.ReLU()
        self.drop1 = nn.Dropout(p=0.2)
        self.layer2 = nn.Linear(hidden_units, 64)
        self.act2 = nn.ReLU()
        self.drop2 = nn.Dropout(p=0.2)
        self.output = nn.Linear(64, 7)
    
    def forward(self, x):
        x = self.act1(self.layer1(x))
        x = self.drop1(x)
        x = self.act2(self.layer2(x))
        x = self.drop2(x)
        x = T.nn.functional.softmax(self.output(x), dim=1) 
        return x


In [183]:
device = T.device("cuda:0") if T.cuda.is_available() else T.device("cpu")
#device = T.device("cpu")
net = CovClassificator().to(device)
optimizer = T.optim.Adam(net.parameters(), lr=0.01, weight_decay=1e-5)
loss_fn = nn.CrossEntropyLoss()

In [184]:
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler

# Load the dataset
cov_data = fetch_covtype()

# Separate features and labels
X = cov_data.data
y = cov_data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (optional but often beneficial)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [185]:
from torch.utils.data import DataLoader, TensorDataset

# Convert NumPy arrays to PyTorch tensors
X_train = T.tensor(X_train, dtype=T.float32).to(device)
y_train = T.tensor(y_train, dtype=T.int64).to(device)
X_test = T.tensor(X_test, dtype=T.float32).to(device)
y_test = T.tensor(y_test, dtype=T.int64).to(device)

# Assuming y_train and y_test contain labels in the range 1 to 7
y_train -= 1
y_test -= 1
# Create DataLoader objects
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [186]:
from sklearn.metrics import precision_score, recall_score

def test_model(model, dataloader, loss_fn):
    model.eval()
    test_loss = 0
    correct = 0
    all_preds = []  # To store all predicted labels
    all_targets = []  # To store all true labels

    with T.no_grad():
        for data, target in dataloader:
            output = model(data)
            test_loss += loss_fn(output, target).item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            all_preds.extend(pred.cpu().numpy())  # Convert to CPU and append to the list
            all_targets.extend(target.cpu().numpy())  # Convert to CPU and append to the list

    avg_loss = test_loss / len(dataloader.dataset)
    accuracy = correct / len(dataloader.dataset) * 100

    print(f'Test set: Average loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

    # Calculate precision and recall for each class
    precision = precision_score(all_targets, all_preds, average=None, zero_division=0)
    recall = recall_score(all_targets, all_preds, average=None, zero_division=0)

    # Calculate macro-average precision and recall
    macro_precision = precision_score(all_targets, all_preds, average='weighted', zero_division=1)
    macro_recall = recall_score(all_targets, all_preds, average='weighted', zero_division=0)

    print("Precision for each class:", precision)
    print("Recall for each class:", recall)
    print(f"Macro-average Precision: {macro_precision:.2f}")
    print(f"Macro-average Recall: {macro_recall:.2f}")

# Now you can call the test_model function
test_model(net, test_loader, loss_fn)



Test set: Average loss: 0.0304, Accuracy: 12.90%
Precision for each class: [0.32940246 0.         0.34974093 0.         0.00123686 0.
 0.02088535]
Recall for each class: [0.30324036 0.         0.07583205 0.         0.00100251 0.
 0.38331258]
Macro-average Precision: 0.66
Macro-average Recall: 0.13


In [187]:

# Split the data into tmp and validation sets
X_tmp, X_val, y_tmp, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (optional but often beneficial)
X_val = scaler.transform(X_val)

# Convert NumPy arrays to PyTorch tensors
X_val = T.tensor(X_val, dtype=T.float32).to(device)
y_val = T.tensor(y_val, dtype=T.int64).to(device)

y_val -= 1
# Assuming you have X_val and y_val as your validation data and labels
# Convert X_val and y_val to PyTorch tensors if they are not already
X_val = T.tensor(X_val, dtype=T.float32).to(device)
y_val = T.tensor(y_val, dtype=T.int64).to(device)

# Create a TensorDataset for the validation data and labels
val_dataset = TensorDataset(X_val, y_val)

# Create a DataLoader for the validation dataset
validation_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

  X_val = T.tensor(X_val, dtype=T.float32).to(device)
  y_val = T.tensor(y_val, dtype=T.int64).to(device)


In [188]:
def train_model(model, train_loader, validation_loader, loss_fn, optimizer, num_epochs=100, patience=3, device="cpu", save_path=None):
    # Move the model to the specified device
    model.to(device)
    
    # Initialize variables for early stopping
    best_val_loss = float('inf')
    current_patience = 0
    
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        train_loss = 0
        correct = 0
        
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)  # Move data to device
            optimizer.zero_grad()  # Zero the gradients
            output = model(data)  # Forward pass
            loss = loss_fn(output, target)  # Calculate loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights
            
            train_loss += loss.item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
        
        avg_train_loss = train_loss / len(train_loader.dataset)
        accuracy = correct / len(train_loader.dataset) * 100
        
        print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.2f}%')
        
        # Evaluate the model on the validation dataset to monitor performance
        model.eval()
        val_loss = 0
        
        with T.no_grad():
            for data, target in validation_loader:
                data, target = data.to(device), target.to(device)  # Move data to device
                output = model(data)
                loss = loss_fn(output, target)
                val_loss += loss.item()
        
        avg_val_loss = val_loss / len(validation_loader.dataset)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}')
        
        # Implement early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            current_patience = 0
            if save_path is not None:
                T.save(model.state_dict(), save_path)  # Save the best model
        else:
            current_patience += 1
        
        if current_patience >= patience:
            print(f'Early stopping at epoch {epoch + 1} as validation loss did not improve for {patience} epochs.')
            break  # Stop training
    
    print('Training complete.')

# Example usage:
# Replace the placeholders with your actual data loaders, model, loss function, and optimizer
# train_loader, validation_loader, model, loss_fn, optimizer = ...

train_model(net, train_loader, validation_loader, loss_fn, optimizer, num_epochs=100, patience=3, save_path='best_model.pth')


Epoch [1/100], Train Loss: 0.0241, Accuracy: 62.09%
Epoch [1/100], Validation Loss: 0.0246
Epoch [2/100], Train Loss: 0.0243, Accuracy: 61.01%
Epoch [2/100], Validation Loss: 0.0239
Epoch [3/100], Train Loss: 0.0247, Accuracy: 58.69%
Epoch [3/100], Validation Loss: 0.0242
Epoch [4/100], Train Loss: 0.0242, Accuracy: 61.38%
Epoch [4/100], Validation Loss: 0.0245
Epoch [5/100], Train Loss: 0.0246, Accuracy: 59.19%
Epoch [5/100], Validation Loss: 0.0247
Early stopping at epoch 5 as validation loss did not improve for 3 epochs.
Training complete.


In [189]:
test_model(net, test_loader, loss_fn)

Test set: Average loss: 0.0247, Accuracy: 58.52%
Precision for each class: [0.55787917 0.61599678 0.         0.         0.         0.
 0.        ]
Recall for each class: [0.8067533 0.5959469 0.        0.        0.        0.        0.       ]
Macro-average Precision: 0.65
Macro-average Recall: 0.59


In [196]:
def evaluate_model_precision(model, dataloader):
    model.eval()
    all_preds = []  # To store all predicted labels
    all_targets = []  # To store all true labels

    with T.no_grad():
        for data, target in dataloader:
            output = model(data)
            pred = output.argmax(dim=1)
            all_preds.extend(pred.cpu().numpy())  # Convert to CPU and append to the list
            all_targets.extend(target.cpu().numpy())  # Convert to CPU and append to the list

    # Calculate precision for each class and macro-average precision
    precision = precision_score(all_targets, all_preds, average=None, zero_division=0)
    macro_precision = precision_score(all_targets, all_preds, average='weighted', zero_division=0)

    return macro_precision * 100 # Return the macro-average precision as a percentage

In [197]:
def evaluate_model_recall(model, dataloader):
    model.eval()
    all_preds = []  # To store all predicted labels
    all_targets = []  # To store all true labels

    with T.no_grad():
        for data, target in dataloader:
            output = model(data)
            pred = output.argmax(dim=1)
            all_preds.extend(pred.cpu().numpy())  # Convert to CPU and append to the list
            all_targets.extend(target.cpu().numpy())  # Convert to CPU and append to the list

    # Calculate recall for each class and macro-average recall
    recall = recall_score(all_targets, all_preds, average=None, zero_division=0)
    macro_recall = recall_score(all_targets, all_preds, average='weighted', zero_division=0)

    return macro_recall * 100

In [200]:
hyperparameter_grid = {
    'lr': [0.001, 0.01, 0.1],
    'hidden_units': [64, 128, 256],
    'batch_size': [32, 64, 128]
}

# Create a list of hyperparameter combinations to try
param_combinations = list(ParameterGrid(hyperparameter_grid))

# Initialize variables to store the best hyperparameters and performance
best_params = None
best_precision = 0.0  # Initialize with a low value
best_recall = 0.0  # Initialize with a low value

# Loop over each hyperparameter combination
for params in param_combinations:
    # Create a new instance of the model with the current hyperparameters
    model = CovClassificator(hidden_units=params['hidden_units']).to(device)

    # Define the optimizer with the current learning rate
    optimizer = T.optim.Adam(model.parameters(), lr=params['lr'], weight_decay=1e-5)

    # Create DataLoader objects with the current batch size
    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)

    validation_dataset = TensorDataset(X_val, y_val)
    validation_loader = DataLoader(validation_dataset, batch_size=params['batch_size'], shuffle=False)

    # Train the model using the training data
    train_model(model, train_loader, validation_loader, loss_fn, optimizer, num_epochs=50, patience=3)

    # After training, evaluate the model on the validation set
    precision = evaluate_model_precision(model, validation_loader)
    recall = evaluate_model_recall(model, validation_loader)

    # Check if this hyperparameter combination gives better precision and recall
    if precision > best_precision and recall > best_recall:
        best_precision = precision
        best_recall = recall
        best_params = params

# Print the best hyperparameters and their corresponding precision and recall
print("Best Hyperparameters:")
print(best_params)
print(f"Best Precision on Validation Set: {best_precision:.2f}%")
print(f"Best Recall on Validation Set: {best_recall:.2f}%")

Epoch [1/50], Train Loss: 0.0452, Accuracy: 72.05%
Epoch [1/50], Validation Loss: 0.0446
Epoch [2/50], Train Loss: 0.0446, Accuracy: 73.78%
Epoch [2/50], Validation Loss: 0.0444
Epoch [3/50], Train Loss: 0.0444, Accuracy: 74.35%
Epoch [3/50], Validation Loss: 0.0442
Epoch [4/50], Train Loss: 0.0443, Accuracy: 74.72%
Epoch [4/50], Validation Loss: 0.0441
Epoch [5/50], Train Loss: 0.0442, Accuracy: 75.03%
Epoch [5/50], Validation Loss: 0.0440
Epoch [6/50], Train Loss: 0.0442, Accuracy: 75.17%
Epoch [6/50], Validation Loss: 0.0439
Epoch [7/50], Train Loss: 0.0441, Accuracy: 75.31%
Epoch [7/50], Validation Loss: 0.0439
Epoch [8/50], Train Loss: 0.0441, Accuracy: 75.49%
Epoch [8/50], Validation Loss: 0.0438
Epoch [9/50], Train Loss: 0.0440, Accuracy: 75.77%
Epoch [9/50], Validation Loss: 0.0437
Epoch [10/50], Train Loss: 0.0439, Accuracy: 76.03%
Epoch [10/50], Validation Loss: 0.0436
Epoch [11/50], Train Loss: 0.0439, Accuracy: 76.17%
Epoch [11/50], Validation Loss: 0.0436
Epoch [12/50], Tr