# L07 05/04/24

# Exercise 3, classification

# Forest dataset

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
import torchmetrics
from torch.utils.tensorboard import SummaryWriter

  from .autonotebook import tqdm as notebook_tqdm
2024-05-02 22:56:04.146539: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Prepare the heat generators

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Override Dataset

In [3]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, csv):
        df = pd.read_csv(csv, sep=r',')
        df.Cover_Type = df.Cover_Type - 1 # this is needed, classes must start from 0
        self.data = torch.tensor(df.drop(columns=['Cover_Type']).values, dtype=torch.float32) # viene messo in memoria, non si può fare con dataset molto grandi
        self.target = torch.tensor(df.Cover_Type.values, dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.target[idx]


## Create the Model

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.fc1 = nn.Linear(54, 128) # 50 input features
        self.fc2 = nn.Linear(128, 128) 
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 16)
        self.fc5 = nn.Linear(16, 7) # 1 output feature

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        
        return x.squeeze()

## Validation

In [13]:
import torch.nn.functional as F

def validate_with_loss(model, val_loader):
    # Validate classification model:
    model.eval()
    total_loss = 0.0
    acc_micro = torchmetrics.Accuracy(task = 'multiclass', num_classes = 7, average = 'micro').to(device)
    acc_macro = torchmetrics.Accuracy(task = 'multiclass', num_classes = 7, average = 'macro').to(device)
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels)  # Compute cross-entropy loss
            total_loss += loss.item() * inputs.size(0)  # Multiply by batch size to account for different batch sizes
            _, predicted = torch.max(outputs, 1)
            print('pred:', predicted)
            print('outputs:', labels)
            acc_micro.update(predicted, labels)
            acc_macro.update(predicted, labels)
            total_loss += loss.item()
    return acc_micro.compute(), acc_macro.compute(), total_loss / len(val_loader.dataset)


In [6]:
df = pd.read_csv('../datasets/ForestDataset/val.csv', sep=r',')
df

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type32,Soil_Type33,Soil_Type34,Soil_Type35,Soil_Type36,Soil_Type37,Soil_Type38,Soil_Type39,Soil_Type40,Cover_Type
0,-0.367095,-0.959980,-1.597132,0.146639,-0.834074,-0.908681,0.271454,0.571653,0.281259,4.334805,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,5
1,-0.608916,-0.959980,-1.123963,-0.881597,-0.801414,-0.726043,0.369620,0.264652,0.041574,3.695656,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,5
2,-0.570608,-0.932727,-1.005671,-0.491248,-0.834074,-0.812079,0.402343,0.133080,-0.110952,3.689292,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,5
3,-0.024715,-0.360428,0.887003,-0.476967,0.031410,1.182604,1.318565,-0.393208,-1.396532,4.111148,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,5
4,-0.544271,-0.469437,-1.123963,-0.510290,-0.817744,-0.844531,0.664120,0.527796,-0.110952,3.689292,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1507,-0.278507,-0.360428,-1.597132,0.237086,-0.360507,0.089792,0.402343,0.747083,0.237680,-0.687469,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,3
1508,-0.759755,-0.178745,-0.769087,-0.938722,-0.981043,0.128282,0.762287,0.747083,-0.110952,-1.141147,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,6
1509,-0.714264,1.465481,-0.295918,-0.124701,-0.017579,-0.655855,-0.939268,0.352367,1.043891,-0.396534,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,6
1510,-0.726235,-1.050821,1.241880,-0.762588,-0.605456,0.003756,-0.023046,-2.147500,-1.244006,-1.130237,...,-0.218671,-0.206085,-0.038173,-0.082413,-0.025726,-0.047474,-0.224908,-0.213134,-0.176939,3


In [7]:
model = Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001)
batch_size = 1512
epochs = 2500
num_workers = 2
n_iter = 0


train_dataset = Dataset('../datasets/ForestDataset/train.csv')
val_dataset = Dataset('../datasets/ForestDataset/val.csv')

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

writer = SummaryWriter('Forest_experiment')

In [8]:
# %load_ext tensorboard
%reload_ext tensorboard
%tensorboard --logdir={experiment_name}

## Train

In [14]:
import torch.nn.functional as F

n_iter = 0
model.to(device)

for epoch in tqdm(range(epochs)):
    writer.add_scalar("epoch", epoch, n_iter)
    for data, target in train_loader:
        
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)  # Compute the loss
        writer.add_scalar("loss", loss.item(), n_iter)  # Add loss to tensorboard
        loss.backward()
        optimizer.step()
        n_iter += 1

    val_loss = 0.0
    num_val_samples = 0
    
    # measure validation loss    
    acc_micro, acc_max, val_loss = validate_with_loss(model, val_loader)
    
#    writer.add_scalar("val_loss", val_loss, n_iter)  # Add validation loss to tensorboard
#    print(f'Validation Acc Micro: {acc_micro}, Acc Macro: {acc_max}')
    writer.add_scalar("val_acc", acc_micro, n_iter)
    writer.add_scalar("val_loss", val_loss, n_iter)
    
# set description tqdm

  0%|          | 4/2500 [00:00<02:14, 18.53it/s]

pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')


  0%|          | 9/2500 [00:00<02:37, 15.84it/s]

pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')


  1%|          | 14/2500 [00:00<02:12, 18.70it/s]

pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')


  1%|          | 15/2500 [00:00<02:32, 16.33it/s]


pred: tensor([1, 1, 4,  ..., 2, 5, 2], device='cuda:0')
outputs: tensor([4, 4, 4,  ..., 5, 2, 2], device='cuda:0')
pred: 

KeyboardInterrupt: 