# things to change  
- custom batching to put samples of the same category next to each other  
- add more samples  

In [1]:
%%writefile data.py

from torch.utils.data import Dataset


# Target label columns; the launcher script uses the length of this list as the model's output dimension.
predictor_labels = 'Reverse,Forward,One_Shot_Intent,Loop_Intent,Drum,Percussion,Kick,Clap,Hi Hat'.split(',')
# Scalar feature columns that the launcher script selects from the labels table as the static feature vector.
features_labels = ['centroid_mean', 'bandwidth_mean', 'bandwidth_low', 'bandwidth_low_10', 'bandwidth_high', 'bandwidth_high_90', 'bandwidth_range', 'onset_strength_mean', 'onset_strength_max', 'zcr_mean', 'onset_strength_ratio', 'onset_frame_count']
# NOTE(review): not referenced anywhere in the visible code — presumably names of per-frame
# sequence features; confirm it is still needed.
gru_labels = ['centroid', 'bandwidth', 'zcr', 'onset_strength']

class MultiModalDataset(Dataset):
    """Map-style dataset that bundles several per-sample modalities with labels.

    All six containers are indexed with the same position, so they are
    expected to be aligned sample-by-sample. The dataset length is taken
    from ``labels``.
    """

    def __init__(self, index_pos, images, energies, spectrums, features, labels):
        self.index_pos, self.images = index_pos, images
        self.energies, self.spectrums = energies, spectrums
        self.features, self.labels = features, labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(index_pos, image, features, spectrum, energy, label)`` for ``idx``.

        The tuple order differs from the constructor's argument order
        (features come before spectrums and energies).
        """
        ordered_fields = (
            self.index_pos,
            self.images,
            self.features,
            self.spectrums,
            self.energies,
            self.labels,
        )
        return tuple(field[idx] for field in ordered_fields)

Writing data.py


In [2]:
%%writefile model.py

import torch
import torch.nn as nn
import torch.nn.functional as F


# Default dropout probability for the CNN, feature and GRU branch constructors.
# NOTE(review): FullModel declares its own `dropout_val_1` / `dropout_val_2` parameters with
# literal defaults, so these module-level values are not what FullModel passes down by default.
dropout_val_1 = 0.35 *(0.9)
# NOTE(review): not read anywhere in the visible code (FullModel's parameter of the same name shadows it).
dropout_val_2 = 0.5
# Base width: CNN channel counts and the feature-branch width are multiples of this value.
neuron_count = 32
# Hidden size of each GRU direction. ("gnu" looks like a typo for "gru"; the name is kept
# because other definitions in this file reference it.)
gnu_neuron_count = 64


class CNNImagesBranch(nn.Module):
    """Three-stage 2-D CNN applied independently to every frame of a sequence.

    Every stage is Conv2d(3x3, no padding) -> LeakyReLU -> BatchNorm2d ->
    MaxPool2d(2) -> Dropout, with channel widths of 1x, 2x and 4x
    ``neuron_count``.
    """

    def __init__(self, input_channels, dropout_val=dropout_val_1):
        super().__init__()
        widths = [input_channels] + [int(neuron_count * mult) for mult in (1, 2, 4)]
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=0),
                nn.LeakyReLU(0.01),
                nn.BatchNorm2d(c_out),
                nn.MaxPool2d(2),
                nn.Dropout(dropout_val),
            ]
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a 5-D batch of frame sequences into ``(batch, time, features)``.

        A trailing axis of size 2 is moved to the channel position first
        (presumably a channels-last layout with two channels — TODO confirm).
        """
        if x.shape[-1] == 2:
            x = x.permute(0, 1, 4, 2, 3)
        n_batch, n_steps, n_chan, height, width = x.shape
        # Fold time into the batch axis so the 2-D CNN sees one frame per row.
        frames = x.view(n_batch * n_steps, n_chan, height, width)
        encoded = self.cnn(frames)
        # Unfold time again and flatten each frame's feature map.
        return encoded.reshape(n_batch, n_steps, -1)

class CNNSpectrumsBranch(nn.Module):
    """Two-stage 2-D CNN applied to every time step of a spectrum sequence.

    Every stage is Conv2d(3x3, padding=1) -> LeakyReLU -> BatchNorm2d ->
    MaxPool2d((1, 2)) -> Dropout, so pooling only halves the last axis.
    """

    def __init__(self, input_channels, dropout_val=dropout_val_1):
        super().__init__()
        stages = []
        c_in = input_channels
        for c_out in (int(neuron_count), int(neuron_count * 2)):
            stages.extend((
                nn.Conv2d(c_in, c_out, kernel_size=(3,3), padding=1),
                nn.LeakyReLU(0.01),
                nn.BatchNorm2d(c_out),
                nn.MaxPool2d((1, 2)),
                nn.Dropout(dropout_val),
            ))
            c_in = c_out
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a 5-D input into ``(batch, time, features)``.

        When axis 3 has size 5 the axes are reordered with
        ``permute(0, 2, 3, 1, 4)`` before the input is read as
        (batch, time, channels, H, W). The 5 is presumably the spectrum
        channel count — TODO confirm.
        """
        needs_reorder = x.shape[3] == 5
        if needs_reorder:
            x = x.permute(0, 2, 3, 1, 4)
        n_batch, n_steps, n_chan, height, width = x.shape
        # Fold time into the batch axis for the 2-D convolutions.
        flat = x.view(n_batch * n_steps, n_chan, height, width)
        encoded = self.cnn(flat)
        # Unfold time and flatten the per-step feature map.
        return encoded.view(n_batch, n_steps, -1)

class CNNEnergiesBranch(nn.Module):
    """Two-stage 2-D CNN applied to every time step of an energy sequence.

    Every stage is Conv2d(3x3, padding=1) -> LeakyReLU -> BatchNorm2d ->
    MaxPool2d((1, 2)) -> Dropout, so pooling only halves the last axis.
    """

    def __init__(self, input_channels, dropout_val=dropout_val_1):
        super().__init__()
        channel_plan = [input_channels, int(neuron_count), int(neuron_count * 2)]
        layers = []
        for position in range(1, len(channel_plan)):
            c_in, c_out = channel_plan[position - 1], channel_plan[position]
            layers.append(nn.Conv2d(c_in, c_out, kernel_size=(3,3), padding=1))
            layers.append(nn.LeakyReLU(0.01))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.MaxPool2d((1, 2)))
            layers.append(nn.Dropout(dropout_val))
        self.cnn = nn.Sequential(*layers)

    def forward(self, x):
        """Encode a 5-D input into ``(batch, time, features)``.

        When axis 3 has size 3 the axes are reordered with
        ``permute(0, 2, 3, 1, 4)`` before the input is read as
        (batch, time, channels, H, W). The 3 is presumably the energy
        channel count — TODO confirm.
        """
        if x.shape[3] == 3:
            x = x.permute(0, 2, 3, 1, 4)
        n_batch, n_steps, n_chan, height, width = x.shape
        # Fold time into the batch axis for the 2-D convolutions.
        stacked = x.view(n_batch * n_steps, n_chan, height, width)
        features = self.cnn(stacked)
        # Unfold time and flatten the per-step feature map.
        return features.view(n_batch, n_steps, -1)


class FeaturesBranch(nn.Module):
    """Single dense layer (Linear -> LeakyReLU -> Dropout) for the static feature vector.

    The output width is ``2 * neuron_count``.
    """

    def __init__(self, input_dim, dropout_val=dropout_val_1):
        super().__init__()
        hidden_width = int(neuron_count * 2)
        stages = (
            nn.Linear(input_dim, hidden_width),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout_val),
        )
        self.fc = nn.Sequential(*stages)

    def forward(self, x):
        """Project ``x`` (last axis of size ``input_dim``) through the dense stack."""
        projected = self.fc(x)
        return projected


class GRUBranch(nn.Module):
    """Bidirectional, batch-first GRU that summarises a sequence into one vector.

    Args:
        input_dim: Size of each time step's feature vector.
        hidden_dim: Hidden size per direction.
        dropout_val: Dropout applied between stacked GRU layers.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_dim, hidden_dim=int(gnu_neuron_count), dropout_val=dropout_val_1, num_layers=2):
        super().__init__()
        # nn.GRU only applies dropout between stacked layers and warns when it
        # is non-zero with a single layer, so disable it in that case.
        inter_layer_dropout = dropout_val if num_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout,
        )

    def forward(self, x):
        """Return the top layer's final forward and backward states, shape (batch, 2 * hidden_dim).

        ``x`` is (batch, time, feature_dim) or a PackedSequence. The final
        hidden state ``h_n`` is used instead of ``output[:, -1, :]``: for a
        bidirectional GRU the last output step holds a backward state that has
        only processed a single time step, not the whole sequence.
        """
        _, h_n = self.gru(x)
        # h_n is (num_layers * 2, batch, hidden); the last two rows belong to
        # the top layer's forward and backward directions respectively.
        return torch.cat([h_n[-2], h_n[-1]], dim=1)

def custom_collate_fn(batch):
    """Collate dataset samples into a dict without padding the sequences.

    Each sample is a 6-tuple ``(index_pos, image, features, spectrum, energy,
    label)``. Images, spectrums and energies stay as Python lists of float
    tensors (one entry per sample); features and labels are stacked along a
    new leading batch axis.

    Args:
        batch: List of samples as produced by the dataset's ``__getitem__``.

    Returns:
        Dict with keys ``index_pos`` (list of int32 tensors), ``images``,
        ``features``, ``spectrums``, ``energies`` and ``labels``.
    """
    index_pos = [torch.tensor(item[0]).int() for item in batch]
    images = [item[1].float() for item in batch]
    features = torch.stack([item[2] for item in batch])
    spectrums = [item[3].float() for item in batch]
    energies = [item[4].float() for item in batch]
    labels = torch.stack([torch.tensor(item[5]) for item in batch])
    return {
        'index_pos': index_pos,
        'images': images,
        'features': features,
        'spectrums': spectrums,
        'energies': energies,
        'labels': labels
    }

class FullModel(nn.Module):
    """Multi-branch classifier over image, spectrum and energy sequences plus static features.

    Each sequence modality goes through its own CNN (per time step) and a
    bidirectional GRU; the three GRU summaries are concatenated with the
    dense feature branch and fed to a two-layer head ending in a Sigmoid,
    so the output is a per-label probability.

    Args:
        img_shape: Shape of one image frame; index 0 is the channel count.
        energy_shape: Shape of one energy item (2 or 3 entries); index 0 is the channel count.
        spectrum_shape: Shape of one spectrum item (2 or 3 entries); index 0 is the channel count.
        feature_dim: Length of the static feature vector.
        output_dim: Number of output labels.
        dropout_val_1: Dropout for all branches. This parameter shadows the
            module-level constant of the same name.
        dropout_val_2: Dropout inside the final classifier head.
    """

    def __init__(self, img_shape, energy_shape, spectrum_shape, feature_dim, output_dim, dropout_val_1=0.35, dropout_val_2=0.5):
        super().__init__()

        # CNN branches
        self.images_cnn = CNNImagesBranch(img_shape[0], dropout_val_1)
        self.energies_cnn = CNNEnergiesBranch(energy_shape[0], dropout_val_1)
        self.spectrums_cnn = CNNSpectrumsBranch(spectrum_shape[0], dropout_val_1)

        # GRU branches; their input size is the flattened CNN output per time step
        self.images_gru = GRUBranch(self._cnn_output_dim(self.images_cnn, img_shape), dropout_val=dropout_val_1)
        self.energies_gru = GRUBranch(self._cnn_output_dim(self.energies_cnn, energy_shape), dropout_val=dropout_val_1)
        self.spectrums_gru = GRUBranch(self._cnn_output_dim(self.spectrums_cnn, spectrum_shape), dropout_val=dropout_val_1)

        # Feature branch
        self.features_branch = FeaturesBranch(feature_dim, dropout_val_1)

        # Final layers: three bidirectional GRU summaries plus the feature branch output
        gru_summary_dim = 2 * int(gnu_neuron_count)
        total_concat_dim = 3 * gru_summary_dim + int(neuron_count * 2)
        self.fc = nn.Sequential(
            nn.Linear(total_concat_dim, int(gnu_neuron_count * 4)),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout_val_2),
            nn.Linear(int(gnu_neuron_count * 4), output_dim),
            nn.Sigmoid()
        )

        self.bidirectional = True

    def _cnn_output_dim(self, cnn_module, input_shape):
        """Return the flattened per-time-step feature size of ``cnn_module``.

        A single all-zero dummy item is pushed through the branch. The pass
        runs in eval mode under ``no_grad`` so it neither updates BatchNorm
        running statistics nor builds an autograd graph; the module's
        previous training flag is restored afterwards.

        Raises:
            ValueError: If ``input_shape`` has neither 2 nor 3 entries.
        """
        if len(input_shape) == 3:
            C, H, W = input_shape
            x = torch.zeros(1, 1, C, H, W)
        elif len(input_shape) == 2:
            C, W = input_shape
            x = torch.zeros(1, 1, C, 1, W)  # add a singleton height axis
        else:
            raise ValueError(f"Unexpected input_shape: {input_shape}")

        was_training = cnn_module.training
        cnn_module.eval()
        try:
            with torch.no_grad():
                out = cnn_module(x)
        finally:
            cnn_module.train(was_training)
        return out.view(1, -1).size(1)

    def get_bidirectional_last_layer(self, h):
        """Reduce a GRU final hidden state ``h`` to one vector per sample.

        ``h`` is (num_layers * num_directions, batch, hidden). For the
        bidirectional case the top layer's forward and backward states are
        concatenated; otherwise the top layer's state is returned as is.
        """
        if self.bidirectional:
            forward = h[-2, :, :]  # top layer, forward direction
            backward = h[-1, :, :]  # top layer, backward direction
            return torch.cat([forward, backward], dim=1)
        return h[-1, :, :]

    @staticmethod
    def _encode_sequences(cnn_branch, sequences, leading_dims):
        """Run every sequence through ``cnn_branch`` and return per-sequence (T, feat) tensors.

        ``leading_dims`` singleton axes are prepended to each sequence before
        the CNN call; the CNN's leading batch axis is squeezed away afterwards.
        """
        encoded = []
        for seq in sequences:
            for _ in range(leading_dims):
                seq = seq.unsqueeze(0)
            encoded.append(cnn_branch(seq).squeeze(0))
        return encoded

    def forward(self, images, features, spectrums, energies):
        """Compute per-label probabilities for a batch.

        Args:
            images: List of per-sample tensors; one leading axis is added before the image CNN.
            features: Tensor of shape (B, feature_dim).
            spectrums: List of per-sample tensors; two leading axes are added before the spectrum CNN.
            energies: List of per-sample tensors; two leading axes are added before the energy CNN.

        Returns:
            Tensor of shape (B, output_dim) with values in [0, 1] (Sigmoid output).
        """
        # Samples are encoded one at a time because their sequence lengths may differ.
        batch_img_feat = self._encode_sequences(self.images_cnn, images, 1)
        batch_spec_feat = self._encode_sequences(self.spectrums_cnn, spectrums, 2)
        batch_energy_feat = self._encode_sequences(self.energies_cnn, energies, 2)

        # pack_sequence lets the GRUs consume the variable-length sequences as one batch
        packed_img = nn.utils.rnn.pack_sequence(batch_img_feat, enforce_sorted=False)
        packed_spec = nn.utils.rnn.pack_sequence(batch_spec_feat, enforce_sorted=False)
        packed_energy = nn.utils.rnn.pack_sequence(batch_energy_feat, enforce_sorted=False)

        # The inner nn.GRU is called directly to obtain h_n for the packed input.
        _, h_img = self.images_gru.gru(packed_img)
        _, h_spec = self.spectrums_gru.gru(packed_spec)
        _, h_energy = self.energies_gru.gru(packed_energy)

        img_feat = self.get_bidirectional_last_layer(h_img)
        spec_feat = self.get_bidirectional_last_layer(h_spec)
        energy_feat = self.get_bidirectional_last_layer(h_energy)

        # Static features branch
        feat_out = self.features_branch(features)

        # Merge all branch summaries and classify
        merged = torch.cat([img_feat, spec_feat, energy_feat, feat_out], dim=1)
        return self.fc(merged)

Writing model.py


In [3]:
%%writefile trainer.py

from torch.utils.data import Dataset, DataLoader
import torch.distributed as dist
from sklearn.metrics import classification_report, f1_score, accuracy_score, roc_auc_score
import torch
import torch.distributed
import torch.nn as nn
import pandas as pd
from sklearn.metrics import classification_report, f1_score, accuracy_score, roc_auc_score
import numpy as np
import time


def arr_avg(arr, highest_val, threshold):
    """Tell whether a full 3-entry window has dropped below ``highest_val`` by more than ``threshold``.

    Returns False for any window that does not hold exactly three values;
    otherwise returns whether ``highest_val - mean(arr)`` exceeds ``threshold``.
    """
    window_size = len(arr)
    if window_size != 3:
        return False
    running_total = 0
    for value in arr:
        running_total += value
    mean_value = running_total / window_size
    return bool(highest_val - mean_value > threshold)


def evaluate(model, val_dataloader, criterion, device, world_size, train_loss, train_acc, train_auc, epoch, epochs, cached_results, best_acc, checkpoint_epoch, max_mem):
    """Run one validation pass, average metrics over all ranks and checkpoint on improvement.

    Every rank evaluates its own shard, then the six metrics (train loss/acc/AUC
    and validation loss/acc/AUC) are all-gathered and averaged. Because every
    rank sees the same gathered values, all ranks update ``best_acc``,
    ``checkpoint_epoch`` and ``cached_results`` identically; only rank 0
    prints the summary and writes ``best_weights.pth``.

    Args:
        model: Model to evaluate (plain module or a wrapper exposing ``.module``).
        val_dataloader: Loader yielding dict batches with ``images``,
            ``spectrums``, ``energies``, ``features`` and ``labels``.
        criterion: Loss applied to ``(outputs, labels)``.
        device: Device the batches are moved to.
        world_size: Number of participating processes.
        train_loss, train_acc, train_auc: This rank's training metrics for the epoch.
        epoch, epochs: Zero-based current epoch and total epoch count (for logging).
        cached_results: List mutated in place; holds at most the three most
            recent validation accuracies since the last improvement.
        best_acc: Best averaged validation accuracy seen so far.
        checkpoint_epoch: Epoch index at which ``best_acc`` was reached.
        max_mem: Peak GPU memory in bytes, reported in GiB.

    Returns:
        Tuple ``(best_acc, checkpoint_epoch)`` after this epoch; ``best_acc``
        is a plain float once an improvement has been recorded.
    """
    model.eval()
    total_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in val_dataloader:
            images = [x.to(device) for x in batch['images']]
            spectrums = [x.to(device) for x in batch['spectrums']]
            energies = [x.to(device) for x in batch['energies']]
            features = batch['features'].to(device)
            labels = batch['labels'].float().to(device)

            outputs = model(images, features, spectrums, energies)
            outputs = outputs.float()  # ensure correct dtype

            loss = criterion(outputs, labels)
            total_loss += loss.item()

            # Save for metrics
            all_labels.append(labels.cpu())
            all_preds.append(outputs.cpu())

    # Concatenate all batches
    all_labels = torch.cat(all_labels).numpy()
    all_preds = torch.cat(all_preds).numpy()

    # Accuracy on predictions thresholded at 0.5
    preds_binary = (all_preds >= 0.5).astype(int)
    acc = accuracy_score(all_labels, preds_binary)

    # roc_auc_score raises ValueError when a label column has a single class
    try:
        auc = roc_auc_score(all_labels, all_preds)
    except ValueError:
        auc = float('nan')

    avg_loss = total_loss / len(val_dataloader)

    metrics = torch.tensor(
        [train_loss, train_acc, train_auc, avg_loss, acc, auc],
        dtype=torch.float32,
        device=device
    )

    outs = [torch.zeros_like(metrics) for _ in range(world_size)]
    dist.all_gather(outs, metrics)

    # Average over ranks and convert to Python floats so the running state
    # does not hold device tensors.
    avg_metrics = torch.stack(outs).mean(dim=0).tolist()
    val_acc = avg_metrics[4]
    is_main_rank = dist.get_rank() == 0

    if is_main_rank:
        print('-' * 60)
        print(f"Epoch: {epoch+1}/{epochs}")
        print(f"Train Loss: {avg_metrics[0]:.4f} Train acc: {avg_metrics[1]:.4f} Train AUC: {avg_metrics[2]:.4f}")
        print(f'Val   Loss: {avg_metrics[3]:.4f} Val   acc: {avg_metrics[4]:.4f} Val   AUC: {avg_metrics[5]:.4f}')

    # Rolling window of recent accuracies; it is maintained for a
    # "reload best weights when accuracy collapses" heuristic (see arr_avg)
    # that is currently not enabled.
    cached_results.append(val_acc)
    if len(cached_results) > 3:
        cached_results.pop(0)

    if val_acc > best_acc:
        checkpoint_epoch = epoch
        if is_main_rank:
            # Save the unwrapped weights whether or not the model is wrapped.
            torch.save(getattr(model, 'module', model).state_dict(), 'best_weights.pth')
            print(f'Accuracy increased to {val_acc:.4f} from {best_acc:.4f}. Saved copy of new weights')
        best_acc = val_acc
        cached_results.clear()

    # Keep ranks in step so nobody races ahead while rank 0 is writing the checkpoint.
    torch.distributed.barrier()
    max_mem = max_mem / (1024**3)
    print(f'max mem used on gpu{device}: {max_mem:.4f}GB')

    return best_acc, checkpoint_epoch


def compute_metrics(preds, targets, threshold=0.5):
    """Return ``(accuracy, roc_auc)`` for score array ``preds`` against ``targets``.

    Accuracy is computed on scores strictly greater than ``threshold``;
    the AUC uses the raw scores.
    """
    hard_preds = (preds > threshold).astype(int)
    accuracy = accuracy_score(targets, hard_preds)
    area_under_curve = roc_auc_score(targets, preds)
    return accuracy, area_under_curve

class Trainer():
    """Distributed training / inference driver for the multi-modal model.

    Args:
        model: Model to train (typically wrapped in DistributedDataParallel).
        train_dataloader: Loader for this rank's training shard.
        val_dataloader: Loader for this rank's validation shard.
        device: Device (or device index) batches and the model are moved to.
        world_size: Number of participating processes.
    """

    def __init__(
        self,
        model: nn.Module,
        train_dataloader: DataLoader,
        val_dataloader: DataLoader,
        device,
        world_size
    ) -> None:
        self.model = model
        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.device = device
        self.world_size = world_size

    def _save_checkpoint(self):
        """Write the wrapped module's weights to ``checkpoint.pt`` (expects a ``.module`` wrapper)."""
        ckp = self.model.module.state_dict()
        torch.save(ckp, "checkpoint.pt")

    def train(self, epochs: int, train_sampler, optimizer):
        """Train for ``epochs`` epochs with BCE loss, validating after each epoch.

        Args:
            epochs: Number of passes over the training loader.
            train_sampler: Sampler whose ``set_epoch`` is called every epoch
                so shuffling differs between epochs.
            optimizer: Optimizer stepping the model parameters.
        """
        self.model.to(self.device)
        torch.cuda.reset_peak_memory_stats()

        cached_results = []
        best_acc = 0
        checkpoint_epoch = 0

        # The model ends in a Sigmoid, hence BCELoss rather than BCEWithLogitsLoss.
        criterion = nn.BCELoss()

        for epoch in range(epochs):
            self.model.train()
            train_sampler.set_epoch(epoch)
            total_loss = 0.0
            all_preds = []
            all_labels = []

            for batch in self.train_dataloader:
                images = [x.to(self.device) for x in batch['images']]
                spectrums = [x.to(self.device) for x in batch['spectrums']]
                energies = [x.to(self.device) for x in batch['energies']]
                features = batch['features'].to(self.device)
                labels = batch['labels'].float().to(self.device)

                optimizer.zero_grad()
                outputs = self.model(images, features, spectrums, energies)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

                all_labels.append(labels.cpu())
                # Detach before storing so the epoch-long list does not keep
                # every batch's autograd graph reachable.
                all_preds.append(outputs.detach().cpu())

            # Concatenate all batches
            all_labels = torch.cat(all_labels).numpy()
            all_preds = torch.cat(all_preds).numpy()

            # Accuracy on predictions thresholded at 0.5
            preds_binary = (all_preds >= 0.5).astype(int)
            acc = accuracy_score(all_labels, preds_binary)

            # roc_auc_score raises ValueError when a label column has a single class
            try:
                auc = roc_auc_score(all_labels, all_preds)
            except ValueError:
                auc = float('nan')

            avg_train_loss = total_loss / len(self.train_dataloader)
            max_mem = torch.cuda.max_memory_allocated()
            if self.val_dataloader:
                best_acc, checkpoint_epoch = evaluate(self.model, self.val_dataloader, criterion, self.device, self.world_size, avg_train_loss, acc, auc, epoch, epochs, cached_results, best_acc, checkpoint_epoch, max_mem)

        if dist.get_rank() == 0:
            # float() so a 0-dim tensor prints as a number rather than a tensor repr.
            print(f'Best Epoch : {checkpoint_epoch + 1} - accuracy: {float(best_acc)}')
        # NOTE(review): purpose of this pause is not evident from the code —
        # presumably lets pending output/file writes settle; confirm.
        time.sleep(5)

    def infer_best_model(self, predictor_labels, y_test):
        """Reload the best weights, predict on the validation loader and report on rank 0.

        Predictions are thresholded at 0.55, gathered from all ranks and, on
        rank 0, merged with ``y_test`` metadata by ``id``; an F1 score and a
        classification report are printed and both predicted and actual
        values are written to ``even_loop_oneshot_2.xlsx``.

        Args:
            predictor_labels: Names of the label columns, in model output order.
            y_test: DataFrame holding ``id``, ``file_path``, ``file_name`` and
                the label columns for the validation samples.
        """
        map_location = torch.device(self.device) if not isinstance(self.device, torch.device) else self.device
        # NOTE(review): evaluate() saves to the relative path 'best_weights.pth', so this
        # absolute path only matches when the working directory is /kaggle/working.
        state_dict = torch.load('/kaggle/working/best_weights.pth', map_location=map_location, weights_only=True)
        if isinstance(self.model, torch.nn.parallel.DistributedDataParallel):
            self.model.module.load_state_dict(state_dict, strict=True)
        else:
            self.model.load_state_dict(state_dict, strict=True)

        self.model.eval()
        gpu_preds = []

        with torch.no_grad():
            for batch in self.val_dataloader:
                images = [x.to(self.device) for x in batch['images']]
                spectrums = [x.to(self.device) for x in batch['spectrums']]
                energies = [x.to(self.device) for x in batch['energies']]
                features = batch['features'].to(self.device)

                # Stack the per-sample id scalars directly on the device
                # instead of converting each one to a Python number.
                idx = torch.stack([x.to(self.device) for x in batch['index_pos']]).to(torch.int)
                idx = idx.unsqueeze(1)

                outputs = self.model(images, features, spectrums, energies)
                outputs = outputs.float()  # ensure correct dtype
                # Ids travel in the first column as floats (exact for ids below 2**24).
                outputs = torch.cat((idx.to(outputs.dtype), outputs), dim=1)

                gpu_preds.append(outputs.cpu())

        gpu_preds = torch.cat(gpu_preds).numpy()
        ids = gpu_preds[:, 0].astype(int)
        preds = (gpu_preds[:, 1:] >= 0.55).astype(int)

        # recombine ids with the thresholded predictions
        gpu_preds = np.column_stack((ids, preds))

        preds_tensor = torch.tensor(
            gpu_preds,
            dtype=torch.float32,
            device=self.device
        )
        all_preds = [torch.zeros_like(preds_tensor) for _ in range(self.world_size)]
        dist.all_gather(all_preds, preds_tensor)

        if dist.get_rank() == 0:
            gathered_preds = torch.cat(all_preds, dim=0).cpu().numpy()

            labs = ['id', 'file_path', 'file_name'] + predictor_labels
            y_meta = y_test[['id', 'file_path', 'file_name']].copy()

            pred_df = pd.DataFrame(gathered_preds, columns=['id'] + predictor_labels)
            # A DistributedSampler with drop_last=False pads shards by repeating
            # samples; drop the repeats so the merge keeps one row per id and
            # stays aligned with y_test.
            pred_df = pred_df.drop_duplicates(subset='id')
            pred_df = pd.merge(y_meta, pred_df, on='id', how='left')

            #f1 score
            print(f"F1 Score: {f1_score(y_test[predictor_labels], pred_df[predictor_labels], average='weighted')}")
            print(classification_report(y_test[predictor_labels], pred_df[predictor_labels], target_names=predictor_labels))

            #write excel file
            with pd.ExcelWriter("even_loop_oneshot_2.xlsx") as writer:
                pred_df.to_excel(writer, sheet_name="predicted values", index=False)
                y_test[labs].to_excel(writer, sheet_name="actual values", index=False)
                

Writing trainer.py


In [4]:
%%writefile ddp.py

import joblib

import pandas as pd
import numpy as np
import os
import sys
import torch
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import init_process_group, destroy_process_group
import torch.multiprocessing as mp
from torch.utils.data import DataLoader, DistributedSampler

from data import MultiModalDataset, predictor_labels, features_labels
from model import FullModel, custom_collate_fn
from trainer import Trainer

from sklearn.model_selection import train_test_split
from pympler import asizeof
import gc
import psutil


def prepare(rank, world_size, train_data, test_data, batch_size=32, pin_memory=False, num_workers=0):
    """Build per-rank train and test loaders backed by DistributedSampler.

    The training sampler shuffles, the test sampler does not; neither drops
    the last incomplete shard or batch. Both loaders use ``custom_collate_fn``.

    Returns:
        Tuple ``(train_dataloader, test_dataloader, train_sampler)``; the
        sampler is returned so the caller can call ``set_epoch`` on it.
    """
    sampler_options = dict(num_replicas=world_size, rank=rank, drop_last=False)
    loader_options = dict(
        batch_size=batch_size,
        pin_memory=pin_memory,
        num_workers=num_workers,
        drop_last=False,
        shuffle=False,  # ordering is delegated to the sampler
        collate_fn=custom_collate_fn,
    )

    train_sampler = DistributedSampler(train_data, shuffle=True, **sampler_options)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, **loader_options)

    test_sampler = DistributedSampler(test_data, shuffle=False, **sampler_options)
    test_dataloader = DataLoader(test_data, sampler=test_sampler, **loader_options)

    return train_dataloader, test_dataloader, train_sampler

def ddp_setup(rank, world_size):
    """Initialise the default process group for this rank.

    The rendezvous address is fixed to localhost:12355; NCCL is used when
    CUDA is available, Gloo otherwise.
    """
    os.environ.update({"MASTER_ADDR": "localhost", "MASTER_PORT": "12355"})
    if torch.cuda.is_available():
        backend = "nccl"
    else:
        backend = "gloo"
    init_process_group(backend=backend, rank=rank, world_size=world_size)


def main(rank:int, world_size:int, total_epochs: int, batch_size: int, lr, img_shape, energy_shape, spectrum_shape, feature_dim, output_dim, X_images, X_features, X_spectrums, X_energies, labels, labels_cols):
    """Per-process entry point: split the data, train under DDP, then run inference.

    Every rank performs the same seeded, stratified 80/20 split, wraps the
    model in DistributedDataParallel (with synchronised BatchNorm), trains
    with Adam and finally evaluates the best checkpoint on the held-out part.
    ``labels_cols`` is accepted but not used here.
    """
    ddp_setup(rank, world_size)
    torch.cuda.set_device(rank)

    # Same random_state on every rank, so all processes agree on the split.
    stratify_frame = labels[['Reverse', 'Forward', 'One_Shot_Intent', 'Loop_Intent', 'Drum', 'Hi Hat']]
    split = train_test_split(
        X_images, X_features, X_spectrums, X_energies, labels,
        test_size=0.2, random_state=42, stratify=stratify_frame,
    )
    (train_images, test_images,
     train_features, test_features,
     train_spectrums, test_spectrums,
     train_energies, test_energies,
     y_train, y_test) = split

    train_targets = y_train[predictor_labels].to_numpy()
    test_targets = y_test[predictor_labels].to_numpy()

    train_dataset = MultiModalDataset(
        y_train['id'].to_numpy(), train_images, train_energies, train_spectrums, train_features, train_targets
    )
    val_dataset = MultiModalDataset(
        y_test['id'].to_numpy(), test_images, test_energies, test_spectrums, test_features, test_targets
    )

    train_dataloader, val_dataloader, train_sampler = prepare(
        rank, world_size, train_dataset, val_dataset, batch_size=batch_size
    )

    # BatchNorm layers are converted before wrapping so statistics are synced across ranks.
    network = FullModel(img_shape, energy_shape, spectrum_shape, feature_dim, output_dim)
    network = nn.SyncBatchNorm.convert_sync_batchnorm(network)
    network = network.to(rank)
    network = DDP(network, device_ids=[rank], output_device=rank, find_unused_parameters=False)

    optimizer = torch.optim.Adam(network.parameters(), lr=lr)

    trainer = Trainer(network, train_dataloader, val_dataloader, rank, world_size)
    gc.collect()

    trainer.train(total_epochs, train_sampler, optimizer)
    trainer.infer_best_model(predictor_labels, y_test)

    destroy_process_group()


# Launcher: load the pre-computed data once in the parent process, derive the
# model input shapes, place the tensors in shared memory and spawn one
# training process per visible GPU.
if __name__ == "__main__":
    # One worker process per CUDA device.
    world_size = torch.cuda.device_count()

    # NOTE(review): joblib.load unpickles its input — only load files from a trusted source.
    print('loading labels')
    labels = joblib.load('/kaggle/input/classification-data/cut.joblib')
    print('loaded labels')
    
    print('loading images')
    images = joblib.load('/kaggle/input/classification-data/images.joblib')
    print('loaded images')
    
    print('loading energies')
    energies = joblib.load('/kaggle/input/classification-data/energies.joblib')
    print('loaded energies')
    
    print('loading spectrums')
    spectrums = joblib.load('/kaggle/input/classification-data/spectrums.joblib')
    print('loaded spectrums')

    # Metadata + label column names; forwarded to main() as `labels_cols`.
    labs = ['id', 'file_path', 'file_name'] + predictor_labels

    # Object arrays hold one entry per sample (presumably because per-sample
    # lengths differ — TODO confirm).
    images = np.asarray(images, dtype='object')
    energies = np.asarray(energies, dtype='object')
    spectrums = np.asarray(spectrums, dtype='object')
    features = labels[features_labels].astype(np.float32).to_numpy()

    # Shapes are read from the first sample only; the first image frame has its
    # last axis moved to the front to obtain the channel-first shape.
    img_shape = images[0][0].transpose(2, 0, 1).shape
    energy_shape = (energies[0][0].shape)
    # Built from axes 1 and 2 of the first spectrum with a singleton middle axis.
    spectrum_shape = (spectrums[0].shape[1], 1, spectrums[0].shape[2])
    feature_dim = labels[features_labels].shape[-1]
    output_dim = len(predictor_labels)

    # share_memory_() moves each tensor's storage to shared memory so the
    # spawned workers can read it without a per-process copy.
    X_images = [torch.tensor(img, dtype=torch.float32).share_memory_() for img in images]
    X_spectrums = [torch.tensor(spec, dtype=torch.float32).share_memory_() for spec in spectrums]
    X_energies = [torch.tensor(energy, dtype=torch.float32).share_memory_() for energy in energies]
    X_features = torch.tensor(features, dtype=torch.float32).share_memory_()
    
    # Training hyper-parameters.
    total_epochs = 140
    batch_size = int(16)
    lr = 0.00064 * 1.01
    
    print(f'world size: {world_size}')

    # Drop the original arrays now that tensor copies exist, to lower peak memory.
    del images, energies, spectrums, features  # the joblib objects
    gc.collect()
    
    # mp.spawn calls main(rank, *args) in each of the `nprocs` processes.
    mp.spawn(main, args=(world_size, total_epochs, batch_size, lr, img_shape, energy_shape, spectrum_shape, feature_dim, output_dim, X_images, X_features, X_spectrums, X_energies, labels, labs), nprocs=world_size)

Writing ddp.py


In [5]:
!python3 ddp.py

loading labels
loaded labels
loading images
loaded images
loading energies
loaded energies
loading spectrums
loaded spectrums
world size: 2
------------------------------------------------------------
Epoch: 1/140
Train Loss: 29.8568 Train acc: 0.0138 Train AUC: 0.6595
Val   Loss: 27.6991 Val   acc: 0.0273 Val   AUC: 0.6967
Accuracy increased to 0.0273 from 0.0000. Saved copy of new weights
max mem used on gpu0: 3.5882GB
max mem used on gpu1: 1.8541GB
------------------------------------------------------------
Epoch: 2/140
Train Loss: 26.2306 Train acc: 0.0385 Train AUC: 0.7040
Val   Loss: 24.7915 Val   acc: 0.0664 Val   AUC: 0.7094
Accuracy increased to 0.0664 from 0.0273. Saved copy of new weights
max mem used on gpu1: 3.3196GB
max mem used on gpu0: 3.5882GB
------------------------------------------------------------
Epoch: 3/140
Train Loss: 23.9707 Train acc: 0.0636 Train AUC: 0.7163
Val   Loss: 21.0145 Val   acc: 0.1295 Val   AUC: 0.7365
Accuracy increa

In [6]:
import os
# Remove the scripts written by the %%writefile cells so only the run's
# outputs remain in the working directory.
os.remove("/kaggle/working/data.py")
os.remove("/kaggle/working/ddp.py")
os.remove("/kaggle/working/model.py")
os.remove("/kaggle/working/trainer.py")

!rm -rf /kaggle/working/__pycache__
!ls /kaggle/working/

best_weights.pth  even_loop_oneshot_2.xlsx  __notebook__.ipynb
