In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [2]:
from torch.utils.data import Dataset, DataLoader

In [3]:
import pandas as pd
import pickle

In [4]:
import torch.utils.data as utils

In [5]:
import numpy as np

In [6]:
from sklearn.preprocessing import LabelEncoder


In [7]:
from sklearn.model_selection import train_test_split

In [8]:
import sklearn

### STEP 1: FUNCTIONS

In [9]:
# Core calculation of label precisions for one test sample.

def _one_sample_positive_class_precisions(scores, truth):
  """Calculate precisions for each true class for a single sample.
  
  Args:
    scores: np.array of (num_classes,) giving the individual classifier scores.
    truth: np.array of (num_classes,) bools indicating which classes are true.

  Returns:
    pos_class_indices: np.array of indices of the true classes for this sample.
    pos_class_precisions: np.array of precisions corresponding to each of those
      classes.
  """
  num_classes = scores.shape[0]
  pos_class_indices = np.flatnonzero(truth > 0)
  # Only calculate precisions if there are some true classes.
  if not len(pos_class_indices):
    return pos_class_indices, np.zeros(0)
  # Retrieval list of classes for this sample. 
  retrieved_classes = np.argsort(scores)[::-1]
  # class_rankings[top_scoring_class_index] == 0 etc.
  class_rankings = np.zeros(num_classes, dtype=np.int)
  class_rankings[retrieved_classes] = range(num_classes)
  # Which of these is a true label?
  retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
  retrieved_class_true[class_rankings[pos_class_indices]] = True
  # Num hits for every truncated retrieval list.
  retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
  # Precision of retrieval list truncated at each hit, in order of pos_labels.
  precision_at_hits = (
      retrieved_cumulative_hits[class_rankings[pos_class_indices]] / 
      (1 + class_rankings[pos_class_indices].astype(np.float)))
  return pos_class_indices, precision_at_hits


In [10]:
# All-in-one calculation of per-class lwlrap.

def calculate_per_class_lwlrap(truth, scores):
  """Calculate label-weighted label-ranking average precision.
  
  Arguments:
    truth: np.array of (num_samples, num_classes) giving boolean ground-truth
      of presence of that class in that sample.
    scores: np.array of (num_samples, num_classes) giving the classifier-under-
      test's real-valued score for each class for each sample.
  
  Returns:
    per_class_lwlrap: np.array of (num_classes,) giving the lwlrap for each 
      class.
    weight_per_class: np.array of (num_classes,) giving the prior of each 
      class within the truth labels.  Then the overall unbalanced lwlrap is 
      simply np.sum(per_class_lwlrap * weight_per_class)
  """
  assert truth.shape == scores.shape
  num_samples, num_classes = scores.shape
  # Space to store a distinct precision value for each class on each sample.
  # Only the classes that are true for each sample will be filled in.
  precisions_for_samples_by_classes = np.zeros((num_samples, num_classes))
  for sample_num in range(num_samples):
    pos_class_indices, precision_at_hits = (
      _one_sample_positive_class_precisions(scores[sample_num, :], 
                                            truth[sample_num, :]))
    precisions_for_samples_by_classes[sample_num, pos_class_indices] = (
        precision_at_hits)
  labels_per_class = np.sum(truth > 0, axis=0)
  weight_per_class = labels_per_class / float(np.sum(labels_per_class))
  # Form average of each column, i.e. all the precisions assigned to labels in
  # a particular class.
  per_class_lwlrap = (np.sum(precisions_for_samples_by_classes, axis=0) / 
                      np.maximum(1, labels_per_class))
  # overall_lwlrap = simple average of all the actual per-class, per-sample precisions
  #                = np.sum(precisions_for_samples_by_classes) / np.sum(precisions_for_samples_by_classes > 0)
  #           also = weighted mean of per-class lwlraps, weighted by class label prior across samples
  #                = np.sum(per_class_lwlrap * weight_per_class)
  return per_class_lwlrap, weight_per_class

In [11]:
# Calculate the overall lwlrap using sklearn.metrics function.

def calculate_overall_lwlrap_sklearn(truth, scores):
  """Calculate the overall lwlrap using sklearn.metrics.lrap."""
  # sklearn doesn't correctly apply weighting to samples with no labels, so just skip them.
  sample_weight = np.sum(truth > 0, axis=1)
  nonzero_weight_sample_indices = np.flatnonzero(sample_weight > 0)
  overall_lwlrap = sklearn.metrics.label_ranking_average_precision_score(
      truth[nonzero_weight_sample_indices, :] > 0, 
      scores[nonzero_weight_sample_indices, :], 
      sample_weight=sample_weight[nonzero_weight_sample_indices])
  return overall_lwlrap

In [12]:
# Accumulator object version.

class lwlrap_accumulator(object):
  """Accumulate batches of test samples into per-class and overall lwlrap."""  

  def __init__(self):
    self.num_classes = 0
    self.total_num_samples = 0
  
  def accumulate_samples(self, batch_truth, batch_scores):
    """Cumulate a new batch of samples into the metric.
    
    Args:
      truth: np.array of (num_samples, num_classes) giving boolean
        ground-truth of presence of that class in that sample for this batch.
      scores: np.array of (num_samples, num_classes) giving the 
        classifier-under-test's real-valued score for each class for each
        sample.
    """
    assert batch_scores.shape == batch_truth.shape
    num_samples, num_classes = batch_truth.shape
    if not self.num_classes:
      self.num_classes = num_classes
      self._per_class_cumulative_precision = np.zeros(self.num_classes)
      self._per_class_cumulative_count = np.zeros(self.num_classes, 
                                                  dtype=np.int)
    assert num_classes == self.num_classes
    for truth, scores in zip(batch_truth, batch_scores):
      pos_class_indices, precision_at_hits = (
        _one_sample_positive_class_precisions(scores, truth))
      self._per_class_cumulative_precision[pos_class_indices] += (
        precision_at_hits)
      self._per_class_cumulative_count[pos_class_indices] += 1
    self.total_num_samples += num_samples

  def per_class_lwlrap(self):
    """Return a vector of the per-class lwlraps for the accumulated samples."""
    return (self._per_class_cumulative_precision / 
            np.maximum(1, self._per_class_cumulative_count))

  def per_class_weight(self):
    """Return a normalized weight vector for the contributions of each class."""
    return (self._per_class_cumulative_count / 
            float(np.sum(self._per_class_cumulative_count)))

  def overall_lwlrap(self):
    """Return the scalar overall lwlrap for cumulated samples."""
    return np.sum(self.per_class_lwlrap() * self.per_class_weight())


### STEP 2: LOADING DATASET 

In [13]:
class FreeSoundDataset(Dataset):
    """ FreeSound dataset."""

    # Initialize your data, download, etc.
    def __init__(self, X, y):
        
        self.len = X.shape[0]
        self.x_data = torch.from_numpy(X)
        self.y_data = torch.from_numpy(y)

    def __getitem__(self, index):
        return (self.x_data[index], self.y_data[index])

    def __len__(self):
        return self.len

In [14]:
X = np.load('../data/processed/mel/train_curated_mel128.npy')

In [15]:
X = X[:, : ,:128]

In [16]:
X.shape

(4970, 128, 128)

In [17]:
labels = pd.read_csv('../data/processed/train_curated.csv',sep=';')

In [18]:
labels_onehot = np.load('../data/processed/y_onehotenc_train_curated.npy')

In [19]:
X_train, X_test, y_train, y_test=train_test_split(X, labels_onehot, test_size=0.3, random_state=47) #, stratify=target)

In [20]:
print('X_train:', X_train.shape)

print('X_test:', X_test.shape)

print('y_train:', y_train.shape)
print('y_test:', y_test.shape)

X_train: (3479, 128, 128)
X_test: (1491, 128, 128)
y_train: (3479, 80)
y_test: (1491, 80)


In [21]:
train_dataset = FreeSoundDataset(X_train, y_train)

In [22]:
test_dataset = FreeSoundDataset(X_test, y_test)

### STEP 2: MAKING DATASET ITERABLE

In [23]:
batch_size = 32
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)
num_epochs = 100

In [24]:
transformations = transforms.Compose([transforms.ToTensor()])

In [25]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size= batch_size, 
                                           shuffle=True)

In [26]:
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

### STEP 3: CREATE MODEL CLASS

In [27]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=200, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(200)
        self.relu1 = nn.ReLU()

        # # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        
        
        
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=200, out_channels=100, kernel_size=3, stride=1, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(100)
        self.relu2 = nn.ReLU()

        # # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)



        # Convolution 3
        self.cnn3 = nn.Conv2d(in_channels=100, out_channels=100, kernel_size=3, stride=1, padding=1)
        self.batchnorm3 = nn.BatchNorm2d(100)
        self.relu3 = nn.ReLU()

        # # Max pool 3
        self.maxpool3 = nn.MaxPool2d(kernel_size=2)


        

        # Fully connected 1 (readout)
        self.fc1 = nn.Linear(100 * 16 * 16, 80) 

        
    
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x.float())
        out = self.batchnorm1(out)
        out = self.relu1(out)

        # Max pool 1
        out = self.maxpool1(out)



        # Convolution 2
        out = self.cnn2(out)
        out = self.batchnorm2(out)
        out = self.relu2(out)

        # Max pool 2
        out = self.maxpool2(out)



        # Convolution 3
        out = self.cnn3(out)
        out = self.batchnorm3(out)
        out = self.relu3(out)

        # Max pool 3
        out = self.maxpool3(out)


   

        # Dropout 1
        #out = self.dropout(out)

                     
        # Resize
        # Original size: (100, 32, 7, 7)
        # out.size(0): 100
        # New out size: (100, 32*7*7)
        out = out.view(out.size(0), -1)

        # Linear function (readout)
        out = self.fc1(out)
        
        return out

### STEP 4: INSTANTIATE MODEL CLASS

In [28]:
model = CNNModel()

In [29]:
#######################
#  USE GPU FOR MODEL  #
#######################

if torch.cuda.is_available():
    model.cuda()

### STEP 5: INSTANTIATE LOSS CLASS

In [30]:
criterion = nn.MultiLabelSoftMarginLoss()

In [31]:
#criterion = nn.CrossEntropyLoss()

### STEP 6: INSTANTIATE OPTIMIZER CLASS

In [32]:
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### STEP 7: TRAIN THE MODEL

In [33]:
niter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        #######################
        #  USE GPU FOR MODEL  #
        #######################
        if torch.cuda.is_available():
            images = Variable(images.unsqueeze(1).cuda())
            labels = Variable(labels.float().cuda())
        else:
            images = Variable(images.unsqueeze(1))
            labels = Variable(labels)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        
        # Forward pass to get output/logits
        #images = images.unsqueeze(1).type(torch.FloatTensor).cuda()
        outputs = model(images)
        
        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)
        
        # Getting gradients w.r.t. parameters
        loss.backward()
        
        # Updating parameters
        optimizer.step()
        
        niter += 1
        
        if niter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                #######################
                #  USE GPU FOR MODEL  #
                #######################
                if torch.cuda.is_available():
                    images = Variable(images.unsqueeze(1).cuda())
                else:
                    images = Variable(images.unsqueeze(1))
                
                # Forward pass only to get logits/output
                outputs = model(images)
                
            # Get predictions from the maximum value
            per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(labels.cpu().detach().numpy() , outputs.cpu().detach().numpy() )
            print('Iteration: {}. Loss: {}. Lwlrap from per-class values: {}. '.format(niter, loss.data, np.sum(per_class_lwlrap * weight_per_class)),'\m')
            
            #print("lwlrap from sklearn.metrics =", calculate_overall_lwlrap_sklearn(labels.cpu().detach().numpy() , outputs.cpu().detach().numpy()))
            

Iteration: 500. Loss: 0.056154847145080566. Lwlrap from per-class values: 0.5570878957643663.  \m
Iteration: 1000. Loss: 0.025757526978850365. Lwlrap from per-class values: 0.4273558523743946.  \m
Iteration: 1500. Loss: 0.012749080546200275. Lwlrap from per-class values: 0.5275147942766522.  \m
Iteration: 2000. Loss: 0.007612456567585468. Lwlrap from per-class values: 0.4335649649000816.  \m
Iteration: 2500. Loss: 0.0036713574081659317. Lwlrap from per-class values: 0.3982001833600832.  \m
Iteration: 3000. Loss: 0.010304748080670834. Lwlrap from per-class values: 0.5442245960389077.  \m
Iteration: 3500. Loss: 0.003983215894550085. Lwlrap from per-class values: 0.49467021966760094.  \m
Iteration: 4000. Loss: 0.001142236520536244. Lwlrap from per-class values: 0.44478508632053415.  \m
Iteration: 4500. Loss: 0.0019192267209291458. Lwlrap from per-class values: 0.5230767356425251.  \m
Iteration: 5000. Loss: 0.002414099406450987. Lwlrap from per-class values: 0.5048557794780206.  \m
Iterati

In [63]:
(images,labelt) = next(iter(test_loader))

In [64]:
if torch.cuda.is_available():
    images = Variable(images.unsqueeze(1).cuda())

# Forward pass only to get logits/output
outputs = model(images)

In [65]:
outputs.shape

torch.Size([32, 80])

In [37]:
# Random test data.
num_samples = 100
num_labels = 20

truth = np.random.rand(num_samples, num_labels) > 0.5
# Ensure at least some samples with no truth labels.
truth[0:1, :] = False

scores = np.random.rand(num_samples, num_labels)

In [68]:
per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(labelt.cpu().detach().numpy() , outputs.cpu().detach().numpy() )
print("lwlrap from per-class values=", np.sum(per_class_lwlrap * weight_per_class))
print("lwlrap from sklearn.metrics =", calculate_overall_lwlrap_sklearn(truth, scores))

lwlrap from per-class values= 0.3012275369810512
lwlrap from sklearn.metrics = 0.5657088507625565


In [70]:
labelt.cpu().detach().numpy()[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [74]:
outputs.cpu().detach().numpy()[1]

array([-130.60008 , -112.29409 ,  -90.02456 , -141.55603 , -146.94965 ,
        -92.549416, -186.11739 , -122.21728 ,  -99.73758 , -115.073425,
       -134.75891 , -145.89445 , -143.71509 , -141.80373 ,  -81.79933 ,
        -86.48598 ,  -73.38045 , -112.507095, -138.28325 , -125.82384 ,
        -74.430626,  -82.33194 , -139.56311 ,  -90.3098  ,  -90.489075,
       -126.73173 , -107.03858 ,  -74.31243 ,  -93.32175 , -115.7795  ,
       -101.50602 , -110.544655, -106.480644, -122.641716,  -31.093416,
       -127.77177 , -102.79919 , -125.34312 , -142.04366 ,  -52.844185,
       -118.5954  , -128.69861 ,  -94.46507 , -104.3044  ,  -57.981983,
       -137.85835 , -125.455154, -123.36449 , -175.41052 ,  -71.872375,
       -128.29597 , -134.72775 , -111.067055, -109.17908 , -117.76506 ,
       -102.995125, -101.37364 ,  -53.953674, -114.60611 ,  -45.361805,
        -95.50747 ,  -61.794506,  -89.208626, -115.70798 ,  -84.11334 ,
       -103.25519 , -171.5958  , -138.87572 ,  -83.03413 ,  -88.

In [36]:
# Random test data.
num_samples = 100
num_labels = 20

truth = np.random.rand(num_samples, num_labels) > 0.5
# Ensure at least some samples with no truth labels.
truth[0:1, :] = False

scores = np.random.rand(num_samples, num_labels)

In [37]:
per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(truth, scores)
print("lwlrap from per-class values=", np.sum(per_class_lwlrap * weight_per_class))
print("lwlrap from sklearn.metrics =", calculate_overall_lwlrap_sklearn(truth, scores))

lwlrap from per-class values= 0.6202861540585706
lwlrap from sklearn.metrics = 0.6202861540585706


In [38]:
per_class_lwlrap, weight_per_class 

(array([0.62235368, 0.60364528, 0.59153904, 0.6855551 , 0.62823323,
        0.56799037, 0.60324937, 0.63270653, 0.62807842, 0.60554864,
        0.64809972, 0.59708764, 0.66036523, 0.64441004, 0.62345638,
        0.60308589, 0.59904848, 0.63103118, 0.62637609, 0.62262411]),
 array([0.05163853, 0.040715  , 0.04865938, 0.04568024, 0.05064548,
        0.05263158, 0.05759682, 0.05263158, 0.05163853, 0.04865938,
        0.04270109, 0.05461768, 0.04270109, 0.05163853, 0.04468719,
        0.05064548, 0.05660377, 0.05660377, 0.05163853, 0.04766634]))

In [42]:
scores.shape, truth.shape

((100, 20), (100, 20))