In [1]:
import inspect

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from utils.losses1 import MarginalChainProperLoss, ForwardProperLoss, scoring_matrix
from src.dataset import Data_handling
from src.weakener import Weakener

In [2]:
# Build the project's data handler for MNIST.
# NOTE(review): the attribute dumps recorded later in this notebook report
# 45000 train / 15000 validation / 10000 test samples and splitting_seed=None.
# How train_size=0.8 enters that split, and whether the split is reproducible
# without a seed, is not visible from here -- TODO confirm in src.dataset.
Data = Data_handling(
    dataset='mnist',
    train_size=0.8,
    valid_size=0.25,
)

In [3]:
# Peek at the training item at index 1 and display its first element
# (the image tensor; the second element of each item is the target).
second_item = Data.train_dataset[1]
second_item[0]

tensor([[[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
         [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
         [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
         [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0

In [4]:
# Full introspection dump of the data handler: every member of `Data`
# (dunders, bound methods and plain attributes) as (name, value) pairs.
# `inspect` is imported together with the other imports in the first cell,
# so later cells that use it do not depend on this cell having run.
inspect.getmembers(Data)

[('__add__',
  <bound method Dataset.__add__ of <src.dataset.Data_handling object at 0x0000028DEC5EA7B0>>),
 ('__annotations__', {}),
 ('__class__', src.dataset.Data_handling),
 ('__class_getitem__', <function Data_handling.__class_getitem__>),
 ('__delattr__',
  <method-wrapper '__delattr__' of Data_handling object at 0x0000028DEC5EA7B0>),
 ('__dict__',
  {'dataset': 'MNIST',
   'tr_size': 0.8,
   'val_size': 0.25,
   'batch_size': 64,
   'shuffle': False,
   'splitting_seed': None,
   'weak_labels': None,
   'virtual_labels': None,
   'valid_dataset': <torch.utils.data.dataset.Subset at 0x28dae35a990>,
   'valid_loader': None,
   'transform': Compose(
       ToTensor()
       Normalize(mean=(0.1307,), std=(0.3081,))
   ),
   'test_dataset': Dataset MNIST
       Number of datapoints: 10000
       Root location: Datasets/raw_datasets
       Split: Test
       StandardTransform
   Transform: Compose(
                  ToTensor()
                  Normalize(mean=(0.1307,), std=(0.3081,))

In [5]:
# Collect the non-callable members of `Data` as (name, value) pairs ...
attributes = inspect.getmembers(Data, lambda member: not inspect.isroutine(member))
# ... and display only those whose name is not a dunder (`__name__`).
[
    (name, value)
    for name, value in attributes
    if not name.startswith('__') or not name.endswith('__')
]

[('batch_size', 64),
 ('dataset', 'MNIST'),
 ('num_classes', 10),
 ('shuffle', False),
 ('splitting_seed', None),
 ('test_dataset',
  Dataset MNIST
      Number of datapoints: 10000
      Root location: Datasets/raw_datasets
      Split: Test
      StandardTransform
  Transform: Compose(
                 ToTensor()
                 Normalize(mean=(0.1307,), std=(0.3081,))
             )),
 ('test_num_samples', 10000),
 ('tr_size', 0.8),
 ('train_dataset', <torch.utils.data.dataset.Subset at 0x28dae3e1d30>),
 ('train_num_samples', 45000),
 ('transform',
  Compose(
      ToTensor()
      Normalize(mean=(0.1307,), std=(0.3081,))
  )),
 ('val_size', 0.25),
 ('valid_dataset', <torch.utils.data.dataset.Subset at 0x28dae35a990>),
 ('valid_loader', None),
 ('valid_num_samples', 15000),
 ('virtual_labels', None),
 ('weak_labels', None)]

In [6]:
# Names stored directly on the instance (its __dict__), skipping any that
# start with an underscore.
[attr_name for attr_name in vars(Data) if not attr_name.startswith('_')]

['dataset',
 'tr_size',
 'val_size',
 'batch_size',
 'shuffle',
 'splitting_seed',
 'weak_labels',
 'virtual_labels',
 'valid_dataset',
 'valid_loader',
 'transform',
 'test_dataset',
 'num_classes',
 'train_dataset',
 'train_num_samples',
 'test_num_samples',
 'valid_num_samples']

In [7]:
# Quick sanity check of the training split: its size, plus the data and the
# target of the first item (items are indexed as item[0]=data, item[1]=target).
train_set = Data.train_dataset
print("Size of training data: \n", len(train_set))
print("First training data sample: \n", train_set[0][0])
print("First training target: \n", train_set[0][1])

Size of training data: 
 45000
First training data sample: 
 tensor([[[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
         [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
         [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
          -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
         [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.42

In [8]:
# --- Weak-label generation -------------------------------------------------
# Corruption probability passed to the weakener when building its matrix M.
corr_p = 0.2
weakener = Weakener(true_classes=Data.num_classes)
# Pass the variable rather than repeating the literal, so that editing
# `corr_p` above actually changes the generated matrix.
weakener.generate_M(model_class='pll', corr_p=corr_p)
# weakener.generate_M(model_class='unif_noise', corr_p=0.5) #Try this for noisy labels
print(f"Generated M matrix:\n{weakener.M}")
#true_onehot = Data.train_dataset.targets  # shape: (n_samples, n_classes)

# Collect the target of every training item by iterating the dataset once.
targets = [target for _, target in Data.train_dataset]
# generate_weak returns two objects, printed below as z and w.
z, w = weakener.generate_weak(targets)
print(f"Generated z (noisy labels):\n{z}")
print(f"Generated w (multi-label matrix):\n{w}")

# NOTE(review): the recorded output of this cell ends with
#   AttributeError: 'Subset' object has no attribute 'tensors'
# after the z / w prints, so the failure appears to be raised from one of the
# statements below. Presumably the project code expects a dataset exposing
# `.tensors` while train_dataset is a torch Subset -- TODO trace in
# src.dataset (include_weak / get_dataloader) and fix there.
Data.include_weak(z)

train_loader, test_loader = Data.get_dataloader(weak_labels='weak')

# Pull a single batch to check the shapes the loader yields.
batch = next(iter(train_loader))
xb, wb, yb = batch
print(f"Inputs batch shape: {xb.shape}")
print(f"Weak (partial) labels shape: {wb.shape}")
print(f"True one-hot labels shape: {yb.shape}")

Generated M matrix:
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 1.34217728e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  1.34217728e-01 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  3.35544320e-02 3.35544320e-02]
 ...
 [2.04800000e-06 2.04800000e-06 2.04800000e-06 ... 2.04800000e-06
  0.00000000e+00 2.04800000e-06]
 [2.04800000e-06 2.04800000e-06 2.04800000e-06 ... 2.04800000e-06
  2.04800000e-06 0.00000000e+00]
 [5.12000000e-07 5.12000000e-07 5.12000000e-07 ... 5.12000000e-07
  5.12000000e-07 5.12000000e-07]]
Generated z (noisy labels):
tensor([ 69, 131, 577,  ..., 487, 159, 644], dtype=torch.int32)
Generated w (multi-label matrix):
tensor([[0., 0., 0.,  ..., 1., 1., 0.],
        [0., 0., 1.,  ..., 1., 0., 0.],
        [1., 0., 0.,  ..., 0., 1., 0.],
        ...,
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 1., 0., 1.]], 

AttributeError: 'Subset' object has no attribute 'tensors'