In [1]:
%%bash
# Upgrade pip itself first, then install (or upgrade) the synbols package.
pip install --upgrade pip
pip install --upgrade synbols

Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-21.0.1-py3-none-any.whl (1.5 MB)
Installing collected packages: pip
Successfully installed pip-21.0.1
Defaulting to user installation because normal site-packages is not writeable
Collecting synbols
  Downloading synbols-1.0.2-py3-none-any.whl (34 kB)
Installing collected packages: synbols
Successfully installed synbols-1.0.2


You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.


# Synbols dataset generation

To run this notebook, please use the associated Dockerfile, since you need the Synbols Docker image.


In [1]:
import synbols
from synbols.data_io import pack_dataset
from synbols import generate
from synbols import drawing
import random
from synbols.visualization import plot_dataset
import matplotlib.pyplot as plt
import pickle
# Background / foreground presets. Both are left as None here; the commented-out
# lines are alternative drawing patterns kept for reference.
# NOTE(review): neither `bg` nor `fg` is referenced by the cells shown in this
# notebook (the samplers pass background=None and build their own foreground).
# bg = drawing.Camouflage(stroke_angle=1.)
# bg = drawing.NoPattern()
bg = None  # alternative: drawing.MultiGradient(alpha=0.5, n_gradients=2, types=('linear', 'radial'))
# bg = drawing.Gradient(types=('linear',), random_color=drawing.color_sampler(brightness_range=(0.1, 0.9)))

fg = None  # alternative: drawing.SolidColor((255, 0, 0))


# Generate Minority dataset

In this dataset, we keep the same ratio of the sensitive attribute (the color, $a$) across classes (the character, $y$):

$$p(a \mid y=c_1) = p(a \mid y=c_2)$$

In [5]:
import numpy as np
from math import pi
from synbols import drawing
from synbols import generate

class InfoSolid(drawing.SolidColor):
    """Solid-color pattern whose attribute dictionary also reports its color."""

    def attribute_dict(self):
        """Return the parent's attribute dictionary with a 'color' entry added."""
        attributes = super().attribute_dict()
        # Expose the color so it can be read back from the generated attributes.
        attributes['color'] = self.color
        return attributes
    
# Module-level random generator with a fixed seed.
rng = np.random.RandomState(seed=1337)
# Default proportion value.
p = 0.1
# RGB triplets for the two foreground colors used by the samplers.
blue = (0, 0, 255)
red = (255, 0, 0)

class MinoritySampler:
    """Attribute sampler where the foreground color is independent of the character.

    Each sample is blue with probability ``p`` and red with probability
    ``1 - p``; the character is drawn separately from ``'a'`` and ``'d'``.
    """

    def __init__(self, p):
        """Store ``p``, the probability of drawing the blue foreground."""
        self.p = p

    def __call__(self, seed):
        """Sample the attributes of one symbol using a generator seeded with ``seed``."""
        sample_rng = np.random.RandomState(seed)
        # Draw order (color first, then character) determines the random stream.
        color_index = sample_rng.choice([0, 1], p=[self.p, 1-self.p])
        foreground_color = (blue, red)[color_index]
        symbol = sample_rng.choice(['a', 'd'])

        foreground = InfoSolid(foreground_color)
        foreground.color = foreground_color

        base_sampler = generate.basic_attribute_sampler(
            char=symbol,
            foreground=foreground,
            background=None,
            inverse_color=False,
            resolution=(64, 64),
        )
        occluded_sampler = generate.add_occlusion(base_sampler, n_occlusion=1, scale=0.4)
        return occluded_sampler()


def make_dataset(p, seed, num):
    """Generate ``num`` samples with a ``MinoritySampler(p)`` and label them.

    Each attribute dictionary gains two entries: ``'rotation_attr'`` (1 when
    ``'rotation'`` is non-negative, else 0) and ``'color'`` (``'red'`` or
    ``'blue'``).

    Returns:
        A tuple ``(x, y, y)``: the packed images followed by the attribute
        dictionaries twice. The mask returned by ``pack_dataset`` is discarded.
    """
    sampler = MinoritySampler(p=p)
    samples = generate.dataset_generator(sampler, num, generate.flatten_mask, dataset_seed=seed)
    x, mask, y = pack_dataset(samples)

    for attributes in y:
        attributes['rotation_attr'] = int(attributes['rotation'] >= 0)
        is_red = attributes['foreground']['color'] == red
        attributes['color'] = 'red' if is_red else 'blue'
    return (x, y, y)

# The minority dataset gets its own file: the spurious-correlation cell further
# down writes to '/data/spurious_dataset_50000.pkl', so reusing that path here
# would cause this dataset to be overwritten when the whole notebook is run.
# Three datasets are pickled back-to-back into the same file; reading them back
# takes three successive pickle.load calls on the opened file.
# NOTE(review): presumably train / validation / test splits -- confirm.
with open('/data/minority_dataset_50000.pkl', 'wb') as f:
    pickle.dump(make_dataset(p=0.9, seed=1000, num=100_000), f)
    pickle.dump(make_dataset(p=0.5, seed=2000, num=10_000), f)
    pickle.dump(make_dataset(p=0.5, seed=3000, num=10_000), f)

100%|██████████| 1000/1000 [00:12<00:00, 82.42it/s]


# Spurious correlation dataset

In this dataset, the sensitive attribute (the color) is spuriously correlated with the character.

In [None]:
import numpy as np
from math import pi
from synbols import drawing
from synbols import generate

class InfoSolid(drawing.SolidColor):
    """``SolidColor`` variant that records its color in the attribute dictionary."""

    def attribute_dict(self):
        """Extend the inherited attribute dictionary with the pattern's color."""
        info = super().attribute_dict()
        info['color'] = self.color
        return info
    
# Fixed-seed generator and default proportion value for this cell.
rng = np.random.RandomState(seed=1337)
p = 0.1
# Foreground colors as RGB triplets.
blue = (0, 0, 255)
red = (255, 0, 0)

class SpuriousSampler:
    """Attribute sampler where the foreground color is correlated with the character.

    Character ``'a'`` is drawn blue with probability ``p`` (red otherwise),
    while ``'d'`` is drawn blue with probability ``1 - p``. With ``p = 0.5``
    the color carries no information about the character.
    """

    def __init__(self, p):
        """Store ``p``, the probability that an ``'a'`` is drawn in blue."""
        self.p = p

    def __call__(self, seed):
        """Sample the attributes of one symbol, making color dependent on the character.

        Args:
            seed: Seed for the per-sample random generator.

        Returns:
            Whatever the occlusion-wrapped attribute sampler returns when called.
        """
        rng = np.random.RandomState(seed)
        char = rng.choice(['a', 'd'])
        # Probability of drawing blue for this particular character.
        color_p = {'a': self.p, 'd': 1 - self.p}[char]
        # The probabilities must sum to 1: [color_p, color_p] makes rng.choice
        # raise ValueError for any p other than 0.5.
        color = [blue, red][rng.choice([0, 1], p=[color_p, 1 - color_p])]

        fg = InfoSolid(color)
        fg.color = color

        attr_sampler = generate.basic_attribute_sampler(
            char=char, foreground=fg, background=None, inverse_color=False, resolution=(64, 64))
        attr_sampler = generate.add_occlusion(attr_sampler, n_occlusion=1, scale=0.4)
        return attr_sampler()


def make_dataset(p, seed, num):
    """Generate ``num`` samples with a ``SpuriousSampler(p)`` and label their color.

    This definition replaces the ``make_dataset`` defined in the minority
    dataset cell once this cell has been run.

    Returns:
        A tuple ``(x, y, y)``: the packed images followed by the attribute
        dictionaries twice. The mask returned by ``pack_dataset`` is discarded.
    """
    sampler = SpuriousSampler(p=p)
    samples = generate.dataset_generator(sampler, num, generate.flatten_mask, dataset_seed=seed)
    x, mask, y = pack_dataset(samples)

    for attributes in y:
        is_red = attributes['foreground']['color'] == red
        attributes['color'] = 'red' if is_red else 'blue'
    return (x, y, y)


# Pickle three datasets back-to-back into one file, in the order listed below.
# Each entry is (p, seed, num) for one make_dataset call.
with open('/data/spurious_dataset_50000.pkl', 'wb') as f:
    for split_p, split_seed, split_num in (
        (0.9, 1000, 100_000),
        (0.5, 2000, 10_000),
        (0.5, 3000, 10_000),
    ):
        pickle.dump(make_dataset(p=split_p, seed=split_seed, num=split_num), f)