# Sigmoid/ Softmax/ Log Likelihood/ NLL 

In [1]:
from fastai import *
from fastai.vision.all import *

In [2]:
# Fix an (arbitrary) number of datapoints and number of classes for the demo
n_batch, n_classes = 6, 3

In [3]:
# Simulate a classification problem: one row of raw scores per datapoint,
# one column per class.
# Seed torch's global RNG first so that the same activations are drawn on every
# Restart & Run All (outputs recorded from earlier, unseeded runs will differ).
torch.manual_seed(42)
activations = torch.randn((n_batch, n_classes))
activations

tensor([[-0.8462,  0.6815,  0.4842],
        [ 0.1994, -0.2347, -0.8847],
        [ 2.3776, -0.8194,  2.2221],
        [-0.9184, -0.0412,  2.5192],
        [ 0.7622,  0.8399, -0.6373],
        [-0.3377,  0.5634, -0.8695]])

In [4]:
# Define seed for reproducibility
np.random.seed(42)

# Define the corresponding targets (all zeros to start with)
targets = torch.zeros_like(activations)

# Define two index collections for the rows (xs) and the columns (ys)
# In ys, simulate exactly one class out of n_classes for each datapoint by
# floor-dividing a uniform [0, 1) draw by 1 / n_classes
row_indices = range(n_batch)
# np.int was deprecated in NumPy 1.20 and removed in 1.24; np.int64 keeps the
# cell running and makes tensor(col_indices[0]) a Long tensor on every platform.
# Note that col_indices has shape (1, n_batch).
col_indices = np.array(np.random.rand(1, n_batch) // (1. / n_classes), dtype=np.int64)

# Use the indices to set the entry of the chosen class to 1 for every datapoint
targets[row_indices, col_indices] = 1
targets

tensor([[0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])

## Sigmoid

The sigmoid function is defined as follows

$$ sigmoid(x) = \frac{1}{1 + e^{-x}} $$

It is applied to each activation independently of the others, which is why the resulting probabilities for a datapoint do not necessarily sum to one.

In [5]:
# Apply the sigmoid element-wise and look at the result
sigmoid_acts = activations.sigmoid()
sigmoid_acts

tensor([[0.3002, 0.6641, 0.6187],
        [0.5497, 0.4416, 0.2922],
        [0.9151, 0.3059, 0.9022],
        [0.2853, 0.4897, 0.9255],
        [0.6818, 0.6985, 0.3459],
        [0.4164, 0.6372, 0.2954]])

In [6]:
# Verify using the closed form 1 / (1 + e^{-x})
exp_neg = torch.exp(-activations)
1 / (1 + exp_neg)

tensor([[0.3002, 0.6641, 0.6187],
        [0.5497, 0.4416, 0.2922],
        [0.9151, 0.3059, 0.9022],
        [0.2853, 0.4897, 0.9255],
        [0.6818, 0.6985, 0.3459],
        [0.4164, 0.6372, 0.2954]])

In [44]:
# The hand-written formula and the built-in sigmoid must agree
manual_sigmoid = 1 / (1 + torch.exp(-activations))
assert torch.allclose(manual_sigmoid, activations.sigmoid())

## Softmax

The softmax function is defined over a whole set of activations. If there are several activations $x_1, x_2, x_3, ..., x_n$, then the softmax of the activation $x_j$ is defined as 

$$softmax(x_j) = \frac{e^{x_j}}{\sum_{i=1}^{n} e^{x_i}}$$

This squishes each activation into the range from 0 to 1 and, unlike sigmoid, makes the activations of a datapoint sum to one.

In [7]:
# Softmax along dim 1, i.e. across the classes of each datapoint
sm = activations.softmax(1)
sm

tensor([[0.1065, 0.4907, 0.4028],
        [0.5035, 0.3262, 0.1703],
        [0.5272, 0.0216, 0.4513],
        [0.0290, 0.0697, 0.9014],
        [0.4296, 0.4644, 0.1060],
        [0.2469, 0.6080, 0.1451]])

In [32]:
# Denominators: the sum of exponentials over the classes of each row, kept as a
# column vector and then broadcast against a ones tensor to fill the full shape
row_totals = torch.exp(activations).sum(dim=1, keepdim=True)
drs = torch.ones_like(activations) * row_totals
drs

tensor([[ 4.0287,  4.0287,  4.0287],
        [ 2.4243,  2.4243,  2.4243],
        [20.4469, 20.4469, 20.4469],
        [13.7769, 13.7769, 13.7769],
        [ 4.9879,  4.9879,  4.9879],
        [ 2.8892,  2.8892,  2.8892]])

In [33]:
# Verify using the formula: exponentials divided by their row-wise sums
numerators = torch.exp(activations)
numerators / drs

tensor([[0.1065, 0.4907, 0.4028],
        [0.5035, 0.3262, 0.1703],
        [0.5272, 0.0216, 0.4513],
        [0.0290, 0.0697, 0.9014],
        [0.4296, 0.4644, 0.1060],
        [0.2469, 0.6080, 0.1451]])

In [43]:
# See if our manual way and torch's internal way match
manual_softmax = torch.exp(activations) / drs
assert torch.allclose(manual_softmax, activations.softmax(dim=1))

## Cross Entropy Loss

Very popularly used for classification problems, cross entropy loss is defined as follows

$$ Cross \ Entropy \ Loss = -\sum_{i = 1}^{n}{t_i}\log(p_i)$$

where 
- $n$ is the total number of classes
- $t_i$ is the truth value of the ith class
- $p_i$ is the predicted probability of the ith class

In a single label classification problem, only one class has $t_i = 1$ and the rest of the targets are all zeros. This reduces the loss to a single term, which is the negative log of the predicted probability of the target class for that datapoint.

### Manually perform cross entropy steps

- Index into softmax activations and pick probabilities corresponding to target values
- Take a log of these indexed softmax activations
- Sum the series and negate it

In [46]:
# For every datapoint, pick the softmax probability assigned to its target class
datapoint_ids = range(n_batch)
probas = sm[datapoint_ids, col_indices]
probas

tensor([[0.4907, 0.1703, 0.4513, 0.0697, 0.4296, 0.2469]])

In [48]:
# Natural log of the picked probabilities
log_probas = probas.log()
log_probas

tensor([[-0.7120, -1.7703, -0.7957, -2.6642, -0.8448, -1.3987]])

In [68]:
# Negate the log probabilities to get the negative log likelihood per datapoint
neg_log_probas = -log_probas
neg_log_probas

tensor([[0.7120, 1.7703, 0.7957, 2.6642, 0.8448, 1.3987]])

In [70]:
# Sum the per-datapoint losses (they were already negated in the previous step)
loss_value = torch.sum(neg_log_probas)
loss_value

tensor(8.1857)

### Use internal cross entropy loss 

In [66]:
# See losses for each individual datapoint: reduction='none' keeps one value per row
ce_per_item = torch.nn.CrossEntropyLoss(reduction='none')
ce_per_item(activations, tensor(col_indices[0]))

tensor([0.7120, 1.7703, 0.7957, 2.6642, 0.8448, 1.3987])

In [67]:
# See the overall loss: reduction='sum' adds up the per-datapoint losses
ce_summed = torch.nn.CrossEntropyLoss(reduction='sum')
ce_summed(activations, tensor(col_indices[0]))

tensor(8.1857)

### Use internal log_softmax and nll_loss functions

In [79]:
# Individual datapoint loss: log-softmax followed by the negative log likelihood
l_sm = F.log_softmax(activations, dim=1)
target_ids = tensor(col_indices[0])
F.nll_loss(l_sm, target_ids, reduction='none')

tensor([0.7120, 1.7703, 0.7957, 2.6642, 0.8448, 1.3987])

In [80]:
# Cumulative loss: same two steps, with the per-datapoint losses summed
l_sm = F.log_softmax(activations, dim=1)
target_ids = tensor(col_indices[0])
F.nll_loss(l_sm, target_ids, reduction='sum')

tensor(8.1857)

# fastai convenience functions

There are a lot of convenience functions defined in the `fastcore.basics` module which can come in very handy for several operations especially when processing the data.

We shall have a look at the following ones which I envision to be really handy

- L: It is a class which is very much like a list but slightly better.
- listify/tuplify: Functions which convert any collection/iterable into a list/tuple
- uniqueify: It is a function that grabs all the unique items from a collection
- last_index: Finds the index of the last occurrence of an element in a collection
- filter_dict: Filters the kv pairs of a dictionary based on a function taking k,v as arguments
- filter_keys: Filters the kv pairs of a dictionary based on a function that takes k as arguments
- filter_values: Filters the kv pairs of a dictionary based on a function that takes v as arguments

`fastai` has introduced a really nice wrapper around python's list class called `L`. It is functionally the same as a list but it's more convenient to work with. Let me show you with an example.

In [81]:
# Fetch the Oxford-IIIT Pet dataset through fastai and keep the path to its local copy
path = untar_data(URLs.PETS)

In [137]:
# List the images folder; the repr of the result starts with the item count, e.g. `(#7393)`
(path/"images").ls()

(#7393) [Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/samoyed_48.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/chihuahua_119.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/american_pit_bull_terrier_82.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Bengal_182.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/keeshond_190.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/english_setter_96.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Abyssinian_101.mat'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Bengal_75.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/saint_bernard_195.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Siamese_80.jpg')...]

In [139]:
# Converts any given collection into a list (here: a set literal)
listify({1, 2, 3, 4})

[1, 2, 3, 4]

In [143]:
# For a dict only the keys end up in the list (iterating a dict yields its keys)
listify({"k":2, "v":3})

['k', 'v']

In [144]:
# A tuple is converted into a list with the same elements in the same order
listify((1,2,3))

[1, 2, 3]

In [145]:
# Converts any given collection into a tuple (here: a set literal)
tuplify({1, 2, 3, 4})

(1, 2, 3, 4)

In [146]:
# For a dict only the keys end up in the tuple (iterating a dict yields its keys)
tuplify({"k":2, "v":3})

('k', 'v')

In [147]:
# A list is converted into a tuple with the same elements in the same order
tuplify([1, 2, 4])

(1, 2, 4)

In [151]:
# Grabs a list of all the unique elements in a provided collection,
# keeping them in the order in which they were first seen
uniqueify([3,1,2,3,1,3,2,1])

[3, 1, 2]

In [150]:
# Tells where the query last occurred in the collection (as a 0-based index)
last_index(2, [1,2,3,4,1,1,2,3,2])

8

In [172]:
# Create a simple dictionary mapping "<category>: <item>" labels to a cost
d = {
    "Fruit: Apple": 5,
    "Fruit: Bananas": 4,
    "Menu: Apple Pie": 34,
    "Ingredient: Butter": 32,
    "Ingredient: Salt": 5,
}

In [169]:
# Keep only the fruit entries of the dictionary
def fruit(query):
    """Return a match object when the key `query` starts with "Fruit: <word>", else None."""
    return re.match(r"Fruit: \w+", query)

filter_keys(d, fruit)

{'Fruit: Apple': 5, 'Fruit: Bananas': 4}

In [171]:
# Keep only the items that cost less than 10 units
def cost(query):
    """Return True when the value `query` is below 10."""
    return query < 10

filter_values(d, cost)

{'Fruit: Apple': 5, 'Fruit: Bananas': 4}

In [174]:
# Keep only the ingredients that cost less than 10 units
def cheap_ingredient(k, v):
    """Truthy when key `k` is an ingredient label and its value `v` is below 10."""
    return re.match(r"Ingredient: \w+", k) and v < 10

filter_dict(d, cheap_ingredient)

{'Ingredient: Salt': 5}

# Regular Expressions

Regexes, short for regular expressions, are a small language in themselves: a compact syntax for describing patterns of characters that can occur in ordinary strings, such as English text.

This can help us find if a given word/phrase/sentence(more generally strings) contains a particular pattern that we want to be there and more powerfully, extract such patterns whenever they're encountered from such strings.

Let us play around with the pets dataset URLs using regex to understand the importance of this language and subsequently how this could help us in Deep Learning.

Here is a really good one stop resource for understanding different characters/patterns in regex which can help create our custom query patterns for string matching.

[Data Quest Regex Cheatsheet](https://www.dataquest.io/wp-content/uploads/2019/03/python-regular-expressions-cheat-sheet.pdf)

In Python, the built-in `re` module lets us work with regular expressions.

In [82]:
import re

In [87]:
# Look at the top-level contents of the dataset folder
path.ls()

(#2) [Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/annotations')]

In [89]:
# Keep a handle on the images folder and list the files inside it
images_path = path/"images"
images_path.ls()

(#7393) [Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/samoyed_48.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/chihuahua_119.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/american_pit_bull_terrier_82.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Bengal_182.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/keeshond_190.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/english_setter_96.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Abyssinian_101.mat'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Bengal_75.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/saint_bernard_195.jpg'),Path('/home/vinayak/.fastai/data/oxford-iiit-pet/images/Siamese_80.jpg')...]

The paths in `oxford-iiit dataset` are named in a very `structured` way. This helps us leverage regex to understand more about the dataset.

Precisely the way data is structured is:
- The names of all dog breed files start with a lowercase letter, while the names of all cat breed files start with a capital letter
- The name of every image file is in the format \<breed\>_\<number\>.jpg
- If the breed name consists of multiple words, the words are joined by `_`

## Using regex to get all the cat files

In [92]:
# Strip the directory part: keep only the file name of every entry in the images folder
image_files = images_path.ls()
filenames = L([img.name for img in image_files])
filenames

(#7393) ['samoyed_48.jpg','chihuahua_119.jpg','american_pit_bull_terrier_82.jpg','Bengal_182.jpg','keeshond_190.jpg','english_setter_96.jpg','Abyssinian_101.mat','Bengal_75.jpg','saint_bernard_195.jpg','Siamese_80.jpg'...]

In [95]:
# Cat files are the ones whose name starts with a capital letter.
# The pattern is anchored with ^ so that only the first character is tested;
# unanchored, findall would return the tail of any name that merely contains
# a capital letter somewhere.
cat_pattern = r"^[A-Z].+"
# One findall result per file: [] when there is no match, [filename] otherwise
cats = L([re.findall(cat_pattern, x) for x in filenames])
cats

(#7393) [[],[],[],['Bengal_182.jpg'],[],[],['Abyssinian_101.mat'],['Bengal_75.jpg'],[],['Siamese_80.jpg']...]

In [99]:
# Keep the matched text of every file that matched, dropping the empty results.
# The matches are rebuilt from `filenames` instead of being read back from `cats`,
# so this cell is idempotent: re-running it no longer indexes into strings that
# were already flattened by a previous run.
cat_hits = [re.findall(cat_pattern, x) for x in filenames]
cats = L([hits[0] for hits in cat_hits if len(hits) > 0])
cats

(#2403) ['Bengal_182.jpg','Abyssinian_101.mat','Bengal_75.jpg','Siamese_80.jpg','Bombay_164.jpg','Ragdoll_118.jpg','Siamese_126.jpg','Bengal_1.jpg','Birman_17.jpg','Russian_Blue_47.jpg'...]

## Using regex to get all the cat breeds

Now that we have cat files, we can get the cat breeds from these using another regex as follows

In [105]:
filenames

(#7393) ['samoyed_48.jpg','chihuahua_119.jpg','american_pit_bull_terrier_82.jpg','Bengal_182.jpg','keeshond_190.jpg','english_setter_96.jpg','Abyssinian_101.mat','Bengal_75.jpg','saint_bernard_195.jpg','Siamese_80.jpg'...]

In [123]:
# Capture everything before the trailing _<number> of names that start with a capital letter
cat_breed_pattern = r"(^[A-Z].+)_\d+"
cat_breed_hits = [re.findall(cat_breed_pattern, name) for name in filenames]
# Keep the captured breed of every matching file, then drop the duplicates
cat_breeds = L(uniqueify([hits[0] for hits in cat_breed_hits if hits]))

In [124]:
cat_breeds

(#12) ['Bengal','Abyssinian','Siamese','Bombay','Ragdoll','Birman','Russian_Blue','Egyptian_Mau','Maine_Coon','Persian'...]

## Using regex to get all the dog breeds

In [115]:
filenames

(#7393) ['samoyed_48.jpg','chihuahua_119.jpg','american_pit_bull_terrier_82.jpg','Bengal_182.jpg','keeshond_190.jpg','english_setter_96.jpg','Abyssinian_101.mat','Bengal_75.jpg','saint_bernard_195.jpg','Siamese_80.jpg'...]

In [121]:
# Capture everything before the trailing _<number> of names that start with a lowercase letter
dog_breed_pattern = r"(^[a-z].+)_\d+"
dog_breed_hits = [re.findall(dog_breed_pattern, name) for name in filenames]
# Keep the captured breed of every matching file, then drop the duplicates
dog_breeds = L(uniqueify([hits[0] for hits in dog_breed_hits if hits]))

In [122]:
dog_breeds

(#25) ['samoyed','chihuahua','american_pit_bull_terrier','keeshond','english_setter','saint_bernard','boxer','great_pyrenees','basset_hound','leonberger'...]

## Using regex to get all dogs and all cats

In [125]:
filenames

(#7393) ['samoyed_48.jpg','chihuahua_119.jpg','american_pit_bull_terrier_82.jpg','Bengal_182.jpg','keeshond_190.jpg','english_setter_96.jpg','Abyssinian_101.mat','Bengal_75.jpg','saint_bernard_195.jpg','Siamese_80.jpg'...]

In [126]:
# Dog images: lowercase first letter, then <breed>_<number>.jpg.
# The dot is escaped and the pattern is anchored with $ so that only a literal
# ".jpg" at the very end of the name is accepted (an unescaped "." matches any
# character, and without $ any suffix after "jpg" would be tolerated).
dog_pattern = r"^[a-z].+_\d+\.jpg$"
# One findall result per file, then keep the matched name of every hit
dogs = L([re.findall(dog_pattern, x) for x in filenames])
dogs = L([x[0] for x in dogs if len(x) > 0])
dogs

(#4990) ['samoyed_48.jpg','chihuahua_119.jpg','american_pit_bull_terrier_82.jpg','keeshond_190.jpg','english_setter_96.jpg','saint_bernard_195.jpg','boxer_36.jpg','great_pyrenees_175.jpg','chihuahua_105.jpg','basset_hound_23.jpg'...]

In [128]:
# Cat images: capital first letter, then <breed>_<number>.jpg.
# The dot is escaped and the pattern is anchored with $ so that only a literal
# ".jpg" at the very end of the name is accepted (an unescaped "." matches any
# character, and without $ any suffix after "jpg" would be tolerated).
cat_pattern = r"^[A-Z].+_\d+\.jpg$"
# One findall result per file, then keep the matched name of every hit
cats = L([re.findall(cat_pattern, x) for x in filenames])
cats = L([x[0] for x in cats if len(x) > 0])
cats

(#2400) ['Bengal_182.jpg','Bengal_75.jpg','Siamese_80.jpg','Bombay_164.jpg','Ragdoll_118.jpg','Siamese_126.jpg','Bengal_1.jpg','Birman_17.jpg','Russian_Blue_47.jpg','Egyptian_Mau_171.jpg'...]

## Check the union of cats and dogs equals all files

In [130]:
# Count the matched files from the lists themselves rather than retyping the
# numbers shown in earlier outputs, so the check stays valid if the data changes
len(cats) + len(dogs)

7390

In [131]:
filenames

(#7393) ['samoyed_48.jpg','chihuahua_119.jpg','american_pit_bull_terrier_82.jpg','Bengal_182.jpg','keeshond_190.jpg','english_setter_96.jpg','Abyssinian_101.mat','Bengal_75.jpg','saint_bernard_195.jpg','Siamese_80.jpg'...]

In [134]:
# Files matched by neither pattern: all filenames minus the cats and the dogs
set(filenames) - (set(cats) | set(dogs))

{'Abyssinian_100.mat', 'Abyssinian_101.mat', 'Abyssinian_102.mat'}