In [103]:
#Loss : In machine learning, Loss function is used to find error or deviation in the learning process.

# Keras provides quite a few loss function in the losses module and they are as follows

#1. mean_squared_error
#2. mean_absolute_error
#3. mean_absolute_percentage_error
#4. mean_squared_logarithmic_error
#5. squared_hinge
#6. hinge
#7. categorical_hinge
#8. logcosh
#9. huber_loss
#10. categorical_crossentropy
#11. sparse_categorical_crossentropy
#12. binary_crossentropy
#13. kullback_leibler_divergence
#14. poisson
#15. cosine_proximity
#16. is_categorical_crossentropy

In [None]:
'''
Probabilistic Losses
    - Binary Crossentropy (BCE)
    - Categorical Crossentropy or SoftMax Loss
    - Sparse Categorical Crossentropy
    - Poisson
    - Kullback-Leibler Divergence
'''

In [None]:
'''
Regression losses
    - Mean Squared error
    - Mean Absolute Error
    - Mean Absolute percentage error
    - Mean Squared Logarithmic Error
    - Cosine Similarity
    - Huber
    - Log Cosh
'''

In [None]:
'''
Hinge losses for “maximum-margin” classification
    - Hinge
    - Squared Hinge
    - Categorical Hinge
'''

In [42]:
'''
Hinge
loss = maximum(1 - y_true * y_pred, 0)


y_true values are expected to be -1 or 1
If binary (0 or 1) labels are provided they will be converted to -1 or 1.

'''

y_true = [[0., 1.], [0., 0.]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]

h = tf.keras.losses.Hinge()
    
h(y_true,y_pred).numpy()

1.3

In [52]:
# np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1)

In [33]:
'''
SquaredHinge class

loss = square(maximum(1 - y_true * y_pred, 0))

y_true values are expected to be -1 or 1. If binary (0 or 1) labels are provided we will convert them to -1 or 1

'''
y_true = [[0., 1.], [0., 0.]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]

h = tf.keras.losses.SquaredHinge()

h(y_true,y_pred).numpy()

1.86

In [None]:
# np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1)

In [35]:
# Calling with 'sample_weight'
h(y_true, y_pred, sample_weight=[1, 0]).numpy()

0.73

In [38]:
# Using 'sum' reduction type.  
h = tf.keras.losses.SquaredHinge(reduction="sum")

h(y_true, y_pred).numpy()

3.72

In [39]:
# Using 'None' reduction type.  
h = tf.keras.losses.SquaredHinge(reduction="none")

h(y_true, y_pred).numpy()

array([1.46, 2.26], dtype=float32)

In [40]:
'''

CategoricalHinge class

loss = maximum(neg - pos + 1, 0) where neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)

pos = np.sum(y_true * y_pred, axis=-1)
neg = np.amax((1. - y_true) * y_pred, axis=-1)
loss = np.maximum(0., neg - pos + 1.)


y_true values are expected to be 0 or 1.

'''

y_true = [[0, 1], [0, 0]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]

h = tf.keras.losses.CategoricalHinge()
h(y_true, y_pred).numpy()

1.4000001

In [None]:
'''

Each observation is weighted by the fraction of the class it belongs to (reversed) so that the loss for minority 
class observations is more important when calculating the loss.  

weights = { 0:1.01300017,1:0.88994364,2:1.00704935, 3:0.97863318,4:1.02704553, 5:1.10680686,6:1.01385603,7:0.95770152, 
            8:1.02546573,9:1.00857287}
            
model.fit(x_train, y_train,verbose=1, epochs=10,class_weight=weights)

'''


'''

# You can also pass weights at the compile stage.
weights = [1.013, 0.889, 1.007, 0.978, 1.027,1.106,1.013,0.957,1.025, 1.008]

model.compile(optimizer=tf.keras.optimizers.SGD(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              loss_weights=weights,
              metrics=['accuracy']
'''

In [None]:
# loss functions are passed during the compile stage

loss_function = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss_function, optimizer='adam')

In [7]:
import tensorflow as tf
tf.enable_eager_execution()

In [None]:
# Mean Squared Error : 01

# MSE = np.square(np.subtract(Y_true,Y_pred)).mean()

mse = tf.keras.losses.MeanSquaredError()

y_true = [[45., 40.], 
          [0., 0.]]
y_pred = [[12., 4.], 
          [1., 0.]]

# How it is working

'''
Step 1:  Find the First batch value and get MSE 

y_true = [45., 40.]
y_pred = [12., 4.]

np.sum([np.square((x-y)) for x,y in zip(y_true, y_pred)]) / len(y_true) = 1192.5

Step 2:  Find the Second batch value and get MSE 

y_true = [45., 40.]
y_pred = [12., 4.]

np.sum([np.square((x-y)) for x,y in zip(y_true, y_pred)]) / len(y_true) = 0.5

Step 3:  Find Average value of both : (1192.5 + 0.5) / 2 = 596.5

'''

mse(y_true, y_pred).numpy()


In [110]:
# Mean Squared Error : 02


In [127]:
'''
Mean Squared Logarithmic Error : (MSLE)

MSE = np.square(np.subtract(np.log(Y_true +1),np.log(Y_pred +1))).mean()

Find the relative difference between the true and the predicted value

'''

y_true = [[0., 1.], [0., 0.]]
y_pred = [[1., 1.], [1., 0.]]

y_true = [45., 40]
y_pred = [12., 4]
msle = tf.keras.losses.MeanSquaredLogarithmicError()

'''
msle = np.sum([np.square((np.log(x +1)-np.log(y+1))) for x,y in zip(y_true, y_pred)]) / len(y_true)

'''
msle(y_true, y_pred).numpy()

3.0121489

In [128]:
'''
Mean Absolute Error :MAE

MAE = np.abs(np.subtract(Y_true,Y_pred)).mean()

Cosider only the magniture (length : sqrt.(x2 + y2) )
does not conisder direction (Negative or Positisve)

'''

'''
y_true = [[0., 1.], 
          [0., 0.]]

y_pred = [[1., 1.], 
          [1., 0.]]
          
'''

y_true = [45., 40]
y_pred = [12., 4]


mae = tf.keras.losses.MeanAbsoluteError()

'''np.sum([np.abs(x-y) for x,y in zip(y_true,y_pred)])/2'''

mae(y_true, y_pred).numpy()


34.5

In [138]:
'''
# Mean Absolute percentage error

MAPE = (np.abs(np.subtract(Y_true,Y_pred)) / Y_true).mean() * 100

Cosider only the magniture (length : sqrt.(x2 + y2) )
does not conisder direction (Negative or Positisve)

'''

'''
y_true = [[0., 1.], 
          [0., 0.]]

y_pred = [[1., 1.], 
          [1., 0.]] 
'''

y_true = [45., 40]
y_pred = [12., 4]

mape = tf.keras.losses.mean_absolute_percentage_error

'''(np.sum([(np.abs(x-y)/x) for x,y in zip(y_true,y_pred)])/2) * 100'''

mape(y_true, y_pred).numpy()


81.666664

In [198]:
# Cosine Similarity
# Measure of similarity between two non-zero vectors of an inner product space
# cosine similarity (CS) = (A . B) / (||A|| ||B||)

'''
# got product of vector A and Vector B
dot_ab = np.sum([(num1*num2) for num1,num2 in zip(y_true,y_pred)])

# Magniture of vector A
mag_a = np.sqrt(np.sum([num1*num1 for num1 in y_true]))

# Magniture of vector A
mag_b = np.sqrt(np.sum([num2*num2 for num2 in y_pred]))

cosime_similarity  = dot_ab/ (mag_a*mag_b) = 0.7999999999999998

'''

'''
y_true = [[0., 1.], [1., 1.]]
y_pred = [[1., 0.], [1., 1.]]

'''
y_true = [1., 1., 1., 1., 0.,0.]
y_pred = [0., 1., 1., 1., 1., 1.]


cosine_loss = tf.keras.losses.CosineSimilarity()

cosine_loss(y_true, y_pred).numpy()

0.67082036

In [257]:
'''
Huber :(MSE + MAE) based on delta value we chosse either MSE or MAE

This function is quadratic(MSE) for small values of a and linear(MAE) for large values, 

loss = 0.5 * x^2                  if |x| <= d
loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d

where d is delta

'''

'''
y_true = [24.0,50.0,15.0,38.0,87.0]
y_pred = [21.54,47.46,17.21,36.58,87.28]
delta =4

# distance between Actual and true value is less than 4 : So delta :4
# [np.subtract(x,y) for x,y in zip(y_true,y_actual)]

tota_error =0
tota_points = 0

for index,value in enumerate(y_true):
    tota_points +=1
    error = y_true[index] - y_pred[index]
    
    if error <= delta:
        huber_error = 0.5 * round((error*error)/2,2)
    else:
        #huber_error = round(delta*error) / (0.5 * (delta*delta))
        huber_error = 0.5 * (delta*delta) + delta * (np.abs(error) - delta)
    tota_error += huber_error

huber = round(tota_error/tota_points,2)

'''

y_true = [[0, 1], [0, 0]]
y_pred = [[0.5, 0.4], [0.4, 0.5]]

y_true = [24.0,50.0,15.0,38.0,87.0]
y_pred = [21.54,47.46,17.21,36.58,87.28]

hub_loss = tf.keras.losses.Huber(delta=4.0) # default delta =1
hub_loss(y_true, y_pred).numpy()


1.9482092

In [260]:
'''
Log Cosh : Computes the logarithm of the hyperbolic cosine of the prediction error
Log-cosh is the logarithm of the hyperbolic cosine of the prediction error.

logcosh = log((exp(x) + exp(-x))/2)

x is the error y_pred - y_true.

'''
y_true = [[0., 1.], [0., 0.]]
y_pred = [[1., 1.], [0., 0.]]


y_true = [24.0,50.0,15.0,38.0,87.0]
y_pred = [21.54,47.46,17.21,36.58,87.28]

l = tf.keras.losses.LogCosh()

l(y_true, y_pred).numpy()


1.195665

In [266]:
# Hinge Loss Function

y_true = [[0., 1.], [0., 0.]]
y_pred = [[0.5, 0.4], [0.4, 0.5]]

y_true = [24.0,50.0,15.0,38.0,87.0]
y_pred = [21.54,47.46,17.21,36.58,87.28]

h_loss = tf.keras.losses.Hinge()
h_loss(y_true, y_pred).numpy()

0.0

In [None]:
#  Squared Hinge

In [267]:
#  Categorical Hinge

In [478]:

'''
# Binary Cross Entropy : 
    - used where two class present in the traget varible
    - By default, the sum_over_batch_size reduction is used.
    - This means that the loss will return the average of the per-sample losses in the batch.

'''


y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
y_pred = [0.8, 0.9, 0.9, 0.6, 0.8, 0.1, 0.4, 0.2, 0.1, 0.3]

# The prediction is a probability vector, meaning it represents predicted probabilities of all classes, summing up to 1.

'''
result = []

for i in range(len(actual)):
    p = y_true[i]
    q = y_pred[i]
    
    ce = - p * np.log(q) - (1-p)* np.log(1-q)
    
    result.append(ce)
    print("y={0} yhat={1} ce:{2}".format(p,q,ce))
    
    
print("Average Cross Entropy:{0}".format(np.mean(result)))

'''

bce = tf.keras.losses.BinaryCrossentropy()

bce(y_true, y_pred).numpy()

0.24691972

In [587]:
y_true= [0,0,0,1]
y_pred = [0.1,0.3,0.2,0.4]


cce = tf.keras.losses.BinaryCrossentropy()

cce(y_true, y_pred).numpy()

0.40036726

In [589]:
result = []

for i in range(len(y_true)):
    p = y_true[i]
    q = y_pred[i]
    
    ce = - p * np.log(q) - (1-p)* np.log(1-q)
    
    result.append(ce)
    #print("y={0} yhat={1} ce:{2}".format(p,q,ce))
    
np.mean(result)
    

0.4003674356962309

In [None]:
# What is a good cross-entropy score?

'''
Cross-Entropy = 0.00: Perfect probabilities.
Cross-Entropy < 0.02: Great probabilities.
Cross-Entropy < 0.05: On the right track.
Cross-Entropy < 0.20: Fine.
Cross-Entropy > 0.30: Not great.
Cross-Entropy > 1.00: Terrible.
Cross-Entropy > 2.00 Something is broken.

'''

In [11]:
# Log Loss : “log loss“,or  “cross-entropy” or “negative log-likelihood” a

from sklearn.metrics import log_loss
from numpy import asarray

# define data as expected, e.g. probability for each event {0, 1}

# calculate the average log loss

ll = log_loss(y_true, y_pred)
print('Average Log Loss: %.3f' % ll)

Average Log Loss: 0.400


In [12]:
def softmax(z):
    """Softmax function"""
    return np.exp(z) / np.sum(np.exp(z))

In [13]:

# Categorical Cross Entropy

#- It compares the predicted probability distribution with target probability distribution

#y_true = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
#y_pred = [0.8, 0.9, 0.9, 0.6, 0.8, 0.1, 0.4, 0.2, 0.1, 0.3] 

y_true= [0,0,0,1]
y_pred = [0.1,0.3,0.2,0.4]  # predicted probability value


cce = tf.keras.losses.CategoricalCrossentropy()

cce(y_true, y_pred).numpy()

0.9162907

In [14]:
# CCE manualy

np.sum(-(y_true * np.log(y_pred)))

0.916290731874155

In [15]:
import tensorflow as tf
import numpy as np

y_true = [1., 2.]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

loss = tf.keras.losses.sparse_categorical_crossentropy(tf.convert_to_tensort(y_true), tf.convert_to_tensor(y_pred))
loss

AttributeError: module 'tensorflow' has no attribute 'convert_to_tensort'

In [16]:
np.sum(-(y_true * np.log(y_pred)))

  """Entry point for launching an IPython kernel.


ValueError: operands could not be broadcast together with shapes (2,) (2,3) 

In [17]:
# Sparse Categorical Crossentropy


#  if you use categorical_crossentropy you use one hot encoding, 
#  if you use sparse_categorical_crossentropy you encode as normal integers. 


# When to use Categorical Cross Entropy

# If your targets are one-hot encoded, use categorical_crossentropy. Examples of one-hot encodings:

# If your targets are integers, use sparse_categorical_crossentropy

In [18]:

# Categorical Cross Entroy : One One Encoded of y_true 

y_true = [[0, 0, 1],
          [1, 0, 0],
          [0, 0, 1]]

y_pred = [[0.1, 0.1, 0.8],
          [0.5, 0.2, 0.3],
          [0.0, 0.2, 0.8]]

cce = tf.keras.losses.CategoricalCrossentropy()

cce(y_true, y_pred).numpy()

0.3798114

In [19]:
# sparse_categorical_crossentropy

y_true = [[2, 0, 2]]

y_pred = [[0.1, 0.1, 0.8],
          [0.5, 0.2, 0.3],
          [0.0, 0.2, 0.8]]

loss = tf.keras.losses.sparse_categorical_crossentropy(tf.convert_to_tensor(y_true),tf.convert_to_tensor(y_pred))

loss.numpy()

array([0.22314355, 0.6931472 , 0.22314365], dtype=float32)

In [23]:
# Poisson
# Poisson = y_pred - y_true * log(y_pred)

y_true = np.random.randint(0, 2, size=(2, 3))
y_pred = np.random.random(size=(2, 3))

loss = tf.keras.losses.poisson(y_true, y_pred)

loss

<tf.Tensor: id=138, shape=(2,), dtype=float64, numpy=array([1.32565812, 1.1877869 ])>

In [24]:
# Kullback-Leibler Divergence

# loss = y_true * log(y_true / y_pred)

y_true = [[0, 1], [0, 0]]

y_pred = [[0.6, 0.4], [0.4, 0.6]]

kl = tf.keras.losses.KLDivergence()
kl(y_true, y_pred).numpy()

0.45814306

In [26]:
#[pi * np.log2(qi/pi) for pi, qi in zip(y_true,y_pred)]

In [634]:
# Multi label classification Loss Function

logits = tf.Variable(np.array([[ 1.4397182 , -0.7993438 ,  4.113389  ,  3.2199187 ,  4.5777845 ],
                               [ 0.30619335,  0.10168511,  4.253479  ,  2.3782277 ,  4.7390924 ],
                               [ 1.124632  ,  1.6056736 ,  2.9778094 ,  2.0808482 ,  2.0735667 ],
                               [ 0.7051575 , -0.10341895,  4.990803  ,  3.7019827 ,  3.8265839 ],
                               [ 0.6333333 , -0.76601076,  3.2255085 ,  2.7842572 ,  5.3817415 ]]),dtype = tf.float32)

targets = tf.Variable(np.array([[1, 1, 0, 0, 0],
                                [0, 1, 0, 0, 1],
                                [1, 1, 1, 1, 0],
                                [0, 0, 1, 0, 1],
                                [1, 1, 1, 1, 1]]),dtype = tf.float32)


cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets )

cross_entropy

<tf.Tensor: id=4595, shape=(5, 5), dtype=float32, numpy=
array([[0.21268466, 1.170648  , 4.129609  , 3.2590992 , 4.58801   ],
       [0.8579177 , 0.6435966 , 4.2675934 , 2.466893  , 0.00870855],
       [0.28124034, 0.18294993, 0.04965096, 0.11762683, 2.1920042 ],
       [1.1066352 , 0.64277405, 0.00677719, 3.7263577 , 0.02155003],
       [0.4258032 , 1.147773  , 0.03896642, 0.059942  , 0.00458926]],
      dtype=float32)>