# Your own Loss Function
In this notebook, we will look at how we can write our very own loss functions from scratch. I will be using the Keras framework for this purpose. You can implement this notebook in other frameworks too 😉 

First we will look at how some standard loss functions are written, like binary cross-entropy, categorical cross-entropy, etc., and then at how we can write an arbitrary loss function based on some mathematical formula.

In [None]:
import numpy as np
import keras 
import keras.backend as K
import tensorflow as tf
import math

### Binary Cross Entropy

Binary Cross Entropy is defined as: -(y * log(p) + (1-y) * log(1-p)), averaged over all predictions

In [None]:
def naive_bce(y_true,y_pred):
    """Binary cross-entropy written straight from its textbook formula.

    Evaluates -(y * log(p) + (1 - y) * log(1 - p)) element-wise with the
    Keras backend and averages over every element. ``y_pred`` is used as-is:
    no clipping is applied before taking the logs.
    """
    positive_term = y_true * K.log(y_pred)
    negative_term = (1 - y_true) * K.log(1 - y_pred)
    per_element = (-1) * (positive_term + negative_term)
    return K.mean(per_element)



def bce_numpy_equivalent(y_true,y_pred):
    """Loop-based reference version of binary cross-entropy.

    Important note: the numpy equivalent functions are there for
    understanding only, they shouldn't be passed to the models!

    Both inputs are flattened, y * log(p) + (1 - y) * log(1 - p) is
    accumulated element by element, and the negated total is divided by the
    number of elements.
    """
    labels = np.asarray(y_true).flatten()
    probs = np.asarray(y_pred).flatten()
    log_likelihood = 0
    for idx, label in enumerate(labels):
        prob = probs[idx]
        log_likelihood += label*(math.log(prob))+(1-label)*math.log(1-prob)
    negated = log_likelihood * -1
    return negated/len(labels)
    
    

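Note that as y_pred approaches 1, 1-y_pred approaches 0 and K.log(1-y_pred) -> -inf. Similarly, when y_pred is 0, K.log(y_pred) -> -inf. These infinite terms (or a negative argument to the log, if y_pred ever leaves the range [0, 1]) turn the loss into inf/NaN.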

In [None]:
# Small hand-written inputs used to try out naive_bce; .numpy() pulls the
# scalar result out of the returned tensor.
y_true=K.variable([[0, 1],[0, 0]])    # batch size = 2
y_pred=K.variable([[0.43, 0.51],[0.32, 0.49]])
naive_bce(y_true,y_pred).numpy()

In [None]:
# Same inputs through the loop-based reference implementation.
bce_numpy_equivalent(y_true,y_pred)

Our bce function works!!!!!

In [None]:
# Comparing with Keras BCE
BCE=keras.losses.BinaryCrossentropy(from_logits=False)
BCE(y_true,y_pred).numpy()

### Categorical Cross entropy

Categorical Cross entropy can be defined as − Σ (y[i] * log(y_pred[i])), summed over all i from 1 to n_class


In [None]:
from tensorflow.python.ops import clip_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import math_ops

In [None]:
def cce(y_true,y_pred):
    """Categorical cross-entropy for one-hot targets and probability predictions.

    The per-class terms -y * log(p) are summed over the last (class) axis and
    the resulting per-sample losses are averaged over all remaining axes.

    The average is taken with ``K.mean`` rather than by dividing by sizes read
    from ``y_true.shape``, so the function does not depend on the rank of the
    inputs or on every leading dimension being statically known.

    Args:
        y_true: one-hot encoded targets with the classes on the last axis.
        y_pred: predicted class probabilities, same shape as ``y_true``.

    Returns:
        Scalar tensor holding the mean cross-entropy per sample.
    """
    per_class = (-1)*(y_true*K.log(y_pred))
    # Collapse the class axis first: one loss value per sample.
    per_sample = math_ops.reduce_sum(per_class, axis=-1)
    return K.mean(per_sample)


def cce_numpy_equivalent(y_true,y_pred):
    """Loop-based reference version of categorical cross-entropy.

    Intended for understanding only, not for passing to a model.

    Args:
        y_true: one-hot targets (array-like, tensor or nested list) with the
            classes on the last axis.
        y_pred: predicted probabilities laid out like ``y_true``.

    Returns:
        The sum of -y * log(p) over every element, divided by the number of
        samples (the product of all dimensions except the last one).

    Raises:
        ValueError: from ``math.log`` if a prediction is not strictly positive.
    """
    # Convert before touching .shape so plain Python lists are accepted too.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Every axis except the class axis indexes samples; for a single
    # un-batched vector the empty product is 1.
    n_samples = int(np.prod(y_true.shape[:-1]))
    labels = y_true.flatten()
    probs = y_pred.flatten()
    loss = 0
    for idx, label in enumerate(labels):
        loss += label*(math.log(probs[idx]))
    loss *= -1
    return loss/n_samples
    
    
    

In [None]:
# Rank-3 one-hot targets and matching probabilities used to try out cce.
y_true=K.variable([[[0, 1, 0], [0, 0, 1]],[[0, 1, 0], [0, 0, 1]]])    # batch size = 2
y_pred=K.variable([[[0.05, 0.9499, 0.0001], [0.1, 0.8, 0.1]],[[0.05, 0.9499, 0.0001], [0.1, 0.8, 0.1]]])
cce(y_true,y_pred).numpy()

In [None]:
# Same inputs through the loop-based reference implementation.
cce_numpy_equivalent(y_true,y_pred)

In [None]:
# Comparing with Keras CCE
CCE=keras.losses.CategoricalCrossentropy(from_logits=False)
CCE(y_true,y_pred).numpy()

# Mean Squared Error

In [None]:
def mse(y_true,y_pred):
    """Mean squared error built from Keras backend ops.

    The squared differences are averaged over the last axis first, and those
    values are then averaged again into a single scalar.
    """
    squared_error = K.square(y_true - y_pred)
    per_sample = K.mean(squared_error, axis=-1)
    return K.mean(per_sample)

def mse_numpy_equivalent(y_true,y_pred):
    """Loop-based reference version of mean squared error.

    Flattens both inputs, accumulates the squared difference of each pair of
    elements and divides the total by the number of elements.
    """
    targets = np.asarray(y_true).flatten()
    preds = np.asarray(y_pred).flatten()
    total = 0
    for idx, target in enumerate(targets):
        residual = target - preds[idx]
        total += residual*residual
    return total/len(targets)

In [None]:
# Small hand-written inputs used to try out mse.
y_true = K.variable([[0., 1.], [0., 0.]])
y_pred =K.variable( [[1., 1.], [1., 0.]])
mse(y_true,y_pred).numpy()

In [None]:
# Same inputs through the loop-based reference implementation.
mse_numpy_equivalent(y_true,y_pred)

In [None]:
# Comparing with Keras MSE
MSE = keras.losses.MeanSquaredError()
MSE(y_true,y_pred).numpy()

# Mean Absolute Error

In [None]:
def mae(y_true,y_pred):
    """Mean absolute error built from Keras backend ops.

    The absolute differences are averaged over the last axis first, and those
    values are then averaged again into a single scalar.
    """
    absolute_error = abs(y_true - y_pred)
    per_sample = K.mean(absolute_error, axis=-1)
    return K.mean(per_sample)

def mae_numpy_equivalent(y_true,y_pred):
    """Loop-based reference version of mean absolute error.

    Flattens both inputs, accumulates the absolute difference of each pair of
    elements and divides the total by the number of elements.
    """
    targets = np.asarray(y_true).flatten()
    preds = np.asarray(y_pred).flatten()
    total = 0
    for idx, target in enumerate(targets):
        total += abs(target-preds[idx])
    return total/len(targets)

In [None]:
# Small hand-written inputs used to try out mae.
y_true = K.variable([[0., 1.], [0., 0.]])
y_pred =K.variable( [[1., 1.], [1., 0.]])
mae(y_true,y_pred).numpy()

In [None]:
# Same inputs through the loop-based reference implementation.
mae_numpy_equivalent(y_true,y_pred)

In [None]:
# Comparing with Keras MAE
MAE = keras.losses.MeanAbsoluteError()
MAE(y_true,y_pred).numpy()

# Custom Loss Functions

The code for the Custom Loss function depends first and foremost on its definition. So for writing a custom loss function its mathematical definition should be pretty damn clear.

Say we define Our Loss Function as mean ([ y^3 + p^3 ] - [ y^2 + p^2 ] -[ y + p ]) .
We can take any other mathematical definition too, that simply depends upon the task you have to perform.

In [None]:
def custom_loss(y_true, y_pred):
    """Example custom loss: mean([y^3 + p^3] - [y^2 + p^2] - [y + p]).

    Each bracketed group is built separately, combined element-wise and
    reduced to a scalar with the Keras backend mean.
    """
    cubic = pow(y_true,3)+pow(y_pred,3)
    quadratic = pow(y_true,2)+pow(y_pred,2)
    linear = pow(y_true,1)+pow(y_pred,1)
    loss = K.mean(cubic-quadratic-linear)
    return K.mean(loss)



def custom_loss_numpy_equivalent(y_true, y_pred):
    """Loop-based reference version of the example custom loss.

    For every pair of elements (y, p) of the flattened inputs it accumulates
    (y^3 + p^3) - (y^2 + p^2) - (y + p), then divides the total by the number
    of elements.
    """
    targets = np.asarray(y_true).flatten()
    preds = np.asarray(y_pred).flatten()
    total = 0
    for idx, target in enumerate(targets):
        pred = preds[idx]
        cubic = pow(target,3)+pow(pred,3)
        quadratic = pow(target,2)+pow(pred,2)
        linear = pow(target,1)+pow(pred,1)
        total += cubic - quadratic - linear
    return total/len(targets)
    
    

In [None]:
# Small hand-written inputs used to try out custom_loss.
y_true = K.variable([[0., 1.], [0., 0.]])
y_pred =K.variable( [[1., 1.], [1., 0.]])
custom_loss(y_true,y_pred).numpy()

In [None]:
# Same inputs through the loop-based reference implementation.
custom_loss_numpy_equivalent(y_true, y_pred)

# Using Our loss Functions In a Deep Learning Model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Load the citrus CSV and keep its column index; `col` is used further down
# to slice out the feature columns.
data = pd.read_csv('../input/oranges-vs-grapefruit/citrus.csv')
col=data.columns

In [None]:
# Display the loaded DataFrame.
data

In [None]:
# Encode the class label as a binary target: 0 for 'orange', 1 for any other
# value. Iterating over the column's values (instead of data['name'][i])
# avoids a label-based lookup per row, which is slow and only works when the
# DataFrame index is exactly 0..n-1.
target=[0 if name=='orange' else 1 for name in data['name']]

In [None]:
# Convert the label list to a float32 array and list the distinct values.
target=np.asarray(target).astype(np.float32)
np.unique(target)

In [None]:
# Use every column except the first one as features
# (presumably the first column is the 'name' label -- confirm against the CSV).
X=data[col[1:]]
# NOTE(review): not the last expression of the cell, so this preview is not displayed.
X.head()
# Convert the feature frame to a float32 array.
X=np.asarray(X).astype(np.float32)


In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.2, random_state=42)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation

In [None]:
def create_model():
    """Build and compile the binary classifier trained with ``naive_bce``.

    Architecture: Dense(30, relu) on 5 input features -> Dense(5, relu) ->
    Dense(1, sigmoid). Compiled with the Adam optimizer and an accuracy metric.
    """
    hidden_wide = Dense(30,input_dim=5,activation='relu')
    hidden_narrow = Dense(5,activation='relu')
    output = Dense(1,activation='sigmoid')

    model = Sequential()
    for layer in (hidden_wide, hidden_narrow, output):
        model.add(layer)
    model.compile(loss=naive_bce, optimizer='adam', metrics=['accuracy'])
    return model
    

In [None]:
# Instantiate the naive_bce classifier and print its layer summary.
model=create_model()
model.summary()

In [None]:
# Train for 3 epochs with mini-batches of 128 samples.
model.fit(X_train,y_train,epochs=3,batch_size=128)

We can see that as y_pred reaches 0 or 1, the loss becomes *nan*. To combat this, we can use an alternative implementation of bce.

In [None]:
def bce(y_true,y_pred):
    """Cross-entropy in its numerically stable form.

    Evaluates, element-wise,

        max(x, 0) - x * z + log(1 + exp(-|x|))

    with x = ``y_pred`` and z = ``y_true``, then averages over all elements.
    The exponential only ever receives a non-positive argument and the log an
    argument of at least 1, so neither can overflow or hit log(0).

    This expression is the cross-entropy of sigmoid(x), i.e. it treats
    ``y_pred`` as a logit. When it is fed values that have already been passed
    through a sigmoid, its numbers differ from the standard BCE on
    probabilities.

    Args:
        y_true: binary targets.
        y_pred: model outputs, broadcastable against ``y_true``.

    Returns:
        Scalar tensor with the mean loss.
    """
    # K.maximum is the element-wise max(x, 0) the formula calls for;
    # K.max(y_pred, 0) would instead reduce over axis 0 and collapse the batch.
    loss = K.maximum(y_pred,0.0)-y_pred * y_true + K.log(1+K.exp((-1)*K.abs(y_pred)))
    return K.mean(loss)

In [None]:
def create_modelv2():
    """Build and compile the binary classifier trained with the ``bce`` loss.

    Architecture: Dense(30, relu) on 5 input features -> Dense(5, relu) ->
    Dense(1, sigmoid). Compiled with the Adam optimizer and an accuracy metric.
    """
    hidden_wide = Dense(30,input_dim=5,activation='relu')
    hidden_narrow = Dense(5,activation='relu')
    output = Dense(1,activation='sigmoid')

    model = Sequential()
    for layer in (hidden_wide, hidden_narrow, output):
        model.add(layer)
    model.compile(loss=bce, optimizer='adam', metrics=['accuracy'])
    return model


In [None]:
# Build the bce-based classifier and train it for 3 epochs (batch size 128).
modelv2=create_modelv2()
modelv2.fit(X_train,y_train,epochs=3,batch_size=128)

In [None]:
# Report loss and accuracy on the held-out split.
modelv2.evaluate(X_test,y_test)

# Comparing it with Standard Keras BCE Loss Function
We expect a lower accuracy because of the alternate implementation, as it doesn't give the same loss values as the standard Keras BCE loss function.

In [None]:
def create_model2():
    """Build and compile the binary classifier with Keras' built-in BCE loss.

    Architecture: Dense(30, relu) on 5 input features -> Dense(5, relu) ->
    Dense(1, sigmoid). Compiled with 'binary_crossentropy', the Adam optimizer
    and an accuracy metric.
    """
    hidden_wide = Dense(30,input_dim=5,activation='relu')
    hidden_narrow = Dense(5,activation='relu')
    output = Dense(1,activation='sigmoid')

    model = Sequential()
    for layer in (hidden_wide, hidden_narrow, output):
        model.add(layer)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Build the built-in-BCE classifier and train it for 3 epochs (batch size 128).
model2=create_model2()
model2.fit(X_train,y_train,epochs=3,batch_size=128)

In [None]:
# Report loss and accuracy on the held-out split.
model2.evaluate(X_test,y_test)

# Regression Example

In [None]:

from sklearn.datasets import make_regression
from matplotlib import pyplot

# Synthetic regression problem: 10,000 samples, 10 features, one target.
# NOTE(review): no random_state is passed, so the data differs on every run.
X, y = make_regression(n_samples=10000, n_features=10,  n_targets=1, bias=0.1, noise=0.1)

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
def create_model2():
    """Build and compile the regression network trained with the custom ``mse``.

    Architecture: Dense(30, relu) on 10 input features -> Dense(5, relu) ->
    Dense(1, linear). Compiled with the Adam optimizer and an 'mse' metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(30,input_dim=10,activation='relu'))
    model.add(Dense(5,activation='relu'))
    # Linear output: regression targets are unbounded, whereas a sigmoid would
    # confine every prediction to the interval (0, 1).
    model.add(Dense(1,activation='linear'))
    model.compile(loss=mse, optimizer='adam', metrics=['mse'])
    return model

In [None]:
# Build the custom-mse regressor and train it for 3 epochs (batch size 12).
model2=create_model2()
model2.fit(X_train,y_train,epochs=3,batch_size=12)

In [None]:
# Report loss and the mse metric on the held-out split.
model2.evaluate(X_test,y_test)

# Comparing it with Standard Keras MSE Loss Function

In [None]:
def create_model2():
    """Build and compile the regression network with Keras' built-in MSE loss.

    Architecture: Dense(30, relu) on 10 input features -> Dense(5, relu) ->
    Dense(1, linear). Compiled with 'mean_squared_error', the Adam optimizer
    and an 'mse' metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(30,input_dim=10,activation='relu'))
    model.add(Dense(5,activation='relu'))
    # Linear output: regression targets are unbounded, whereas a sigmoid would
    # confine every prediction to the interval (0, 1).
    model.add(Dense(1,activation='linear'))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])
    return model

In [None]:
# Build the built-in-MSE regressor and train it for 3 epochs (batch size 12).
model=create_model2()
model.fit(X_train,y_train,epochs=3,batch_size=12)

In [None]:
# Report loss and the mse metric on the held-out split.
model.evaluate(X_test,y_test)

As we can see we have got exactly the same result with our custom mse loss function and standard keras mse loss function

# IF You got to learn something or you enjoyed the notebook. Please Do upvote 😅
## Follow me for upcoming tutorials and please show some love to my previous notebooks.😅
### Peace Out!!
### Sourabh Yadav!