In [None]:
from PIL import Image
import os
from random import randint
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Folder holding the images (absolute, machine-specific path: adjust as needed).
img_folder = 'F:/DL/val2014/'
# os.listdir returns entries in arbitrary order; sorting makes the slice below
# pick the same ten files on every run.
imgs = sorted(os.listdir(img_folder))

batch_x = []

# Loop through the selected images (index 10 to 19) and process each one
for p in imgs[10:20]:
    # Construct the full path to the image
    img_path = os.path.join(img_folder, p)

    # Open inside a context manager so the underlying file handle is released
    # as soon as the pixel data has been copied out.
    with Image.open(img_path) as img:
        # normalize_image subtracts a 3-element mean, so every image must have
        # exactly three channels: convert grayscale/RGBA/palette files to RGB.
        # Then resize to (224, 224) so all images share the same dimensions.
        # Both convert() and resize() return new images that outlive `img`.
        batch_x.append(img.convert('RGB').resize((224, 224)))

# The same thing as a list comprehension (this form leaves closing the files
# to the garbage collector):
# batch_x = [Image.open(os.path.join(img_folder, p)).convert('RGB').resize((224, 224)) for p in imgs[10:20]]

So we have taken only 10 images.

In [None]:
batch_x[0]  # first image of our batch; as the cell's last expression it is shown as the cell output

In [None]:
def normalize_image(x, max_value=255.0):
    """Scale pixel values to [0, 1] and standardise each colour channel.

    Parameters
    ----------
    x : array-like
        Image whose last axis holds the 3 colour channels (it is broadcast
        against the 3-element mean/std below), e.g. an (H, W, 3) uint8 array.
    max_value : float, optional
        Largest value a pixel can take; 255 for 8-bit images. A fixed constant
        is used instead of the image's own maximum so that dark images are not
        stretched, an all-black image does not cause a 0/0 division, and
        ``denormalize_image`` recovers the original intensities.

    Returns
    -------
    numpy.ndarray
        ``(x / max_value - mean) / std`` computed per channel.
    """
    x = np.asarray(x) / max_value
    # Per-channel statistics (ImageNet values, see the note below this cell).
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    return (x - mean) / std

The values [0.485, 0.456, 0.406] represent the mean pixel values, and [0.229, 0.224, 0.225] represent the standard deviation of the pixel values calculated from the ImageNet dataset. Since ImageNet is a widely used dataset for training deep learning models, these values have been determined empirically for ImageNet images.

The reason there are three values in the mean and standard deviation arrays is because most images used in deep learning are in color and are represented as Red, Green, and Blue (RGB) channels.

However, it's important to note that these values are not universal and may not be optimal for every dataset or task. Depending on your specific use case, you may need to calculate the mean and standard deviation from your own dataset if it differs significantly from ImageNet. Nonetheless, using these values as a starting point is a common practice, and you can adjust them based on your needs.

In [None]:
def denormalize_image(x):
    """Map a standardised image back to displayable values.

    Multiplies each channel by its standard deviation, adds the channel mean,
    and clips the result to the closed interval [0, 1].
    """
    channel_std = np.array([0.229, 0.224, 0.225])
    channel_mean = np.array([0.485, 0.456, 0.406])
    restored = channel_std * x + channel_mean
    return np.clip(restored, 0, 1)

x = std*x + mean: This line denormalizes the input image x. It multiplies each channel of the image by the corresponding standard deviation (std) and then adds the corresponding mean (mean) to each channel. This reverses the normalization process that was applied to the image.

x = np.clip(x, 0, 1): After denormalization, this line clips the pixel values of the image x so that they fall within the range [0, 1]. Values below 0 are set to 0 and values above 1 are set to 1, which keeps the pixel values valid for display, since [0, 1] is the typical range for floating-point image data.

In [None]:
# Number of images in the batch built above.
batch_size = len(batch_x)
# Mixing coefficient: weight of the current image in the blend; the randomly
# picked image receives (1 - lam).
lam = 0.65

In [None]:
# Convert the first PIL image of the batch to a NumPy array and normalise it;
# this is the image that we picked and that another image will be mixed into.
current_image = normalize_image(np.array(batch_x[0]))

In [None]:
# Expected to be (224, 224, 3): 224x224 from the resize above, channels last
# (assuming the source image has three colour channels).
current_image.shape

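Here the array is channels-last, i.e. (height, width, channels) = (224, 224, 3). If we worked with PyTorch tensors, the shape would be channels-first: (3, 224, 224).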

In [None]:
# To show the image we need to denormalize it again (back to the [0, 1] range).
plt.imshow(denormalize_image(current_image))
plt.title("Current image")  # label the figure so it can be told apart from the others
plt.axis("off");  # trailing ';' keeps the axis-limits tuple out of the cell output

In [None]:
# Pick the image to mix with the current image. Index 0 is the current image
# itself, and mixing an image with itself would just reproduce it, so draw the
# partner from indices 1..batch_size-1. With a single-image batch there is no
# other choice, so fall back to index 0.
random_index = randint(1, batch_size - 1) if batch_size > 1 else 0
random_image_from_batch = normalize_image(np.array(batch_x[random_index]))

In [None]:
# Show the randomly picked image (denormalized back to the [0, 1] range).
plt.imshow(denormalize_image(random_image_from_batch))
plt.title("Randomly picked image")  # label the figure so it can be told apart from the others
plt.axis("off");  # trailing ';' keeps the axis-limits tuple out of the cell output

In [None]:
# Mixup: pixel-wise weighted sum of the two normalised images. The current
# image is weighted by lam and the randomly picked one by (1 - lam).
mixed_image = current_image * lam + random_image_from_batch * (1 - lam)

In [None]:
# Show the blended image (denormalized back to the [0, 1] range).
plt.imshow(denormalize_image(mixed_image))
plt.title(f"Mixed image (lam = {lam})")  # label the figure with the mixing coefficient used
plt.axis("off");  # trailing ';' keeps the axis-limits tuple out of the cell output

We are going to feed this mixed image to the network, not the "current image" or the "random image".

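Now for the labels: for the sake of explanation, consider the first image to be "water" and the second image to be "person".
If we take lambda = 0.7, the mixed label gets 0.7 for the first image's class and 0.3 for the second image's class (with the lam = 0.65 used in the code above, it would be 0.65 and 0.35). Our labels are not one-hot vectors anymore.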

Loss = lambda*XE(pred,y_a)+(1-lambda)*XE(pred,y_b)

pred = the network's prediction for the mixed image (the same prediction is used in both terms of the loss).
XE = cross-entropy loss.

y_a = true image label for "current image".

y_b = true image label for "random image".
