<a href="https://colab.research.google.com/github/Hasanmog/Journey-Through-AI-Learning/blob/main/DeepLearning_With_Python/DeepLearning_with_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning for Computer Vision


## Introduction

Working with the MNIST dataset (handwritten digits).

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers , models , datasets , utils

In [None]:
# Convolutional base for 28x28 single-channel images.
# (3 , 3) is the size of the window in which each convolution learns local patterns.
model = models.Sequential([
    layers.Conv2D(32 , (3 , 3) , activation = 'relu' , input_shape = (28 , 28 , 1)) ,
    layers.MaxPooling2D(2 , 2) ,
    layers.Conv2D(64 , (3 , 3) , activation = 'relu') ,
    layers.MaxPooling2D(2 , 2) ,
    layers.Conv2D(64 , (3 , 3) , activation = 'relu') ,
])


In [None]:
# Print the layers , output shapes and parameter counts of the model built so far
model.summary()

In [None]:
# To make the prediction we need a densely connected classifier.
# Dense layers only take 1D tensors , so the 3D output of the conv base is flattened first :
# (x , y , z) --> (x * y * z)
classifier_head = (
    layers.Flatten() ,
    layers.Dense(64 , activation = 'relu') ,
    layers.Dense(10 , activation = 'softmax') ,
)
for head_layer in classifier_head:
  model.add(head_layer)

In [None]:
# Summary again , to see the effect of the Flatten and Dense layers on the output shape
model.summary()

In [None]:
# Load the MNIST train / test splits through the Keras datasets API
(x_train , y_train) , (x_test , y_test) = keras.datasets.mnist.load_data() # x --> images (inputs) , y --> labels (targets)

In [None]:
# Shapes of the four arrays , in the order x_train , y_train , x_test , y_test
# (we have 60,000 training samples)
for split_array in (x_train , y_train , x_test , y_test):
  print(split_array.shape)

In [None]:
# Look at the raw labels (shape and first ten values) before they are one-hot encoded
print(y_train.shape)
print(y_train[:10])

In [None]:
# NOTE: run this cell once per load_data() call - re-running it would rescale the
# images and one-hot encode the labels a second time.

# Add the channel dimension , which is 1 here (grey-scale).
# -1 lets NumPy infer the number of samples instead of hard-coding 60000 / 10000.
x_train = x_train.reshape((-1 , 28 , 28 , 1))
x_test = x_test.reshape((-1 , 28 , 28 , 1))

# Normalize : divide by 255. Casting to float32 first avoids the float64 arrays
# a plain division would produce (twice the memory).
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

# Convert the class vectors (integers) to binary class matrices.
# num_classes is explicit so both splits get 10 columns even if a digit were missing from one.
y_train = utils.to_categorical(y_train , num_classes = 10)
print(y_train[:10])
print(y_train.shape)

y_test = utils.to_categorical(y_test , num_classes = 10)


In [None]:
# categorical_crossentropy matches the one-hot encoded labels ; accuracy is tracked during training
model.compile(optimizer = 'rmsprop' , loss = 'categorical_crossentropy' , metrics = ['accuracy'] )
# 5 passes over the training data in mini-batches of 64
model.fit(x_train , y_train , batch_size = 64 , epochs = 5 )

## Training a ConvNet from Scratch

### Installing dataset from Kaggle

In [None]:
!pip install kaggle

In [None]:
import os
from getpass import getpass

# Never hard-code API credentials in a notebook : anything committed here is public.
# (The key that used to be written in this cell was exposed and should be revoked on Kaggle.)
# Values already present in the environment are reused ; otherwise they are asked for
# interactively , and getpass does not echo the key.
if not os.environ.get('KAGGLE_USERNAME'):
  os.environ['KAGGLE_USERNAME'] = input("Kaggle username: ")
if not os.environ.get('KAGGLE_KEY'):
  os.environ['KAGGLE_KEY'] = getpass("Kaggle API key: ")


In [None]:
# Download the dataset archive with the Kaggle command-line client
!kaggle datasets download -d biaiscience/dogs-vs-cats # after -d replace with owner_name/dataset_name

In [None]:
!unzip dogs-vs-cats.zip

### Dataset Prep

In [None]:
import os , shutil

In [None]:
# Create dataset/train , dataset/val and dataset/test.
# makedirs(..., exist_ok = True) also creates the parent folder and does not fail when the cell is re-run.
for split in ("train" , "val" , "test"):
  os.makedirs(os.path.join("dataset" , split) , exist_ok = True)

In [None]:
# Folders the images are copied from (presumably created by unzipping the Kaggle archive - verify)
orig_train_dir = 'train/train'
orig_test_dir = 'test/test'

In [None]:
animals = ['cat' , 'dog']
samples = 500 # number of validation images (train 1000 , val 500 , test 500)
sample_per_animal = int(samples/len(animals))

train_dir = 'dataset/train/'
val_dir = 'dataset/val'
test_dir = 'dataset/test'

def copy_split(dest_dir , per_animal , start_index):
  """Copy `per_animal` images of every animal from orig_train_dir into `dest_dir`.

  The files copied are '<animal>.<k>.jpg' for k = start_index ... start_index + per_animal - 1.
  """
  os.makedirs(dest_dir , exist_ok = True)
  for animal in animals:
    print(f"now {animal}")
    for sample in range(start_index , start_index + per_animal):
      ex = f"{animal}.{sample}.jpg"
      src = os.path.join(orig_train_dir , ex)
      dest = os.path.join(dest_dir , ex)
      shutil.copyfile(src , dest)

# Both splits are produced in one run , so the cell no longer has to be edited and re-run by hand.
# Training images start at index 0 , validation images at index 1000 , so the two never overlap.
copy_split(train_dir , 1000 // len(animals) , 0)
copy_split(val_dir , sample_per_animal , 1000)

In [None]:
samples = 499 # index of the last test image to copy --> 500 images in total
# os.listdir returns names in arbitrary order ; sorting makes the selected subset reproducible
test_samples = sorted(os.listdir(orig_test_dir))
s = test_samples[:samples+1]

# Make sure the destination exists , so the copy does not depend on an earlier mkdir
os.makedirs(test_dir , exist_ok = True)

for sample in s:

  src = os.path.join(orig_test_dir , sample)
  dest = os.path.join(test_dir , sample )
  shutil.copyfile(src , dest)

In [None]:
# List what each split folder currently contains
train_samples , val_samples , test_samples = (
    os.listdir(folder) for folder in (train_dir , val_dir , test_dir)
)

print("number of training samples" , len(train_samples))
print("number of val samples" , len(val_samples))
print("number of test samples" , len(test_samples))

# Count per animal through the file-name prefix (use val_samples to inspect the validation split).
# Note : for the test split this gives 0 , since those files are not named after the animal.
dogs = sum(1 for file_name in train_samples if file_name.startswith('dog'))
cats = sum(1 for file_name in train_samples if file_name.startswith('cat'))

print("number of dogs samples" , dogs)
print("number of cats samples" , cats)

In [None]:
import cv2

# Read one of the listed training files with OpenCV
img = cv2.imread(os.path.join(train_dir , train_samples[10]))

size = (250 , 250) # target size passed to cv2.resize

img = cv2.resize(img , size)

In [None]:
# Colab helper used here to display the OpenCV image inside the notebook
from google.colab.patches import cv2_imshow
cv2_imshow(img)

In [None]:
# Show one entry of the training folder listing
print(train_samples[0])

In [None]:
import os
import shutil

base_train_dir = "dataset/train"
base_val_dir = "dataset/val"
base_test_dir = "dataset/test"

def sort_into_class_dirs(split_dir):
  """Move the 'dog*' / 'cat*' files lying directly in `split_dir` into split_dir/dogs and split_dir/cats.

  The class folders are created when missing : moving a file to a non-existing
  folder would instead rename the file to 'dogs' / 'cats'. Sub-folders are skipped ,
  so the cell can be re-run safely.

  Returns the (dogs , cats) lists of file names that were moved.
  """
  moved = {'dog' : [] , 'cat' : []}
  for prefix in moved:
    os.makedirs(os.path.join(split_dir , prefix + 's') , exist_ok = True)

  for m in os.listdir(split_dir):
    src = os.path.join(split_dir , m)
    if not os.path.isfile(src):
      continue # 'dogs' itself starts with 'dog' : never move the class folders
    for prefix , names in moved.items():
      if m.startswith(prefix):
        # os.replace overwrites a copy left over from a previous run instead of failing
        os.replace(src , os.path.join(split_dir , prefix + 's' , m))
        names.append(m)
        break

  return moved['dog'] , moved['cat']

# Both the training and the validation folder need the one-sub-folder-per-class layout
sort_into_class_dirs(base_train_dir)
dogs , cats = sort_into_class_dirs(base_val_dir)

### Building Network

In [None]:
from keras import layers
from keras import models

# Five Conv2D + MaxPooling2D stages on 250x250 RGB inputs ,
# followed by a Dense classifier with a single sigmoid output.
model = models.Sequential([
    layers.Conv2D(32 , (3,3) , activation = 'relu' , input_shape = (250 , 250 , 3)) ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(64 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(128 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(128 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(64 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Flatten() ,
    layers.Dense(512 , activation = 'relu') ,
    layers.Dense(1 , activation = 'sigmoid') ,
])

In [None]:
# Print the architecture of the network defined for the cats-vs-dogs images
model.summary()

In [None]:
from tensorflow.keras import optimizers

# binary_crossentropy pairs with the single sigmoid output unit ;
# the metric is named 'acc' , which is the key later read from history.history
model.compile(optimizer = optimizers.RMSprop(learning_rate = 1e-4) , loss = 'binary_crossentropy' , metrics = ['acc'] )

In [None]:
# Split folders read by the data generators
train_dir = 'dataset/train/'
val_dir = 'dataset/val'
test_dir = 'dataset/test'

In [None]:
#DataLoading

from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator

# Both generators only multiply the pixel values by 1/255
train_datagen = ImageDataGenerator(rescale = 1./255)
test_datagen = ImageDataGenerator(rescale = 1./255)

# Options shared by both directory iterators : resize to 250x250 and emit binary labels
flow_options = dict(target_size = (250,250) , class_mode = 'binary')

train_generator = train_datagen.flow_from_directory(train_dir , batch_size = 20 , **flow_options)

validation_generator = test_datagen.flow_from_directory(val_dir , batch_size = 25 , **flow_options)

In [None]:
#testing dataloader:

# Pull a single batch from the training generator and show its shapes
data_batch , labels_batch = next(iter(train_generator))
print('data' , data_batch.shape)
print('labels' , labels_batch.shape)

In [None]:
import json

# Model.fit accepts generators directly ; fit_generator is deprecated and no longer exists in recent Keras releases.
# 50 steps x batch size 20 = 1000 training images , 20 steps x batch size 25 = 500 validation images per epoch.
history = model.fit(train_generator ,
                    steps_per_epoch = 50 ,
                    epochs = 10 ,
                    validation_data = validation_generator ,
                    validation_steps = 20)

model.save('cats_and_dogs_small_1.h5')

In [None]:
# Write the per-epoch metrics recorded by fit to a JSON file
with open('model_history.json' , 'w') as f :
  json.dump(history.history , f)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during training
curves = history.history
acc , val_acc = curves['acc'] , curves['val_acc']
loss , val_loss = curves['loss'] , curves['val_loss']

epochs = range(1 , len(acc)+1)

# First figure : accuracy (dots = training , line = validation)
acc_fig , acc_ax = plt.subplots()
acc_ax.plot(epochs , acc , 'bo' , label = 'Training Acc')
acc_ax.plot(epochs , val_acc , 'b' , label = 'Val Acc')
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.legend()

# Second figure : loss
loss_fig , loss_ax = plt.subplots()
loss_ax.plot(epochs , loss , 'bo' , label = 'Training loss')
loss_ax.plot(epochs , val_loss , 'b' , label = 'Val loss')
loss_ax.set_title("Training vs Validation loss")
loss_ax.legend()

plt.show()

As the curves clearly show, the model is overfitting.

### Adding DropOut layer and Data Augmentation to increase dataset

In [None]:
from keras import layers
from keras import models

# Same convolutional stack as before , with a Dropout(0.5) layer between Flatten and the Dense classifier
model = models.Sequential([
    layers.Conv2D(32 , (3,3) , activation = 'relu' , input_shape = (250 , 250 , 3)) ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(64 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(128 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(128 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Conv2D(64 , (3,3) , activation = 'relu') ,
    layers.MaxPooling2D(2,2) ,
    layers.Flatten() ,
    layers.Dropout(0.5) ,
    layers.Dense(512 , activation = 'relu') ,
    layers.Dense(1 , activation = 'sigmoid') ,
])

model.summary()

In [None]:
#Augmentation

# Random transformations applied to the training images
augmentation_options = dict(
    rotation_range = 40 ,
    width_shift_range = 0.2 ,
    height_shift_range = 0.2 ,
    shear_range = 0.2 ,
    zoom_range = 0.2 ,
    horizontal_flip = True ,
)

train_datagen = ImageDataGenerator(rescale = 1./255 , **augmentation_options)

# Only the training generator is rebuilt in this cell
train_generator = train_datagen.flow_from_directory(
    train_dir ,
    target_size = (250,250) ,
    batch_size = 16 ,
    class_mode = 'binary' ,
)

In [None]:
from tensorflow.keras import optimizers

# Same optimizer , loss and metric as for the first network
model.compile(optimizer = optimizers.RMSprop(learning_rate = 1e-4) , loss = 'binary_crossentropy' , metrics = ['acc'] )

In [None]:
# Model.fit accepts generators directly ; fit_generator is deprecated and no longer exists in recent Keras releases.
history = model.fit(
    train_generator ,
    steps_per_epoch = 50,
    epochs = 100 ,
    validation_data = validation_generator ,
    validation_steps = 25
)

model.save('cats_and_dogs_small_2.h5')

In [None]:
import json

# Write the per-epoch metrics of the augmented run to their own JSON file
with open('model_history_2.json' , 'w') as f :

  json.dump(history.history , f)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during training
curves = history.history
acc , val_acc = curves['acc'] , curves['val_acc']
loss , val_loss = curves['loss'] , curves['val_loss']

epochs = range(1 , len(acc)+1)

# First figure : accuracy (dots = training , line = validation)
acc_fig , acc_ax = plt.subplots()
acc_ax.plot(epochs , acc , 'bo' , label = 'Training Acc')
acc_ax.plot(epochs , val_acc , 'b' , label = 'Val Acc')
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.legend()

# Second figure : loss
loss_fig , loss_ax = plt.subplots()
loss_ax.plot(epochs , loss , 'bo' , label = 'Training loss')
loss_ax.plot(epochs , val_loss , 'b' , label = 'Val loss')
loss_ax.set_title("Training vs Validation loss")
loss_ax.legend()

plt.show()

As you can see, performance improved significantly compared to the model trained without augmentation.

Applying L1 or L2 regularization would likely improve the results further.

## Using a Pretrained Model

### For Feature Extraction

We will use a pretrained [VGG16](https://www.tensorflow.org/api_docs/python/tf/keras/applications/vgg16/VGG16).
[Architecture](https://www.google.com/url?sa=i&url=https%3A%2F%2Fwww.geeksforgeeks.org%2Fvgg-16-cnn-model%2F&psig=AOvVaw18JAs0D2qlc6YD2pWEy9WT&ust=1703869812399000&source=images&cd=vfe&opi=89978449&ved=0CBIQjRxqFwoTCKCkvevPsoMDFQAAAAAdAAAAABAI)


In [None]:
from keras.applications import VGG16

# VGG16 with ImageNet weights , built for the same 250x250 RGB inputs as the generators
conv_base = VGG16(
    include_top = False , # leave out the classifier on top of the network
    weights = 'imagenet' ,
    input_shape = (250 , 250 , 3)
)

In [None]:
# Print the layers of the pretrained base
conv_base.summary()

In [None]:
from keras import models
from keras import layers

# Feature extraction : freeze the pretrained base so that only the new classifier is trained.
# Without this every VGG16 weight would be updated as well , and the gradients coming from
# the randomly initialised Dense layers would damage the pretrained representations.
# The flag has to be set before the model is compiled to take effect.
conv_base.trainable = False

model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256 , activation = 'relu'))
model.add(layers.Dense(1 , activation = 'sigmoid'))

model.summary()

In [None]:
from keras import optimizers

# Same loss and metric as before , with a smaller learning rate (2e-5)
model.compile(optimizer = optimizers.RMSprop(learning_rate = 2e-5) , loss = 'binary_crossentropy' , metrics = ['acc'])

In [None]:
#Needs GPU to run it


# Model.fit accepts generators directly ; fit_generator is deprecated and no longer exists in recent Keras releases.
history = model.fit(
    train_generator ,
    steps_per_epoch = 50 ,
    epochs = 15,
    validation_data = validation_generator ,
    validation_steps = 25
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during training
curves = history.history
acc , val_acc = curves['acc'] , curves['val_acc']
loss , val_loss = curves['loss'] , curves['val_loss']

epochs = range(1 , len(acc)+1)

# First figure : accuracy (dots = training , line = validation)
acc_fig , acc_ax = plt.subplots()
acc_ax.plot(epochs , acc , 'bo' , label = 'Training Acc')
acc_ax.plot(epochs , val_acc , 'b' , label = 'Val Acc')
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.legend()

# Second figure : loss
loss_fig , loss_ax = plt.subplots()
loss_ax.plot(epochs , loss , 'bo' , label = 'Training loss')
loss_ax.plot(epochs , val_loss , 'b' , label = 'Val loss')
loss_ax.set_title("Training vs Validation loss")
loss_ax.legend()

plt.show()

### FineTuning

In [None]:
conv_base.trainable = True

# Fine-tune only the last convolutional block : every layer before 'block5_conv1'
# stays frozen , 'block5_conv1' and all layers after it become trainable.
set_trainable = False

for layer in conv_base.layers:
  # compare the layer's *name* : a Layer object is never equal to a string ,
  # so `layer == 'block5_conv1'` left every layer frozen
  if layer.name == 'block5_conv1':
    set_trainable = True

  layer.trainable = set_trainable

In [None]:
# Compile again after changing the trainable flags , with a learning rate of 1e-5
model.compile(loss = 'binary_crossentropy' , optimizer = optimizers.RMSprop(learning_rate = 1e-5) , metrics = ['acc'])

In [None]:
#needs GPU

# Model.fit accepts generators directly ; fit_generator is deprecated and no longer exists in recent Keras releases.
history = model.fit(
    train_generator ,
    steps_per_epoch = 50 ,
    epochs = 100 ,
    validation_data = validation_generator ,
    validation_steps = 50
)

In [None]:
# flow_from_directory only picks up images that sit inside one sub-folder per class
# (e.g. dataset/test/cats and dataset/test/dogs) ; files lying directly in test_dir are ignored.
test_generator = test_datagen.flow_from_directory(
    test_dir ,
    target_size = (250 ,250) ,
    batch_size = 20,
    class_mode = 'binary'
)

if test_generator.samples == 0:
  # Nothing to evaluate : an accuracy needs labelled images
  print("no labelled test images found in" , test_dir , "- expected one sub-folder per class")
else:
  # Model.evaluate replaces the deprecated evaluate_generator. Without `steps` it runs
  # over the generator exactly once instead of a hard-coded number of batches.
  test_loss , test_acc = model.evaluate(test_generator)
  print("test_acc :" , test_acc)

# Introduction to tensorflow/keras

## Building a Simple Classifier

In [None]:
import numpy as np

num_samples_per_class = 1000

# Seeded generator : every Restart & Run All draws exactly the same two point clouds
rng = np.random.default_rng(1337)

# This covariance corresponds to an oval-like point cloud oriented from bottom left to top right
class_cov = [[1 , 0.5] , [0.5 , 1]]

# First class of points , centred on (0 , 3)
negative_samples = rng.multivariate_normal(
    mean = [0 , 3] ,
    cov = class_cov ,
    size = num_samples_per_class
)

# Second class : same covariance , centred on (3 , 0)
positive_samples = rng.multivariate_normal(
    mean = [3 , 0] ,
    cov = class_cov ,
    size = num_samples_per_class
)

inputs = np.vstack((negative_samples , positive_samples)).astype(np.float32) # stacking the two classes in an array of shape (2000 , 2)

# Matching targets of shape (2000 , 1) : 0 for the first class , 1 for the second
targets = np.vstack((np.zeros((num_samples_per_class , 1) , dtype = "float32") , np.ones((num_samples_per_class , 1) , dtype = 'float32')))

In [None]:
import tensorflow as tf

input_dim = 2
output_dim = 1

# W starts with uniform random values , b with zeros
W = tf.Variable(initial_value = tf.random.uniform(shape = (input_dim , output_dim))) #can be modified
b = tf.Variable(initial_value= tf.zeros(shape = (output_dim ,)))

In [None]:
#Forward Pass

def model(inputs):
  """Forward pass : matrix product of `inputs` with W , plus b."""
  projection = tf.matmul(inputs , W)
  return projection + b

#Loss Function

def square_loss(targets , predicitions):
  """Mean over all elements of the squared difference between targets and predictions."""
  return tf.reduce_mean(tf.square(targets - predicitions))

lr = 0.1 # learning rate

def training_step(inputs , targets):
  """Apply one gradient-descent update to W and b and return the loss computed before the update."""
  # Operations run inside the tape context are recorded , so gradients can be computed afterwards
  with tf.GradientTape() as tape:
    loss = square_loss(targets , model(inputs))
  # Gradients of the loss with regard to W and b , in that order
  gradients = tape.gradient(loss , [W , b])
  # Move every variable against its gradient , scaled by the learning rate
  for variable , gradient in zip((W , b) , gradients):
    variable.assign_sub(gradient * lr)
  return loss

In [None]:
# 40 update steps , each one using all inputs and targets at once
for step in range(40):
  loss = training_step(inputs , targets)
  print(f"Loss at step{step}: {loss:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Colour every point by whether its prediction is above 0.5
predicitions = model(inputs)
plt.scatter(inputs[: , 0] , inputs[: , 1] , c = predicitions[: , 0] > 0.5)
plt.show()

In [None]:
x = np.linspace(-1 , 4 , 100) # generate 100 regularly spaced numbers between -1 and 4

# Line on which the prediction equals 0.5 : W[0] * x + W[1] * y + b = 0.5 , solved for y
y = - W[0] / W[1] * x + (0.5 - b) / W[1]
plt.plot(x , y , "-r")
plt.scatter(inputs[: , 0] , inputs[: , 1] , c = predicitions[: , 0] > 0.5)

## Building Block of Deep Learning

In [None]:
from tensorflow import keras
import tensorflow as tf
class SimpleDense(keras.layers.Layer): #all keras layers inherit from the base Layer class
  """Dense layer written by hand : activation(inputs @ W + b).

  units      : size of the last output dimension.
  activation : optional callable applied to the result ; None leaves it linear.
  """

  def __init__(self , units , activation = None):
    super().__init__()
    self.units = units
    self.activation = activation

  # weight creation takes place in the build() method
  def build(self , input_shape):
    """Create W with shape (input_dim , units) and b with shape (units ,)."""
    input_dim = input_shape[-1]
    # add_weight() is a shortcut method for creating weights , we could use tf.Variable like before.
    self.W = self.add_weight(shape = (input_dim , self.units) , initializer = "random_normal")
    # The shape has to be a tuple : (self.units) is just an int , (self.units ,) is a 1-element tuple
    self.b = self.add_weight(shape = (self.units ,) , initializer = "zeros")

  # forward pass is defined in the call function
  def call(self , inputs):
    """Return inputs @ W + b , passed through the activation when one was given."""
    y = tf.matmul(inputs , self.W) + self.b
    if self.activation is not None:
      y = self.activation(y)

    return y

In [None]:
my_dense = SimpleDense(units = 32 , activation = tf.nn.relu)

# Call the layer on a batch of 2 samples with 784 features each and look at the output shape
input_tensor = tf.ones(shape = (2 , 784))
output_tensor = my_dense(input_tensor)
print(output_tensor.shape)

In [None]:
# The same kind of stack written with the built-in Dense layer
# (the closing parenthesis of keras.Sequential( was missing , which is a SyntaxError) :

model = keras.Sequential([
    layers.Dense(32 , activation = "relu") ,
    layers.Dense(32)
])

# Advanced Deep Learning for Computer Vision

## Image Segmentation

In this example, we will focus on **semantic segmentation**.

dataset : https://www.robots.ox.ac.uk/~vgg/data/pets/

### Dataset Download

In [None]:
!wget https://thor.robots.ox.ac.uk/~vgg/data/pets/images.tar.gz
!wget https://thor.robots.ox.ac.uk/~vgg/data/pets/annotations.tar.gz

In [None]:
import tarfile
import os

# Archives to unpack
file_path = '/content/images.tar.gz'
file_path_2 = '/content/annotations.tar.gz'

# Directory each archive is extracted into
extract_dir = '/content/images'
extract_dir_2 = '/content/annotations'

# Both archives are extracted : the annotations are listed later on as well ,
# but only the images archive used to be unpacked here.
for archive , destination in ((file_path , extract_dir) , (file_path_2 , extract_dir_2)):
    # Make sure the extract directory exists
    os.makedirs(destination, exist_ok=True)

    # Open the .tar.gz file and extract it
    with tarfile.open(archive, 'r:gz') as tar:
        tar.extractall(path=destination)

    print(f"Extracted all contents of {archive} to {destination}")


In [None]:
import os

images_dir = "/content/images/images"
annotations_dir = "/content/annotations/annotations/trimaps"

# Sorted folder listing , from which the full path of every .jpg file is kept
img_files = sorted(os.listdir(images_dir))
input_images = [
    os.path.join(images_dir , fname) for fname in img_files if fname.endswith('.jpg')
]

In [None]:
# First ten image paths
input_images[:10]

In [None]:
# Full path of every .png annotation , skipping hidden files whose name starts with "."
target_paths = []
for anno in os.listdir(annotations_dir):
  if anno.startswith(".") or not anno.endswith(".png"):
    continue
  target_paths.append(os.path.join(annotations_dir , anno))
target_paths.sort()

In [None]:
# First ten annotation paths , to compare against the image paths
target_paths[:10]

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.utils import load_img , img_to_array

index = 50 # position of the sample shown here ; reused for its annotation
plt.axis("off")
plt.imshow(load_img(input_images[index]))

In [None]:
#annotations are pixel level trimap segmentation --> contains only 3 labels (1 , 2, 3)

def display_target(target_array):
  """Show channel 0 of a trimap array as an image."""
  # subtract 1 so that the labels range from 0 to 2 , then multiply by 127 for display
  labels = target_array.astype("uint8") - 1
  plt.axis("off")
  plt.imshow((labels * 127)[: , : , 0])
# Load the annotation at the same index as a single-channel array and display it
img = img_to_array(load_img(target_paths[index] , color_mode = "grayscale"))
display_target(img)

### dataset splitting

In [None]:
import numpy as np
import random

img_size = (200 , 200) # will resize everything to img_size

num_imgs = len(input_images)
print("total number of samples" , num_imgs)
# Both lists are shuffled in place with the same seed , so that images and
# annotations stay in matching order
random.Random(1337).shuffle(input_images)
random.Random(1337).shuffle(target_paths)

def path2image(image_path):
  """Load the image at `image_path` , resized to img_size , and return it as an array."""
  resized = load_img(image_path , target_size = img_size)
  return img_to_array(resized)

def path2target(target_path):
  """Load the annotation at `target_path` as a resized single-channel uint8 array , with 1 subtracted from every label."""
  # grayscale --> the channel dim will be 1
  annotation = load_img(target_path , target_size = img_size , color_mode = 'grayscale')
  return img_to_array(annotation).astype("uint8") - 1

# Pre-allocate the arrays : images are RGB (3 channels) , targets are grayscale (1 channel)
input_imgs = np.zeros((num_imgs , *img_size , 3) , dtype = "float32")
targets = np.zeros((num_imgs , *img_size , 1) , dtype = "uint8")
for i , image_path in enumerate(input_images):
  input_imgs[i] = path2image(image_path)
  targets[i] = path2target(target_paths[i])

num_val = 1000

# The last num_val samples are used for validation , everything before them for training
train_input_imgs , val_input_imgs = input_imgs[: -num_val] , input_imgs[-num_val:]
train_targets , val_targets = targets[: -num_val] , targets[-num_val:]

### Model Creation

The purpose of the first half of this model is to encode the images into smaller feature maps , where each spatial location (pixel) contains information about a large spatial chunk of the image. In other words, COMPRESSION.

Unlike the image-classification model, which downsampled with MaxPooling layers, here we downsample with strided convolutions. This is because, in image segmentation, we care a lot about the spatial location of information in the image, since the model must produce a per-pixel target mask as its output.

2x2 max pooling completely destroys the location information within each pooling window: it returns one scalar value per window, with zero knowledge of which of the four locations in the window the value came from.


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model , layers

def get_model(img_size , num_classes):
  """Build the segmentation network : strided convolutions followed by transposed convolutions.

  img_size    : (height , width) tuple ; the input has 3 channels.
  num_classes : number of channels of the final softmax output.
  """
  shared = dict(activation = 'relu' , padding = 'same')

  inputs = keras.Input(shape = img_size + (3,)) # (200 ,200) + (3,) for RGB --> (200 , 200 , 3)
  x = layers.Rescaling(1./255)(inputs) # range between 0 and 1

  # downsample : the first convolution of every pair uses strides = 2
  for filters in (64 , 128 , 256):
    x = layers.Conv2D(filters , 3 , strides = 2 , **shared)(x)
    x = layers.Conv2D(filters , 3 , **shared)(x)

  # upsample : the second transposed convolution of every pair uses strides = 2
  for filters in (256 , 128 , 64):
    x = layers.Conv2DTranspose(filters , 3 , **shared)(x)
    x = layers.Conv2DTranspose(filters , 3 , strides = 2 , **shared)(x)

  outputs = layers.Conv2D(num_classes , 3 , activation = "softmax" , padding = 'same')(x)

  return keras.Model(inputs , outputs)

# 3 classes , one per trimap label
model = get_model(img_size = (200,200) , num_classes = 3)
model.summary()

In [None]:
# sparse_categorical_crossentropy : the targets hold integer labels , not one-hot vectors
model.compile(optimizer = 'rmsprop' , loss = 'sparse_categorical_crossentropy')
# Checkpoint callback with save_best_only , writing to oxford_segmentation.keras
callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.keras" ,
                                    save_best_only = True)
]

history = model.fit(train_input_imgs ,
                    train_targets ,
                    epochs = 15 ,
                    callbacks = callbacks ,
                    batch_size = 64 ,
                    validation_data = (val_input_imgs , val_targets))

In [None]:
# Training loss (dots) against validation loss (line) for every epoch
epochs = range(1 , len(history.history["loss"]) + 1)
loss = history.history["loss"]
val_loss = history.history["val_loss"]
plt.figure()
plt.plot(epochs , loss , "bo" , label = "Training loss")
plt.plot(epochs , val_loss , "b" , label = "Val loss")
plt.title("training vs val loss")
plt.legend()

In [None]:
from tensorflow.keras.utils import array_to_img

# NOTE(review): the checkpoint is written to the relative path "oxford_segmentation.keras" ;
# this absolute path only matches when the working directory is /content - confirm
model = keras.models.load_model("/content/oxford_segmentation.keras")

i = 4
test_image = val_input_imgs[i]
plt.axis("off")
plt.imshow(array_to_img(test_image))
# predict works on batches : add a leading axis , then take the single result back out
mask = model.predict(np.expand_dims(test_image , 0))[0]

def display_mask(pred):
  """Show the index of the largest value along the last axis of `pred` , multiplied by 127 , as an image."""
  class_map = np.argmax(pred , axis = -1) * 127
  plt.axis("off")
  plt.imshow(class_map)

# Display the prediction for the chosen validation image
display_mask(mask)

**It needs more training !**

# Deep Learning For Text

## PreProcessing Text

### from scratch using simple python

In [None]:
import string

class Vectorizer:
  """Text vectorizer in plain Python : standardize --> tokenize --> look up an integer index."""

  def standarize(self , text):
    """Return `text` in lower case with every punctuation character removed."""
    text = text.lower()
    # Join with the empty string : joining with " " would put a space between
    # all characters , and every word would later be split into single letters.
    return "".join(char for char in text
                   if char not in string.punctuation)


  def tokenize(self , text):
    """Standardize `text` and split it on whitespace into a list of tokens."""
    text = self.standarize(text)
    return text.split()

  def make_vocabulary(self , dataset):
    """Build self.vocabulary (token --> index) and self.inverse_vocabulary (index --> token).

    `dataset` is an iterable of strings. Index 0 belongs to the empty string "" and
    index 1 to the special token "[UNK]" , which stands for words that are not in the vocabulary.
    """
    self.vocabulary = {"" : 0 , "[UNK]" : 1}
    for text in dataset:
      # tokenize() already standardizes , so no separate standarize() call is needed
      for token in self.tokenize(text):
        if token not in self.vocabulary:
          # the next free index is the current size of the vocabulary (the first new token gets 2)
          self.vocabulary[token] = len(self.vocabulary)
    self.inverse_vocabulary = dict(
        (v , k) for k ,v in self.vocabulary.items()
    )


  def encode(self , text):
    """Return the list of vocabulary indices for `text` ; unknown tokens map to 1 ("[UNK]")."""
    tokens = self.tokenize(text)
    # dictionary.get(key , default_value) returns the default when the key is missing
    return [self.vocabulary.get(token , 1) for token in tokens ]

  def decode(self , int_sequence):
    """Return the tokens for `int_sequence` , separated by single spaces."""
    # a space between the tokens keeps the decoded words apart
    return " ".join(
        self.inverse_vocabulary.get(i , "[UNK]" )for i in int_sequence)

In [None]:
vectorizer = Vectorizer()

# Three short texts used to build the vocabulary
dataset = [
    "I write , erase , rewrite" ,
    "Erase again , and then" ,
    "A poppy blooms"
]

In [None]:
# Index every token of the toy dataset
vectorizer.make_vocabulary(dataset)

In [None]:
# "still" does not occur in the dataset , so it falls back to the [UNK] index
test_sentence = "I write , rewrite , and still rewrite again"
encoded_sentence = vectorizer.encode(test_sentence)
print(encoded_sentence)

# Map the indices back to tokens
decoded_sentence = vectorizer.decode(encoded_sentence)
print(decoded_sentence)

This code works, but it is not efficient.

### Using built-in functions

In [None]:
from tensorflow.keras.layers import TextVectorization

text_vectorization = TextVectorization(
    output_mode = 'int'
)
# by default it lower-cases the text , removes punctuation and splits on whitespace.

# adapt() builds the vocabulary inside the layer and returns None , so its result
# must not be assigned back to `dataset` (that replaced the list of texts with None).
text_vectorization.adapt(dataset)

In [None]:
print("vocabulary:" , text_vectorization.get_vocabulary())

vocabulary = text_vectorization.get_vocabulary()
test_sentence = "I write , rewrite , and still rewrite again"

# Calling the layer on a string returns its sequence of token indices
encoded_sentence = text_vectorization(test_sentence)
print("encoded : " , encoded_sentence)
# index --> token lookup , built from the position of each token in the vocabulary list
inverse_vocab = dict(enumerate(vocabulary))
decoded_sentence = " ".join(inverse_vocab[int(i)] for i in encoded_sentence)
print("decoded:" , decoded_sentence)

## Non-Sequential Models

In [None]:
!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xf /content/aclImdb_v1.tar.gz

In [None]:
!rm -r /content/aclImdb/train/unsup

In [None]:
# Print the text of one file from the train/pos folder
!cat /content/aclImdb/train/pos/10000_8.txt

In [None]:
#creating Validation set

import os
import random
import shutil , pathlib

base_dir = pathlib.Path("/content/aclImdb")
train_dir = base_dir / "train"
val_dir = base_dir / "val"

for category in ("neg" , "pos"):
  # makedirs also creates val/ itself and does not fail on a re-run
  os.makedirs(val_dir/category , exist_ok = True)
  if os.listdir(val_dir/category):
    # The split was already made : moving another 20% would keep shrinking the training set
    print(f"{category} : validation files already present , split skipped")
    continue

  # Sort before shuffling : os.listdir order is arbitrary , so the seed alone would not
  # make the split reproducible
  files = sorted(os.listdir(train_dir/category))
  print(f"number of {category} samples before split" , len(files))
  random.Random(1337).shuffle(files)
  num_val_samples =int( 0.2 * len(files))
  # A positive start index : files[-0:] would select *every* file when num_val_samples is 0
  val_files = files[len(files) - num_val_samples:]
  for sample in val_files:
    shutil.move(train_dir/category/sample ,
                val_dir/category/sample)
  print(f"number of {category} samples after split " , len(files) - len(val_files))


In [None]:
from tensorflow import keras
batch_size = 32

# One batched dataset per split folder
train_ds = keras.utils.text_dataset_from_directory("/content/aclImdb/train" , batch_size = batch_size) # used to create a batched dataset
val_ds = keras.utils.text_dataset_from_directory("/content/aclImdb/val" , batch_size = batch_size)
test_ds = keras.utils.text_dataset_from_directory("/content/aclImdb/test" , batch_size = batch_size)

In [None]:
#Text Vectorization
from tensorflow.keras.layers import TextVectorization
#limit the vocabulary to the 20,000 most frequently used words in the data, otherwise we would be indexing every word in the training data.
# max_tokens limits the vocabulary to 20,000 entries ; otherwise every word of the training data would be indexed.
# output_mode = 'multi_hot' is a binary encoding of multiple tokens in a single vector.
# Alternatives : 'count' returns how many times each term occurs , 'tf_idf' weights a term by its
# frequency in the current document relative to how often it appears across the dataset.
# ngrams is left at its default (None --> unigrams) ; 2 would give bigrams.
text_vectorization = TextVectorization(max_tokens = 20000 , output_mode = 'multi_hot')

# Raw text inputs with no labels , used to index the vocabulary
text_only_train_ds = train_ds.map(lambda x , y:x)
text_vectorization.adapt(text_only_train_ds)

def multi_hot_encode(x , y):
  """Vectorize the text batch x and pass the labels y through unchanged."""
  return text_vectorization(x) , y

# Processed versions of the train , val and test sets.
# num_parallel_calls = 4 : leverage multiple CPU cores , since TextVectorization doesn't work on GPU
binary_lgram_train_ds = train_ds.map(multi_hot_encode , num_parallel_calls = 4)

binary_lgram_val_ds = val_ds.map(multi_hot_encode , num_parallel_calls = 4)

binary_lgram_test_ds = test_ds.map(multi_hot_encode , num_parallel_calls = 4)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

def get_model(max_tokens = 20000 , hidden_dim = 6):
  """Return a compiled model : Dense(hidden_dim , relu) --> Dropout(0.5) --> Dense(1 , sigmoid).

  max_tokens : length of every input vector.
  hidden_dim : number of units of the hidden Dense layer.
  """
  inputs = keras.Input(shape = (max_tokens , ))

  # Layers are created and applied in this order
  stack = (
      layers.Dense(hidden_dim , activation = 'relu') ,
      layers.Dropout(0.5) ,
      layers.Dense(1 , activation = "sigmoid") ,
  )
  outputs = inputs
  for layer in stack:
    outputs = layer(outputs)

  model = keras.Model(inputs , outputs)
  model.compile(optimizer = 'rmsprop' ,
                loss = "binary_crossentropy" ,
                metrics = ["accuracy"])
  return model

In [None]:
# Build the model with the default arguments and print its architecture
model = get_model()
model.summary()

In [None]:
# Checkpoint callback with save_best_only , writing to binary_lgram.keras
callbacks = [
    keras.callbacks.ModelCheckpoint("binary_lgram.keras" ,
                                    save_best_only = True)
]

model.fit(binary_lgram_train_ds.cache() , # we call cache on the dataset to cache it to memory
          validation_data = binary_lgram_val_ds ,
          epochs = 10 ,
          callbacks = callbacks)

# Reload the saved checkpoint and report the second value returned by evaluate (the accuracy metric)
model = keras.models.load_model("binary_lgram.keras")
print(f"test accuracy : {model.evaluate(binary_lgram_test_ds)[1] : .3f}")

## Sequential Models

### bidirectional RNNs(bidirectional LSTMs)

In [None]:
from tensorflow.keras import layers

max_length = 600 # length of every output sequence , in tokens
max_tokens = 20000 # vocab size

# Preprocessing layer which maps text features to integer sequences
text_vectorization = layers.TextVectorization(
    max_tokens = max_tokens ,
    output_mode = 'int' ,
    output_sequence_length = max_length ,
)

# The vocabulary for the layer must be either supplied on construction or learned via adapt()
text_vectorization.adapt(text_only_train_ds)

def to_int_sequences(x , y):
  """Vectorize the text batch x and pass the labels y through unchanged."""
  return text_vectorization(x) , y

# map() applies the function to each element of the dataset
int_train_ds = train_ds.map(to_int_sequences , num_parallel_calls = 4)

int_val_ds = val_ds.map(to_int_sequences , num_parallel_calls = 4)

int_test_ds = test_ds.map(to_int_sequences , num_parallel_calls = 4)

Now we need to transform these integer sequences into vector sequences via one-hot encoding.

each dimension would represent one possible term in the vocabulary.

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

# One input = one sequence of integer token indices. `None` leaves the sequence
# axis unspecified, so the model accepts sequences of any length.
# The dtype is given explicitly because tf.one_hot expects integer indices.
inputs = keras.Input(shape=(None,), dtype="int64")

# Encode every token index as a binary vector with max_tokens dimensions.
embedd = tf.one_hot(inputs, depth=max_tokens)

# Sequence processing -> regularisation -> binary classification head.
x = layers.Bidirectional(layers.LSTM(32))(embedd)
x = layers.Dropout(rate=0.5)(x)
outputs = layers.Dense(units=1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Single source of truth for the checkpoint location, so that the callback
# and load_model() can never point at different files.
checkpoint_path = "one_hot_bidir_lstm.keras"

# With save_best_only=True the file is only overwritten when the monitored
# metric improves.
callbacks = [
    keras.callbacks.ModelCheckpoint(checkpoint_path,
                                    save_best_only = True)
]

model.fit(int_train_ds ,
          validation_data = int_val_ds ,
          epochs = 10 ,
          callbacks = callbacks)

# Reload the best checkpoint from exactly the path the callback wrote to,
# then report test accuracy (index 1 of evaluate()'s [loss, accuracy] result).
model = keras.models.load_model(checkpoint_path)
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")

The model trains slowly because the inputs are large.

Each input is encoded as a matrix of size (600, 20000): 600 words per sample and 20,000 possible words.

Even so, its accuracy is not that high compared to our previously used network.

Let's explore **Word Embeddings**

In [None]:
from tensorflow.keras import layers

# Vocabulary size. It has to equal the max_tokens given to the TextVectorization
# layer (20000) so that Embedding's input_dim covers every token index that layer
# can produce; 600 is max_length (the sequence length), not the vocabulary size.
max_tokens = 20000

# An Embedding layer is best understood as a dictionary lookup:
#   word index --> Embedding layer --> corresponding dense word vector
# Its weights start out random and are updated via backpropagation during training.
#   input : (batch_size, sequence_length) tensor of integer indices
#   output: (batch_size, sequence_length, embedding_dimensionality) float tensor
embedding_layer = layers.Embedding(input_dim = max_tokens , output_dim = 256)

In [None]:
#Model that uses Embedding layer

# Variable-length sequences of integer token indices.
inputs = keras.Input(shape=(None,), dtype="int64")

# Learn a 256-dimensional dense vector for each of the max_tokens indices.
embedd = layers.Embedding(input_dim=max_tokens, output_dim=256)(inputs)

# Bidirectional LSTM -> dropout -> single sigmoid unit for binary classification.
x = layers.Bidirectional(layers.LSTM(units=32))(embedd)
x = layers.Dropout(rate=0.5)(x)
outputs = layers.Dense(units=1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Single source of truth for the checkpoint location. A relative path is used
# for both saving and loading so the cell does not depend on the working
# directory being /content.
checkpoint_path = "embedding_bidir_gru.keras"

# With save_best_only=True the file is only overwritten when the monitored
# metric improves.
callbacks = [
    keras.callbacks.ModelCheckpoint(checkpoint_path ,
    save_best_only = True)
]

model.fit(int_train_ds ,
          validation_data = int_val_ds ,
          epochs = 10 ,
          callbacks = callbacks)

# Reload the best checkpoint from exactly the path the callback wrote to,
# then report test accuracy (index 1 of evaluate()'s [loss, accuracy] result).
model = keras.models.load_model(checkpoint_path)
print(f"Test acc: {model.evaluate(int_test_ds)[1] :.3f}")

This model trains much faster than the one-hot model!

However, the bigram model is still better in terms of accuracy on the test set.

One reason is that the bigram model processed full reviews, while our sequence model truncates sequences to 600 words.

Let's explore **Padding and Masking**

Here , what is hurting the performance , is that sentences longer than 600 tokens are truncated to a length of 600 tokens , while sentences shorter than 600 (max_length) are padded with zeros at the end.

Bidirectional RNN --> two RNN layers running in parallel: one processes the tokens in their natural order, and the second processes the same tokens in reverse.

When the original sentence is short, the RNN that sees the tokens in their natural order spends its final iterations (possibly hundreds of them) seeing only the padded zeros.

The information stored in the internal state of the RNN will gradually fade out as it gets exposed to these meaningless inputs.

Therefore we need a way to tell the RNN to skip these iterations.

There is an API for that called masking.

In [None]:
# Small Embedding layer with masking enabled: index 0 is treated as padding.
embedding_layer = keras.layers.Embedding(
    input_dim=10,
    output_dim=256,
    mask_zero=True,
)

# Three sequences, each right-padded with zeros to a common length of 7.
some_input = [
    [4, 3, 2, 1, 0, 0, 0],
    [5, 4, 3, 2, 1, 0, 0],
    [2, 1, 0, 0, 0, 0, 0],
]

# Ask the layer for the mask it would attach to this input and display it.
mask = embedding_layer.compute_mask(some_input)
mask

In [None]:
# using and embedding layer with masking enabled

# Variable-length sequences of integer token indices.
inputs = keras.Input(shape=(None,), dtype='int64')

# mask_zero=True makes the Embedding emit a mask so that downstream layers
# can skip the zero-padded time steps.
embedd = keras.layers.Embedding(
    input_dim=max_tokens,
    output_dim=256,
    mask_zero=True,
)(inputs)

# Bidirectional LSTM -> dropout -> single sigmoid unit for binary classification.
x = keras.layers.Bidirectional(keras.layers.LSTM(units=32))(embedd)
x = keras.layers.Dropout(rate=0.5)(x)
outputs = keras.layers.Dense(units=1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Single source of truth for the checkpoint location. A relative path is used
# for both saving and loading so the cell does not depend on the working
# directory being /content.
checkpoint_path = "embeddings_bidir_gru_with_masking.keras"

# With save_best_only=True the file is only overwritten when the monitored
# metric improves.
callbacks = [
    keras.callbacks.ModelCheckpoint(checkpoint_path ,
                                    save_best_only = True )
]

model.fit(int_train_ds ,
          validation_data = int_val_ds ,
          epochs = 10 ,
          callbacks = callbacks)

# Reload the best checkpoint from exactly the path the callback wrote to,
# then report test accuracy (index 1 of evaluate()'s [loss, accuracy] result).
model = keras.models.load_model(checkpoint_path)
print(f"Test acc: {model.evaluate(int_test_ds)[1] :.3f}")

A small noticeable improvement

### Pretrained Word Embeddings

Popular ones: Word2Vec, GloVe

In [None]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip -q glove.6B.zip

In [None]:
#index that maps words

import numpy as np

# Relative path: the archive is unzipped into the working directory, so this
# resolves to the extracted file without depending on /content.
path_to_glove = "glove.6B.100d.txt"

# Maps each word to its GloVe coefficient vector (float32 numpy array).
embedding_index = {}

# The GloVe vocabulary contains non-ASCII tokens; decode explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open(path_to_glove , encoding = "utf-8") as f:
  for line in f:
    # Each line is "<word> <coef_1> <coef_2> ..."; split off the word only.
    word , coefs = line.split(maxsplit = 1)
    coefs = np.fromstring(coefs , "f" , sep = " ")
    embedding_index[word] = coefs

print(f"Found {len(embedding_index)} word vectors")

In [None]:
# creating an embedding matrix to be loaded to the embedding layer

embedding_dim = 100  # dimensionality of the glove.6B.100d vectors

# Vocabulary indexed by the TextVectorization layer, and its word -> index mapping.
vocabulary = text_vectorization.get_vocabulary()
word_index = dict(zip(vocabulary , range(len(vocabulary))))

# Row i will hold the GloVe vector of the word with index i. Words that are
# missing from GloVe keep an all-zero row.
embedding_matrix = np.zeros((max_tokens , embedding_dim))
for word , i in word_index.items():
  # Only indices inside the matrix can be filled (rows 0 .. max_tokens - 1).
  if i < max_tokens:
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None:
      embedding_matrix[i] = embedding_vector

In [None]:
# Embedding layer initialised from the pre-built embedding_matrix.
embedding_layer = keras.layers.Embedding(
    input_dim=max_tokens,
    output_dim=embedding_dim,
    # Load the pre-trained vectors as the initial (constant) weights.
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    # Freeze the layer so training does not update the pre-trained vectors.
    trainable=False,
    # Treat index 0 as padding and propagate a mask downstream.
    mask_zero=True,
)

In [None]:
#Model that uses pre-trained embeddings

# Variable-length sequences of integer token indices.
inputs = keras.Input(shape=(None,), dtype='int64')

# Look up the (frozen) pre-trained vector of every token.
embedd = embedding_layer(inputs)

# Bidirectional LSTM -> dropout -> single sigmoid unit for binary classification.
x = keras.layers.Bidirectional(keras.layers.LSTM(units=32))(embedd)
x = keras.layers.Dropout(rate=0.5)(x)
outputs = keras.layers.Dense(units=1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Single source of truth for the checkpoint location. A relative path is used
# for both saving and loading so the cell does not depend on the working
# directory being /content.
checkpoint_path = "glove_embeddings.keras"

# With save_best_only=True the file is only overwritten when the monitored
# metric improves.
callbacks = [
    keras.callbacks.ModelCheckpoint(checkpoint_path ,
                                    save_best_only = True )
]

model.fit(int_train_ds ,
          validation_data = int_val_ds ,
          epochs = 10 ,
          callbacks = callbacks)

# Reload the best checkpoint from exactly the path the callback wrote to,
# then report test accuracy (index 1 of evaluate()'s [loss, accuracy] result).
model = keras.models.load_model(checkpoint_path)
print(f"Test acc: {model.evaluate(int_test_ds)[1] :.3f}")

## Transformers