## IMAGE AUGMENTATION

### Import Packages

In [1]:
import os
import warnings
from concurrent.futures import ThreadPoolExecutor

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.preprocessing.image import load_img

warnings.filterwarnings('ignore')

2023-10-11 12:05:19.740133: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-11 12:05:24.028038: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-10-11 12:05:24.029500: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/loca

#### Initial Data Selection:

In [None]:
# Example of data selection for dataset creation: select every column of the
# Triage_Analysis rows whose PROD_TYPE_NM is 'Hair Sprays'.
# NOTE(review): `client` is not defined anywhere in the visible notebook —
# presumably a BigQuery client created in another session; confirm and create it
# in a setup cell so this cell survives Restart Kernel -> Run All.
query = """
            SELECT * FROM wmt-dca-catalog-dq-dev.POD2.Triage_Analysis where PROD_TYPE_NM IN ('Hair Sprays')
        """
# Run the query and load the result into a DataFrame.
product_data = client.query(query).to_dataframe()
# Show (rows, columns) as the cell output.
product_data.shape

#### Download Images 

In [1]:
def download_image(image_url, filename, directory, timeout=30):
    """
    Download the image at a given URL and save it into the specified directory.

    A file that already exists at the target path is left untouched, so the
    function can be re-run without downloading everything again. Failures
    (network errors, timeouts, non-200 responses) are reported with print()
    and the function returns without writing a file.

    Parameters:
    - image_url (str): The main image URL.
    - filename (str): The file name to give the downloaded image.
    - directory (str): The directory to save the image in; it must already exist.
    - timeout (float): Seconds to wait for the server before giving up.

    Returns:
    - None
    """
    filepath = os.path.join(directory, filename)
    if os.path.exists(filepath):
        return

    try:
        # Without a timeout a stalled server would block this worker forever.
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException as error:
        # Report instead of raising: this runs inside a thread pool, where an
        # uncaught exception would otherwise disappear silently.
        print(f"Failed to download image: {image_url} ({error})")
        return

    if response.status_code == 200:
        with open(filepath, 'wb') as file:
            file.write(response.content)
    else:
        print(f"Failed to download image: {image_url}")

def save_images(df, url_column, filename_column, directory, num_threads=10):
    """
    Download every image referenced in a DataFrame, using a pool of threads.

    Each row's image is saved as '<filename_column value>.jpeg' inside
    `directory`, which is created if it does not exist. A download that raises
    is reported with print() and does not stop the remaining downloads.

    Parameters:
    - df (pandas.DataFrame): Table holding the URLs and file-name stems.
    - url_column (str): Name of the column containing the image URLs.
    - filename_column (str): Name of the column whose (string) values become
      the file names, with '.jpeg' appended.
    - directory (str): The directory to save the images in.
    - num_threads (int): Maximum number of concurrent downloads.

    Returns:
    - None
    """
    image_urls = df[url_column]
    filenames = df[filename_column] + '.jpeg'
    os.makedirs(directory, exist_ok=True)

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Keep each future together with its URL so failures can be attributed.
        futures = {
            executor.submit(download_image, image_url, filename, directory): image_url
            for image_url, filename in zip(image_urls, filenames)
        }
        for future, image_url in futures.items():
            # exception() waits for the task; without this check an error raised
            # in a worker thread would be dropped without any trace.
            error = future.exception()
            if error is not None:
                print(f"Failed to download image: {image_url} ({error!r})")


In [None]:
# Save into ./train_images/ (relative to the notebook): the augmentation cells
# below read from that directory, whereas "/train_images/" would be created at
# the filesystem root and never be found by them.
save_images(product_data,"MAINIMAGEURL","WPID","./train_images/")

#### Augment downloaded Images

In [2]:
def _save_variant(pixels, output_directory, product_id, suffix):
    """Convert an image array/tensor to a PIL image and save it as '<product_id>_<suffix>.jpeg'."""
    # scale=False: the pixel values are already in the 0-255 range. The default
    # scale=True would stretch each image so its minimum/maximum become 0/255,
    # which distorts the brightness and contrast adjustments applied beforehand.
    variant = array_to_img(pixels, scale=False)
    variant.save(os.path.join(output_directory, f'{product_id}_{suffix}.jpeg'))


def image_augment(img, output_directory, input_directory='./train_images'):
    """
    Create augmented copies of one training image and save them to the output directory.

    For an input file '<product_id>.jpeg' the following files are written:
    '<product_id>_original.jpeg', '_saturated', '_flipped', '_constrast',
    '_brightness' and '_translated' (all with the '.jpeg' extension).

    Parameters:
    - img (str): File name of the training image; its stem is the product id.
    - output_directory (str): The directory to save the augmented images in
      (created if missing).
    - input_directory (str): The directory the training image is read from.

    Returns:
    - None
    """
    # The file stem is the product id; splitext keeps ids that contain dots intact.
    product_id = os.path.splitext(img)[0]

    # Load the image and take one array copy that every transformation reads from.
    source_image = load_img(os.path.join(input_directory, img))
    pixels = np.array(source_image)

    os.makedirs(output_directory, exist_ok=True)

    # Save the original image unchanged.
    source_image.save(os.path.join(output_directory, f'{product_id}_original.jpeg'))

    # Saturation effect
    saturated = tf.image.adjust_saturation(pixels, saturation_factor=1.5)
    _save_variant(saturated, output_directory, product_id, 'saturated')

    # Flipped right to left
    flipped = tf.image.flip_left_right(pixels)
    _save_variant(flipped, output_directory, product_id, 'flipped')

    # Contrast effect ('constrast' spelling kept so file names match earlier runs)
    contrasted = tf.image.adjust_contrast(pixels, contrast_factor=3.)
    _save_variant(contrasted, output_directory, product_id, 'constrast')

    # Brightness effect
    brightened = tf.image.adjust_brightness(pixels, delta=0.1)
    _save_variant(brightened, output_directory, product_id, 'brightness')

    # Translation: a fixed 50-pixel shift along both image axes. The shift comes
    # only from the explicit 'tx'/'ty' values; apply_transform() does not read the
    # generator's *_shift_range settings, so none are configured here.
    datagen = ImageDataGenerator()
    translated = datagen.apply_transform(pixels, {'tx': 50, 'ty': 50})
    _save_variant(translated, output_directory, product_id, 'translated')
    

In [3]:
# Image files: every '.jpeg' in the training directory, sorted so that runs are
# reproducible. endswith() is used instead of split(".")[1], which raised
# IndexError for names without a dot and skipped names containing extra dots.
image_files = sorted(img for img in os.listdir("./train_images/") if img.endswith('.jpeg'))
len(image_files)

200

In [4]:
# Augment every training image, then report how many were processed.
for file in image_files:
    image_augment(file, './train_augmented_images/')
count = len(image_files)
print(count)

2023-10-11 12:05:48.570669: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-10-11 12:05:48.571575: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-10-11 12:05:48.571603: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (prarja): /proc/driver/nvidia/version does not exist
2023-10-11 12:05:48.577434: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropria

200


In [None]:
# Copy every augmented JPEG from ./train_augmented_images/ into the GCS bucket.
# Replace the <INSERT YOUR DATASET> placeholder with the real bucket name before
# running: left as-is, the shell treats the unquoted < and > as redirections.
!gsutil -m cp ./train_augmented_images/*.jpeg gs://<INSERT YOUR DATASET>/Accuracy_Image_Classification/hairspray/train_augmented_images/

#### Create a CSV file for train model dataset

In [None]:
# Build the training CSV: one row per augmented image holding its GCS URL and
# its class, looked up by WPID in the AEROSOL_IND column.

url_string_to_add = "gs://<INSERT YOUR DATASET>/Accuracy_Image_Classification/hairspray/train_augmented_images/"

# endswith() instead of split(".")[1]: tolerant of names with no dot or extra dots.
image_files = sorted(img for img in os.listdir("./train_augmented_images/") if img.endswith('.jpeg'))

# NOTE(review): the original cell read from `df`, which is not defined anywhere in
# the visible notebook. `product_data` is the only frame shown with a WPID column;
# this assumes it also carries AEROSOL_IND — confirm.
# The first row per WPID wins, matching the original `.values[0]` lookup.
wpid_to_class = (
    product_data
    .drop_duplicates(subset='WPID')
    .set_index('WPID')['AEROSOL_IND']
    .to_dict()
)

urls_for_gcs = [url_string_to_add + file for file in image_files]
# File names are '<WPID>_<variant>.jpeg'; splitting on the LAST underscore keeps
# WPIDs that themselves contain underscores intact. An unknown WPID raises KeyError.
classes = [wpid_to_class[file.rsplit("_", 1)[0]] for file in image_files]

label_df = pd.DataFrame({'gcs_url': urls_for_gcs, 'class': classes})

# to_csv does not create missing directories.
os.makedirs("./csv_files", exist_ok=True)
label_df.to_csv("./csv_files/hairspray_train_images_uri.csv", header = False, index = False)

In [None]:
# Copy the CSV of augmented-image URIs into the GCS bucket's training_dataset folder.
# Replace the <INSERT YOUR DATASET> placeholder with the real bucket name before
# running: left as-is, the shell treats the unquoted < and > as redirections.
!gsutil cp ./csv_files/hairspray_train_images_uri.csv gs://<INSERT YOUR DATASET>/Accuracy_Image_Classification/hairspray/training_dataset/