# Data Augmentation and Splitting for Training, Validation, and Testing

## Import Libraries and Packages

In [None]:
import tensorflow as tf
import requests
import zipfile
import shutil
import os

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from PIL import UnidentifiedImageError
from google.colab import files

## Downloading and Extracting ZIP Files from GitHub

In [None]:
# Retrieve the dataset archive from GitHub and stage it at a temporary path
zip_url = "https://github.com/snapmoo/SnapMoo-ML/raw/main/FMD_Cattle.zip"
zip_path = "/content/FMD_Cattle.zip"

# Download the zip file in chunks so it is never held in memory all at once.
# - the `with` block releases the streamed connection when the download ends
# - `timeout` stops a stalled server from hanging the cell indefinitely
# - `raise_for_status()` aborts on an HTTP error instead of saving the error
#   page under a .zip name and failing later during extraction
with requests.get(zip_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(zip_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip empty chunks
                f.write(chunk)

# Extract the downloaded zip file
extract_dir = "/content/FMD_Cattle"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print("Extraction complete.")

# List the files in the extracted directory to verify
extracted_files = os.listdir(extract_dir)
print("Extracted files:", extracted_files)

Extraction complete.
Extracted files: ['FMD_Cattle']


## Augmentation and Splitting Process

In [None]:
# Location of the extracted source images and root of the generated dataset
original_dir = "/content/FMD_Cattle/FMD_Cattle/FMD_Cattle"
base_dir = "/content/augmented_Dataset"

# One output folder per split, all rooted at base_dir
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# exist_ok=True means an already existing folder is not an error
for split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

def create_category_directories(base_path, categories=('0', '1')):
    """Create one sub-directory per class label inside ``base_path``.

    Args:
        base_path: Directory that will hold the per-class folders.
        categories: Iterable of class folder names. Defaults to ``'0'`` and
            ``'1'``, the two labels this function previously hard-coded.

    Returns:
        None. Folders that already exist are left untouched
        (``exist_ok=True``), so the call can be repeated safely.
    """
    for category in categories:
        os.makedirs(os.path.join(base_path, category), exist_ok=True)

# Give every split its own set of per-class folders
for split_dir in (train_dir, validation_dir, test_dir):
    create_category_directories(split_dir)

# Random transformations used when generating augmented copies
augmentation_settings = {
    'rescale': 1./255,
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'reflect',
}
datagen = ImageDataGenerator(**augmentation_settings)

# Split each class 60/20/20 into train/validation/test and copy the files
for category in ['0', '1']:
    category_dir = os.path.join(original_dir, category)

    # List the class folder exactly once: the split below does not depend on
    # any per-entry loop, so iterating over the folder's entries would only
    # repeat the identical split and copy once per file.
    # The listing is sorted because os.listdir() returns entries in arbitrary
    # order, which would make the split differ between runs despite the fixed
    # random_state. Only regular files are kept, since copyfile() cannot copy
    # a directory.
    images = sorted(
        name for name in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, name))
    )

    # 40% is held out, then halved into validation and test
    train_images, temp_images = train_test_split(images, test_size=0.4, random_state=42)
    validation_images, test_images = train_test_split(temp_images, test_size=0.5, random_state=42)

    for image_set, dest_dir in zip([train_images, validation_images, test_images],
                                   [train_dir, validation_dir, test_dir]):
        for image_name in image_set:
            src = os.path.join(category_dir, image_name)
            dst = os.path.join(dest_dir, category, image_name)
            try:
                shutil.copyfile(src, dst)
            except OSError as e:
                # Best effort: report the failed file and keep copying the rest
                print(f"Error copying {src} to {dst}: {e}")

# Apply augmentation to training and validation
AUGMENTATIONS_PER_IMAGE = 30  # augmented copies written for each source image
AUG_MARKER = '_aug'           # appended to the source stem in generated names
AUG_FORMAT = 'jpeg'           # file format (and extension) of generated files

for category in ['0', '1']:
    for dest_dir in [train_dir, validation_dir]:
        aug_dir = os.path.join(dest_dir, category)

        # Snapshot the folder before writing into it, and leave out files
        # produced by an earlier run of this cell (their names contain the
        # marker and end in the generated extension). Without this, re-running
        # the cell would augment the augmented images again.
        # NOTE(review): an original file named like "*_aug_*.jpeg" would be
        # skipped too - confirm no source image uses that pattern.
        images = sorted(
            name for name in os.listdir(aug_dir)
            if not (f'{AUG_MARKER}_' in name and name.endswith(f'.{AUG_FORMAT}'))
        )

        for image_name in images:
            src = os.path.join(aug_dir, image_name)
            try:
                img = tf.keras.preprocessing.image.load_img(src)
                x = tf.keras.preprocessing.image.img_to_array(img)
                # flow() expects a batch, so add a leading batch axis of size 1
                x = x.reshape((1,) + x.shape)

                # splitext() drops only the extension, so a name such as
                # "cow.01.jpg" keeps its full stem "cow.01"
                stem = os.path.splitext(image_name)[0]
                flow = datagen.flow(x, batch_size=1, save_to_dir=aug_dir,
                                    save_prefix=f'{stem}{AUG_MARKER}',
                                    save_format=AUG_FORMAT)

                # The iterator never ends on its own; each batch pulled from it
                # writes one augmented file, so stop after the required count
                for generated, _batch in enumerate(flow, start=1):
                    if generated >= AUGMENTATIONS_PER_IMAGE:
                        break
            except Exception as e:
                # Best effort: an unreadable or corrupt image is reported and
                # skipped so the remaining images are still processed
                print(f"Error augmenting {src}: {e}")

print("Data augmentation and splitting complete.")


Data augmentation and splitting complete.


## Compress Augmentation Results into ZIP File

In [None]:
# Where the archive ends up: make_archive() adds the '.zip' suffix itself,
# so the base name below is this path without its extension
final_zip_path = "/content/augmented_FMD_Cattle.zip"

# Pack everything under base_dir into a single zip archive
shutil.make_archive(
    base_name='/content/augmented_FMD_Cattle',
    format='zip',
    root_dir=base_dir,
)

print("Compression complete.")

Compression complete.


## Download the Augmented ZIP File

In [None]:
# Pass the finished archive to the google.colab `files` helper for download
# (this helper is only available when the notebook runs inside Colab)
files.download(final_zip_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>