In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive so that the dataset paths used by the
# cells below (all located under /content/gdrive/MyDrive) can be resolved.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Top up one class with randomly augmented copies of its own images until the
# class reaches a fixed total size.
# NOTE(review): unlike the other class cells in this notebook, this cell only
# writes the augmented images; the originals are not copied to the output
# folder here - confirm they are placed there separately.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Aedes Aegypti'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the images of the class. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the source used for each augmented file is stable between runs.
image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not image_files:
    # Without this guard the loop below fails with a ZeroDivisionError
    # raised by `i % len(image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(image_files))

# Augment images for the specific class
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(image_files[i % len(image_files)])  # Cycle through the existing images

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_{len(image_files) + i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Processing additional image 1/281
Processing additional image 2/281
Processing additional image 3/281
Processing additional image 4/281
Processing additional image 5/281
Processing additional image 6/281
Processing additional image 7/281
Processing additional image 8/281
Processing additional image 9/281
Processing additional image 10/281
Processing additional image 11/281
Processing additional image 12/281
Processing additional image 13/281
Processing additional image 14/281
Processing additional image 15/281
Processing additional image 16/281
Processing additional image 17/281
Processing additional image 18/281
Processing additional image 19/281
Processing additional image 20/281
Processing additional image 21/281
Processing additional image 22/281
Processing additional image 23/281
Processing additional image 24/281
Processing additional image 25/281
Processing additional image 26/281
Processing additional image 27/281
Processing additional image 28/281
Processing additional image 2

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Aedes Aegypti'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Aedes Aegypti': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Albimanus'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/64
Saving original image 2/64
Saving original image 3/64
Saving original image 4/64
Saving original image 5/64
Saving original image 6/64
Saving original image 7/64
Saving original image 8/64
Saving original image 9/64
Saving original image 10/64
Saving original image 11/64
Saving original image 12/64
Saving original image 13/64
Saving original image 14/64
Saving original image 15/64
Saving original image 16/64
Saving original image 17/64
Saving original image 18/64
Saving original image 19/64
Saving original image 20/64
Saving original image 21/64
Saving original image 22/64
Saving original image 23/64
Saving original image 24/64
Saving original image 25/64
Saving original image 26/64
Saving original image 27/64
Saving original image 28/64
Saving original image 29/64
Saving original image 30/64
Saving original image 31/64
Saving original image 32/64
Saving original image 33/64
Saving original image 34/64
Saving original image 35/64
Saving original image 36/64
S

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Albimanus'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Albimanus': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Arabiensis'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/193
Saving original image 2/193
Saving original image 3/193
Saving original image 4/193
Saving original image 5/193
Saving original image 6/193
Saving original image 7/193
Saving original image 8/193
Saving original image 9/193
Saving original image 10/193
Saving original image 11/193
Saving original image 12/193
Saving original image 13/193
Saving original image 14/193
Saving original image 15/193
Saving original image 16/193
Saving original image 17/193
Saving original image 18/193
Saving original image 19/193
Saving original image 20/193
Saving original image 21/193
Saving original image 22/193
Saving original image 23/193
Saving original image 24/193
Saving original image 25/193
Saving original image 26/193
Saving original image 27/193
Saving original image 28/193
Saving original image 29/193
Saving original image 30/193
Saving original image 31/193
Saving original image 32/193
Saving original image 33/193
Saving original image 34/193
Saving original image 3

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Arabiensis'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Arabiensis': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Atroparvus'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/36
Saving original image 2/36
Saving original image 3/36
Saving original image 4/36
Saving original image 5/36
Saving original image 6/36
Saving original image 7/36
Saving original image 8/36
Saving original image 9/36
Saving original image 10/36
Saving original image 11/36
Saving original image 12/36
Saving original image 13/36
Saving original image 14/36
Saving original image 15/36
Saving original image 16/36
Saving original image 17/36
Saving original image 18/36
Saving original image 19/36
Saving original image 20/36
Saving original image 21/36
Saving original image 22/36
Saving original image 23/36
Saving original image 24/36
Saving original image 25/36
Saving original image 26/36
Saving original image 27/36
Saving original image 28/36
Saving original image 29/36
Saving original image 30/36
Saving original image 31/36
Saving original image 32/36
Saving original image 33/36
Saving original image 34/36
Saving original image 35/36
Saving original image 36/36
P

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Atroparvus'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Atroparvus': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Coluzzi'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/74
Saving original image 2/74
Saving original image 3/74
Saving original image 4/74
Saving original image 5/74
Saving original image 6/74
Saving original image 7/74
Saving original image 8/74
Saving original image 9/74
Saving original image 10/74
Saving original image 11/74
Saving original image 12/74
Saving original image 13/74
Saving original image 14/74
Saving original image 15/74
Saving original image 16/74
Saving original image 17/74
Saving original image 18/74
Saving original image 19/74
Saving original image 20/74
Saving original image 21/74
Saving original image 22/74
Saving original image 23/74
Saving original image 24/74
Saving original image 25/74
Saving original image 26/74
Saving original image 27/74
Saving original image 28/74
Saving original image 29/74
Saving original image 30/74
Saving original image 31/74
Saving original image 32/74
Saving original image 33/74
Saving original image 34/74
Saving original image 35/74
Saving original image 36/74
S

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Coluzzi'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Coluzzi': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Farauti'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/57
Saving original image 2/57
Saving original image 3/57
Saving original image 4/57
Saving original image 5/57
Saving original image 6/57
Saving original image 7/57
Saving original image 8/57
Saving original image 9/57
Saving original image 10/57
Saving original image 11/57
Saving original image 12/57
Saving original image 13/57
Saving original image 14/57
Saving original image 15/57
Saving original image 16/57
Saving original image 17/57
Saving original image 18/57
Saving original image 19/57
Saving original image 20/57
Saving original image 21/57
Saving original image 22/57
Saving original image 23/57
Saving original image 24/57
Saving original image 25/57
Saving original image 26/57
Saving original image 27/57
Saving original image 28/57
Saving original image 29/57
Saving original image 30/57
Saving original image 31/57
Saving original image 32/57
Saving original image 33/57
Saving original image 34/57
Saving original image 35/57
Saving original image 36/57
S

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Farauti'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Farauti': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Freeborni'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/97
Saving original image 2/97
Saving original image 3/97
Saving original image 4/97
Saving original image 5/97
Saving original image 6/97
Saving original image 7/97
Saving original image 8/97
Saving original image 9/97
Saving original image 10/97
Saving original image 11/97
Saving original image 12/97
Saving original image 13/97
Saving original image 14/97
Saving original image 15/97
Saving original image 16/97
Saving original image 17/97
Saving original image 18/97
Saving original image 19/97
Saving original image 20/97
Saving original image 21/97
Saving original image 22/97
Saving original image 23/97
Saving original image 24/97
Saving original image 25/97
Saving original image 26/97
Saving original image 27/97
Saving original image 28/97
Saving original image 29/97
Saving original image 30/97
Saving original image 31/97
Saving original image 32/97
Saving original image 33/97
Saving original image 34/97
Saving original image 35/97
Saving original image 36/97
S

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Freeborni'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Freeborni': 500


In [None]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import cv2

def load_and_preprocess_image(image_path):
    """Read an image from disk and resize it to 224x224 pixels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image array produced by ``cv2.resize``. OpenCV loads
        colour images in BGR channel order, and that order is kept.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # cv2.resize failure.
        raise OSError(f"Could not read image: {image_path}")
    return cv2.resize(img, (224, 224))

# Balance one class to a fixed size: copy its resized originals into the
# output folder, then top the folder up with randomly augmented variants.

# Define paths
base_dir = '/content/gdrive/MyDrive/datasets/Dataset_V2'
output_dir = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Specific class to augment
class_to_augment = 'Anopheles Stephensi'

# Augmentation parameters: random geometric transforms; pixels uncovered by a
# transform are filled from the nearest edge pixel.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Desired total number of images for the specific class
desired_total_images_for_class = 500

# Input and output folders of the class
class_dir = os.path.join(base_dir, class_to_augment)
output_class_dir = os.path.join(output_dir, class_to_augment)

# Create a folder for the class in the output directory
os.makedirs(output_class_dir, exist_ok=True)

# Collect the original images. The extension test is case-insensitive so
# files such as "IMG_01.JPG" are not silently skipped, and the listing is
# sorted so the numbering of the saved files is stable between runs.
original_image_files = sorted(
    os.path.join(class_dir, file)
    for file in os.listdir(class_dir)
    if file.lower().endswith(('jpg', 'jpeg', 'png'))
)
if not original_image_files:
    # Without this guard the augmentation loop below fails with a
    # ZeroDivisionError raised by `i % len(original_image_files)`.
    raise FileNotFoundError(f"No images found in '{class_dir}'")

# Save original images
for i, original_image_file in enumerate(original_image_files):
    print(f"Saving original image {i+1}/{len(original_image_files)}")
    img = load_and_preprocess_image(original_image_file)

    # cv2.imread returns BGR and cv2.imwrite expects BGR, so the array is
    # written as-is; an RGB2BGR conversion here would swap red and blue.
    original_image_path = os.path.join(output_class_dir, f'{class_to_augment}_original_{i + 1}.jpg')
    cv2.imwrite(original_image_path, img)

# Number of augmented images still needed (never negative)
remaining_images_to_generate = max(0, desired_total_images_for_class - len(original_image_files))

# Augment additional images
for i in range(remaining_images_to_generate):
    print(f"Processing additional image {i+1}/{remaining_images_to_generate}")
    img = load_and_preprocess_image(original_image_files[i % len(original_image_files)])  # Cycle through the originals

    # Apply one random augmentation to the single-image batch
    x_batch = next(datagen.flow(np.expand_dims(img, axis=0), batch_size=1, shuffle=False))[0]

    # The generator yields floating-point pixels; round and clip them back
    # to 8-bit before encoding as JPEG.
    augmented_img = np.clip(np.rint(x_batch), 0, 255).astype(np.uint8)

    # Still BGR (see above), so no colour conversion before writing.
    augmented_image_path = os.path.join(output_class_dir, f'{class_to_augment}_augmented_{i + 1}.jpg')
    cv2.imwrite(augmented_image_path, augmented_img)

print("Data augmentation and saving complete.")


Saving original image 1/9
Saving original image 2/9
Saving original image 3/9
Saving original image 4/9
Saving original image 5/9
Saving original image 6/9
Saving original image 7/9
Saving original image 8/9
Saving original image 9/9
Processing additional image 1/491
Processing additional image 2/491
Processing additional image 3/491
Processing additional image 4/491
Processing additional image 5/491
Processing additional image 6/491
Processing additional image 7/491
Processing additional image 8/491
Processing additional image 9/491
Processing additional image 10/491
Processing additional image 11/491
Processing additional image 12/491
Processing additional image 13/491
Processing additional image 14/491
Processing additional image 15/491
Processing additional image 16/491
Processing additional image 17/491
Processing additional image 18/491
Processing additional image 19/491
Processing additional image 20/491
Processing additional image 21/491
Processing additional image 22/491
Proce

In [None]:
import os

# Folder whose image files are being counted
path_to_check = '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Stephensi'

# Every entry found directly inside that folder
files = os.listdir(path_to_check)

# Keep the entries whose lower-cased name ends in jpg, jpeg or png
image_files = list(filter(lambda name: name.lower().endswith(('jpg', 'jpeg', 'png')), files))

# Report how many image files were found
print(f"Total number of images in '{path_to_check}': {len(image_files)}")


Total number of images in '/content/gdrive/MyDrive/datasets/augmented_dataset/Anopheles Stephensi': 500


In [None]:
import os
import shutil
import random

# Split every class folder of the augmented dataset into a train set and a
# test set by copying files (the source folder is left untouched).
#
# NOTE(review): the split is performed on the *augmented* dataset, so several
# augmented variants of one original photo can be distributed across both
# trainset and testset. Validation accuracy measured on such a split is likely
# optimistic -- consider splitting the original images first and augmenting
# only the training part.

# Set the path to your dataset
dataset_path = '/content/gdrive/MyDrive/datasets/augmented_dataset'

# Set the paths for the train and test sets
trainset_path = '/content/gdrive/MyDrive/datasets/trainset'
testset_path = '/content/gdrive/MyDrive/datasets/testset'

# Set the ratio for splitting (80% trainset, 20% testset)
split_ratio = 0.8

# Fixed seed so that re-running this cell reproduces exactly the same split.
# With an unseeded shuffle every re-run copies a *different* partition on top
# of the files already present, so the folders grow and the same image can end
# up in both trainset and testset. If trainset/testset were populated by an
# earlier unseeded run, empty them once before running this cell.
split_seed = 42

# Extensions treated as images, matched case-insensitively (the same set the
# image-count cell above uses).
image_extensions = ('.jpg', '.jpeg', '.png')

# Create the trainset and testset directories if they don't exist
os.makedirs(trainset_path, exist_ok=True)
os.makedirs(testset_path, exist_ok=True)

# Iterate over each class
for mosquito_class in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, mosquito_class)

    # Only sub-directories are classes; skip stray files
    if not os.path.isdir(class_path):
        continue

    # os.listdir returns entries in arbitrary order, so sort first: a seeded
    # shuffle is only reproducible when its input order is.
    images = sorted(
        img for img in os.listdir(class_path)
        if img.lower().endswith(image_extensions)
    )

    # Dedicated generator per class, seeded from the seed and the class name,
    # so adding or removing another class does not change this class's split.
    class_rng = random.Random(f"{split_seed}:{mosquito_class}")
    class_rng.shuffle(images)

    # First `split_ratio` share goes to train, the remainder to test
    split_index = int(len(images) * split_ratio)
    train_images = images[:split_index]
    test_images = images[split_index:]

    # Copy each subset into <split root>/<class name>/
    for destination_root, subset in ((trainset_path, train_images),
                                     (testset_path, test_images)):
        if not subset:
            # Nothing to copy: do not leave an empty class folder behind
            continue
        destination_dir = os.path.join(destination_root, mosquito_class)
        os.makedirs(destination_dir, exist_ok=True)  # once per class, not per file
        for img in subset:
            shutil.copy(os.path.join(class_path, img),
                        os.path.join(destination_dir, img))

print("Dataset split into trainset and testset successfully.")


Dataset split into trainset and testset successfully.


In [None]:
import os

# Print, for the train set and then the test set, every class folder together
# with the number of images it contains.

# Set the paths for the train and test sets
trainset_path = '/content/gdrive/MyDrive/datasets/trainset'
testset_path = '/content/gdrive/MyDrive/datasets/testset'

# Extensions counted as images, matched case-insensitively (the same set the
# image-count cell above uses), so .jpeg/.png/.JPG files are not silently
# left out of the report.
image_extensions = ('.jpg', '.jpeg', '.png')

# One loop serves both splits instead of two copy-pasted ones
report_sections = (("Trainset:", trainset_path), ("Testset:", testset_path))

for section_index, (section_title, section_path) in enumerate(report_sections):
    # Print a separator before every section except the first
    if section_index > 0:
        print("\n" + "="*40 + "\n")

    print(section_title)

    # Sorted so both sections list the classes in the same, stable order
    # (os.listdir order is arbitrary)
    for mosquito_class in sorted(os.listdir(section_path)):
        class_path = os.path.join(section_path, mosquito_class)

        # Ensure it's a directory
        if not os.path.isdir(class_path):
            continue

        # Get the list of images for the current class
        images = [img for img in os.listdir(class_path)
                  if img.lower().endswith(image_extensions)]

        # Print class name and number of images
        print(f"{mosquito_class}: {len(images)} images")


Trainset:
Culex Quinquefasciatus: 400 images
Aedes Albopictus: 400 images
Aedes Aegypti: 400 images
Anopheles Albimanus: 400 images
Anopheles Arabiensis: 400 images
Anopheles Atroparvus: 400 images
Anopheles Coluzzi: 400 images
Anopheles Farauti: 400 images
Anopheles Freeborni: 400 images
Anopheles Stephensi: 400 images


Testset:
Culex Quinquefasciatus: 100 images
Aedes Albopictus: 100 images
Aedes Aegypti: 100 images
Anopheles Albimanus: 100 images
Anopheles Arabiensis: 100 images
Anopheles Atroparvus: 100 images
Anopheles Coluzzi: 100 images
Anopheles Farauti: 100 images
Anopheles Freeborni: 100 images
Anopheles Stephensi: 100 images


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Build the training and validation input pipelines.

# Define your dataset directories
train_data_dir = '/content/gdrive/MyDrive/datasets/trainset'
val_data_dir = '/content/gdrive/MyDrive/datasets/testset'

# Image dimensions
img_height, img_width = 224, 224
batch_size = 32

# Check if GPU is available and being used
# (tensorflow is already imported as `tf` at the top of this cell)
if tf.config.list_physical_devices('GPU'):
    print("GPU is available and being used for training.")
else:
    print("GPU is not available. Training on CPU.")

# NOTE(review): both pipelines feed the images to the network without any
# model-specific preprocessing. The Keras ResNet50 documentation asks for
# `tf.keras.applications.resnet50.preprocess_input` to be applied to inputs --
# consider adding it to BOTH pipelines (and to any inference code) together.

# Load the dataset with data augmentation (training images only)
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_ds = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse'
)

# Class names ordered by the integer label the training generator assigned.
class_names = sorted(train_ds.class_indices, key=train_ds.class_indices.get)

# Training and validation are loaded through two different APIs, each of which
# infers its own label order from the folder names. Passing `class_names`
# pins the validation labels to the training mapping and makes the loader
# raise immediately if the two directories do not contain the same classes.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    val_data_dir,
    class_names=class_names,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

num_classes = len(train_ds.class_indices)

# Backbone: ResNet50 initialised with the 'imagenet' weights, built without
# its top layers (include_top=False) and with pooling='avg' on its output.
pretrained_model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(img_height, img_width, 3),
)

# Assemble the classifier one layer at a time: backbone first, then the head
model = Sequential()
model.add(pretrained_model)
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))  # dropout regularization on the hidden layer
model.add(layers.Dense(num_classes, activation='softmax'))  # one unit per class

# Mark every backbone layer as non-trainable so only the new head is updated
for backbone_layer in pretrained_model.layers:
    backbone_layer.trainable = False

# Adam with an explicitly chosen learning rate; integer labels call for the
# sparse categorical cross-entropy loss
optimizer = Adam(learning_rate=0.0001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Train the model
epochs = 20

# ModelCheckpoint callback to save the best model during training.
# Google Drive is mounted at /content/gdrive in this notebook (see the
# drive.mount call in the first cell), so the checkpoint must live under that
# mount point. The previous '/content/drive/MyDrive/...' path made training
# abort with "FailedPreconditionError: /content/drive/MyDrive; Transport
# endpoint is not connected".
checkpoint_path = '/content/gdrive/MyDrive/mosquito_model2'  # Change the path as needed
checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                             save_weights_only=False,   # save the full model, not just weights
                             monitor='val_accuracy',    # metric that decides what "best" means
                             mode='max',                # higher validation accuracy is better
                             save_best_only=True)       # keep only the best epoch so far

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=[checkpoint]  # Include the ModelCheckpoint callback
)

# Get the model name (last component of the checkpoint path)
model_name = checkpoint_path.split('/')[-1]

# Print the model name along with the best validation accuracy over all
# epochs, i.e. the accuracy of the epoch the checkpoint kept
print(f"Model {model_name} saved with Validation Accuracy: {max(history.history['val_accuracy']):.4f}")

# Draw the training curves: accuracy in the left panel, loss in the right one
curves = history.history
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Per panel: (training key, validation key, training label, validation label,
#             panel title, y-axis label)
panel_specs = (
    ('accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy',
     'Training and Validation Accuracy', 'Accuracy'),
    ('loss', 'val_loss',
     'Training Loss', 'Validation Loss',
     'Training and Validation Loss', 'Loss'),
)

for ax, (train_key, val_key, train_label, val_label, panel_title, y_label) in zip(axes, panel_specs):
    # Training curve first, validation curve second
    ax.plot(curves[train_key], label=train_label)
    ax.plot(curves[val_key], label=val_label)
    ax.set_title(panel_title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()
plt.show()


GPU is available and being used for training.
Found 4000 images belonging to 10 classes.
Found 1000 files belonging to 10 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/20

FailedPreconditionError: /content/drive/MyDrive; Transport endpoint is not connected