In [None]:
import os
import shutil
import random

# Raw images are read from source_dir; the train/validation copy is written to output_dir
source_dir = r"C:\Users\user\Downloads\mosquito"
output_dir = r"C:\Users\user\Downloads\sorted_dataset"

# Species subfolders expected inside source_dir (and created inside each split)
categories = [
    "Aedes aegypti",
    "Anopheles gambiae",
    "Culex pipiens",
    "Haemagogus janthinomys",
    "Sabethes cyaneus",
]
train_ratio = 0.8  # fraction of each category's files used for training; the rest is validation

# Split roots: <output_dir>/train and <output_dir>/validation
train_dir, validation_dir = (
    os.path.join(output_dir, split_name) for split_name in ("train", "validation")
)

# exist_ok=True keeps this cell safe to re-run
for split_root in (train_dir, validation_dir):
    os.makedirs(split_root, exist_ok=True)

# Helper that materialises one directory per category under a split root
def create_subfolders(base_dir, categories):
    """Create ``base_dir/<category>`` for every name in ``categories``.

    Directories that already exist are left untouched, and missing parent
    directories are created as needed.

    Args:
        base_dir: Directory under which the category folders are created.
        categories: Iterable of folder names.
    """
    subfolder_paths = (os.path.join(base_dir, name) for name in categories)
    for subfolder in subfolder_paths:
        os.makedirs(subfolder, exist_ok=True)

# Create subfolders for each category
create_subfolders(train_dir, categories)
create_subfolders(validation_dir, categories)

# A dedicated, seeded RNG makes the split reproducible. With an unseeded
# shuffle, re-running this cell picks a different split while the previous
# copies are still in output_dir, so the same image ends up in both train
# and validation (data leakage).
# NOTE: if files are added to or removed from source_dir, the split changes;
# delete output_dir before re-running in that case.
split_rng = random.Random(42)

# Copy files into train and validation folders (the source dataset is left intact)
for category in categories:
    category_dir = os.path.join(source_dir, category)
    # isdir (not exists): a stray file with the category's name cannot be listed
    if not os.path.isdir(category_dir):
        print(f"Category folder not found: {category_dir}")
        continue

    # os.listdir returns entries in arbitrary order, so sort before shuffling
    # to make the seeded shuffle deterministic. Skip anything that is not a
    # regular file: shutil.copy2 cannot copy directories.
    files = sorted(
        name
        for name in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, name))
    )
    split_rng.shuffle(files)

    # First train_ratio share (rounded down) goes to training, the rest to validation
    split_index = int(len(files) * train_ratio)
    train_files = files[:split_index]
    validation_files = files[split_index:]

    # copy2 also preserves file metadata such as modification time
    for split_root, split_files in ((train_dir, train_files), (validation_dir, validation_files)):
        for file in split_files:
            src_path = os.path.join(category_dir, file)
            dest_path = os.path.join(split_root, category, file)
            shutil.copy2(src_path, dest_path)

print("Dataset sorting completed!")


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image

# Dataset locations: one "train" and one "validation" tree under base_dir
base_dir = r"C:\Users\user\Downloads\sorted_dataset"
train_dir = os.path.join(base_dir, "train")
validation_dir = os.path.join(base_dir, "validation")

# Input geometry and batching
IMG_HEIGHT = 150
IMG_WIDTH = 150
BATCH_SIZE = 32
NUM_CLASSES = 5  # Number of mosquito species

# Both generators only multiply pixel values by 1/255; no augmentation is applied
train_datagen = ImageDataGenerator(rescale=1.0/255)
validation_datagen = ImageDataGenerator(rescale=1.0/255)

# Options shared by both directory iterators so the two splits stay in sync
flow_options = {
    "target_size": (IMG_HEIGHT, IMG_WIDTH),
    "batch_size": BATCH_SIZE,
    "class_mode": 'categorical',
}

train_data = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_data = validation_datagen.flow_from_directory(validation_dir, **flow_options)

# CNN definition
def build_model():
    """Build and compile the image classifier.

    The network is four Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
    32, 64, 128 and 128 filters, followed by Flatten, Dense(512, relu) and a
    softmax layer with NUM_CLASSES units. Input shape is
    (IMG_HEIGHT, IMG_WIDTH, 3).

    Returns:
        The Sequential model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    conv_filters = (32, 64, 128, 128)

    stack = []
    for stage, filters in enumerate(conv_filters):
        # Only the first layer declares the input shape
        first_layer_args = {'input_shape': (IMG_HEIGHT, IMG_WIDTH, 3)} if stage == 0 else {}
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
        stack.append(layers.MaxPooling2D((2, 2)))

    # Classification head
    stack.append(layers.Flatten())
    stack.append(layers.Dense(512, activation='relu'))
    stack.append(layers.Dense(NUM_CLASSES, activation='softmax'))

    model = models.Sequential(stack)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Training run settings
EPOCHS = 10
model_path = "mosquito_species_model.h5"

# Fit a freshly built model, validating on the held-out split after each epoch
model = build_model()
history = model.fit(train_data, validation_data=validation_data, epochs=EPOCHS)

# Persist the trained model (path is relative to the current working directory)
model.save(model_path)
print(f"Model saved to {model_path}")

# Training curves: accuracy on the left, loss on the right
def plot_history(history):
    """Show training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the 'accuracy', 'val_accuracy', 'loss' and 'val_loss' series
            (as returned by ``model.fit`` in this notebook).
    """
    # Read every series up front so a missing key fails before any figure is created
    recorded = history.history
    curves = {
        key: recorded[key]
        for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    }

    _, axes = plt.subplots(1, 2, figsize=(10, 5))
    panels = (
        (axes[0], 'Accuracy', (('accuracy', 'Training Accuracy'),
                               ('val_accuracy', 'Validation Accuracy'))),
        (axes[1], 'Loss', (('loss', 'Training Loss'),
                           ('val_loss', 'Validation Loss'))),
    )
    for ax, title, series in panels:
        for key, label in series:
            ax.plot(curves[key], label=label)
        ax.legend()
        ax.set_title(title)

    plt.show()

plot_history(history)

# Predict Function
def predict_image(image_path, model):
    """Classify one image file and print the predicted species with its confidence.

    The image is converted to 3-channel RGB, resized to
    (IMG_WIDTH, IMG_HEIGHT), scaled by 1/255 (the same rescale factor the
    training generator uses) and passed to ``model`` as a batch of one.

    Args:
        image_path: Path to an image file that PIL can open.
        model: Model exposing ``predict``; expected to accept input of shape
            (1, IMG_HEIGHT, IMG_WIDTH, 3).
    """
    # Context manager closes the underlying file handle once pixels are loaded
    with Image.open(image_path) as img:
        # Force RGB: grayscale, palette or RGBA files would otherwise yield an
        # array with 1 or 4 channels (or none), which does not match the
        # model's 3-channel input and makes predict() fail.
        img = img.convert("RGB").resize((IMG_WIDTH, IMG_HEIGHT))
        img_array = np.array(img) / 255.0  # Normalize the image
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

    predictions = model.predict(img_array)

    # Invert the name -> index mapping explicitly instead of relying on the
    # dict's key order happening to match the output indices.
    index_to_name = {index: name for name, index in train_data.class_indices.items()}

    predicted_class = index_to_name[int(np.argmax(predictions))]
    confidence = np.max(predictions) * 100

    print(f"Predicted Species: {predicted_class} ({confidence:.2f}%)")

# Try the saved model on a single new image
test_image_path = r"C:\Users\user\Downloads\test\download.jpeg"  # Replace with your image path
if not os.path.exists(test_image_path):
    print("Test image not found. Please check the file path.")
else:
    # Reload from disk so the prediction exercises the saved artifact
    loaded_model = load_model(model_path)
    predict_image(test_image_path, loaded_model)
