In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
import os
import shutil
from sklearn.model_selection import train_test_split
import random

In [5]:
# Root folder that holds the PlantVillage data. Set the PLANTVILLAGE_DIR
# environment variable to run the notebook on another machine; the default
# keeps the original local location. Cells should use the variables below
# rather than repeating the literal paths so the override applies everywhere.
DATASET_ROOT = os.environ.get(
    "PLANTVILLAGE_DIR",
    "/Users/pavan/Downloads/minor dataset/plantvillage dataset",
)

source_dir = os.path.join(DATASET_ROOT, "APPLE")  # input: one sub-folder per class
train_dir = os.path.join(DATASET_ROOT, "train")  # output: training split
test_dir = os.path.join(DATASET_ROOT, "test")  # output: held-out test split

In [6]:
# Create both split roots up front; exist_ok=True keeps this cell safe to
# re-run when the folders are already there.
for split_root in (train_dir, test_dir):
    os.makedirs(split_root, exist_ok=True)

In [7]:
def split_data(source_dir, train_dir, test_dir, test_size=0.2):
    """Copy each class folder of ``source_dir`` into train and test splits.

    Every immediate sub-directory of ``source_dir`` is treated as one class.
    Its regular files are partitioned with ``train_test_split`` (fixed
    ``random_state=42``) and copied into ``train_dir/<class>`` and
    ``test_dir/<class>``. The source folder is only read, never modified.

    Files already present in the destination folders are left in place, so
    start from empty split directories whenever the source data or
    ``test_size`` changes; otherwise images from an older split can end up in
    both train and test.

    Args:
        source_dir: Directory containing one sub-directory per class.
        train_dir: Destination root for the training split.
        test_dir: Destination root for the test split.
        test_size: Forwarded unchanged to ``train_test_split``.
    """
    for class_name in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_path):
            continue  # stray files next to the class folders are not classes

        train_class_dir = os.path.join(train_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Regular files only: shutil.copy cannot copy a nested directory.
        # Sorted because os.listdir returns entries in arbitrary order and the
        # seeded split is only reproducible for a fixed input order.
        images = sorted(
            name
            for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )

        if len(images) < 2:
            # train_test_split raises when it cannot populate both sides;
            # keep a lone image for training and leave the test side empty.
            train_images, test_images = images, []
        else:
            train_images, test_images = train_test_split(
                images, test_size=test_size, random_state=42
            )

        # Copy images to their respective split directories.
        for dest_dir, names in (
            (train_class_dir, train_images),
            (test_class_dir, test_images),
        ):
            for img in names:
                shutil.copy(os.path.join(class_path, img), os.path.join(dest_dir, img))


In [8]:
def oversample_class(class_dir, target_count):
    """Duplicate random files in ``class_dir`` until it holds ``target_count`` files.

    Copies are written next to the originals as ``copy_<n>_<original name>``.
    Only regular files are counted and used as copy sources; sub-directories
    are ignored. Copies left by an earlier run count as regular files and may
    themselves be duplicated.

    Args:
        class_dir: Folder containing the files of a single class.
        target_count: Number of regular files the folder should end up with.
            Nothing happens when the folder already has at least this many
            files, or when it has no files to sample from.
    """
    # Sorted so the draw sequence depends only on the state of the `random`
    # module, not on the arbitrary order returned by os.listdir.
    file_list = sorted(
        name
        for name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, name))
    )
    if not file_list:
        # random.choice raises IndexError on an empty sequence; with nothing to
        # copy the target can never be reached, so leave the folder untouched.
        return

    taken_names = set(os.listdir(class_dir))
    current_count = len(file_list)
    suffix = current_count

    while current_count < target_count:
        file_to_copy = random.choice(file_list)
        new_file_name = f"copy_{suffix}_{file_to_copy}"
        suffix += 1
        if new_file_name in taken_names:
            # Overwriting an existing entry would not add a file; try the next
            # suffix so the folder really reaches target_count.
            continue

        src = os.path.join(class_dir, file_to_copy)
        dst = os.path.join(class_dir, new_file_name)
        shutil.copy(src, dst)
        taken_names.add(new_file_name)
        current_count += 1

In [9]:
def oversample_dataset(train_dir, target_count=1500):
    """Oversample every class folder in ``train_dir`` up to a common size.

    Each immediate sub-directory of ``train_dir`` whose entry count is below
    the target is passed to ``oversample_class``. Other entries in
    ``train_dir`` are ignored.

    Args:
        train_dir: Directory containing one sub-directory per class.
        target_count: Size every class folder should reach. Defaults to 1500.
            Pass ``None`` to use the size of the largest class folder
            currently in ``train_dir``.
    """
    # Measure every class first so that target_count=None can be resolved
    # before any folder is modified.
    class_sizes = {}
    for class_name in os.listdir(train_dir):
        class_path = os.path.join(train_dir, class_name)
        if os.path.isdir(class_path):
            class_sizes[class_path] = len(os.listdir(class_path))

    if target_count is None:
        # default=0 keeps an empty train_dir a harmless no-op.
        target_count = max(class_sizes.values(), default=0)

    for class_path, current_count in class_sizes.items():
        if current_count < target_count:
            oversample_class(class_path, target_count)

In [10]:
# Build the train/test folders from the per-class source images.
split_data(
    source_dir=source_dir,
    train_dir=train_dir,
    test_dir=test_dir,
)

In [11]:
# Top up under-represented classes in the training split only, using the
# function's default target_count; the test split is left untouched.
oversample_dataset(train_dir=train_dir)

In [14]:
# Input resolution (in pixels) and mini-batch size used by the data generators
# and the model's input layer.
IMG_HEIGHT, IMG_WIDTH = 224, 224
BATCH_SIZE = 32

In [16]:
# Random augmentations applied to training images only.
# NOTE(review): channel_shift_range appears to be applied to raw pixel values
# before `rescale`, in which case 0.2 is close to a no-op -- confirm against
# the Keras documentation.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    channel_shift_range=0.2,
    fill_mode='nearest',
)

# Training pipeline: scale pixel values by 1/255 and augment.
# NOTE(review): ImageNet-pretrained InceptionV3 weights are usually paired with
# inception_v3.preprocess_input rather than a plain 1/255 rescale -- confirm
# this choice is intentional.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Evaluation pipeline: same rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)


In [19]:
# Stream augmented training batches from the per-class folders created by the
# split step. Reusing train_dir/test_dir keeps a single source of truth for
# the dataset location instead of repeating the literal paths.
train_generator = train_datagen.flow_from_directory(
    train_dir,  # Path to the training set
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Load test data (no augmentation). shuffle=False keeps the batch order fixed
# so predictions line up with test_generator.classes / .filenames when
# computing per-class metrics; aggregate loss/accuracy are unaffected.
test_generator = test_datagen.flow_from_directory(
    test_dir,  # Path to the test set
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

Found 12000 images belonging to 8 classes.
Found 1270 images belonging to 8 classes.


In [20]:
# Model building: InceptionV3 backbone with a feature-attention classification head

In [21]:
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Attention, Concatenate, Reshape, Multiply
from tensorflow.keras import layers

In [23]:
def attention_module(x):
    """Re-weight a feature tensor with a learned softmax gate.

    A Dense layer with as many units as ``x`` has features (``x.shape[-1]``)
    produces softmax weights, which are multiplied element-wise with ``x``.
    The result has the same shape as ``x``.
    """
    gate = Dense(x.shape[-1], activation='softmax')(x)
    return Multiply()([x, gate])


# Pre-trained InceptionV3 backbone: ImageNet weights, without the original
# classification top, sized for this notebook's input resolution.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Collapse the backbone's spatial feature maps into one vector per image.
pooled_features = GlobalAveragePooling2D()(base_model.output)

# Gate the pooled features, then classify through one hidden layer.
attended_features = attention_module(pooled_features)
x = Dense(1024, activation='relu')(attended_features)

# One softmax output per class folder discovered by the training generator.
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

In [24]:
# Assemble the end-to-end network: backbone input -> class probabilities.
model = Model(
    inputs=base_model.input,
    outputs=predictions,
)

In [None]:
# Training configuration: Adam with a small step size, categorical
# cross-entropy (the generators use class_mode='categorical'), and accuracy as
# the reported metric.
LEARNING_RATE = 0.0001

model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    metrics=['accuracy'],
)