In [None]:
import os
import zipfile
import shutil
from google.colab import drive
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications.vgg16 import VGG16  # Modify in each file
from keras.applications.vgg16 import preprocess_input  # Modify in each file
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Check GPU availability: count the GPU devices that TensorFlow can see.
# tf.config.list_physical_devices is the stable name of the API that used to
# live under tf.config.experimental.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below are
# located under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Splitting data into train, validation and test sets

In [None]:
# Root folder of the dataset on the mounted Drive. It is scanned for images
# and the train/eval/test folders are created directly inside it.
data_path = "/content/drive/MyDrive/RCP-CNN-1/CoLeaf-augmented"

In [None]:
# Destination folders of the three splits, all located directly under data_path
# (note that the validation split is stored in a folder named 'eval').
train_folder, val_folder, test_folder = (
    os.path.join(data_path, split_name) for split_name in ('train', 'eval', 'test')
)

In [None]:
# Class labels; each one is used as a sub-folder name inside every split folder
# and as a substring to recognise the source folders during the directory walk.
# NOTE(review): 'potasium' presumably mirrors the folder spelling used by the
# dataset - confirm before "correcting" it.
categories = [
    'boron', 'calcium', 'healthy', 'iron', 'magnesium',
    'manganese', 'others', 'nitrogen', 'phosphorus', 'potasium',
]

# One (initially empty) set of image paths per category
files_dict = {name: set() for name in categories}

# True only if every category folder is already present in all three splits
directories_exist = all(
    os.path.exists(os.path.join(split_folder, name))
    for name in categories
    for split_folder in (train_folder, val_folder, test_folder)
)

In [None]:
# Create the per-category folders inside each split when any of them is missing
if not directories_exist:
    for category in categories:
        for split_folder in (train_folder, val_folder, test_folder):
            # exist_ok keeps folders that are already there untouched
            os.makedirs(os.path.join(split_folder, category), exist_ok=True)

In [None]:
# Walk through the base directory to find image files
image_extensions = ('.jpg', '.jpeg', '.png')

# The split folders live inside data_path. They must be excluded from the scan:
# otherwise a re-run collects the images that were already copied there, and
# the same picture can be assigned to more than one split.
split_dirs = {
    os.path.abspath(folder) for folder in (train_folder, val_folder, test_folder)
}

# Start from empty sets so that re-running this cell does not keep stale paths
for category in categories:
    files_dict[category].clear()

for root, dirs, files in os.walk(data_path):
    # Prune in place so that os.walk does not descend into the split folders
    dirs[:] = [
        d for d in dirs
        if os.path.abspath(os.path.join(root, d)) not in split_dirs
    ]

    # Match category names only against the part of the path below data_path,
    # so a parent folder name can never be mistaken for a category
    relative_root = os.path.relpath(root, data_path).lower()

    for file in files:
        if file.lower().endswith(image_extensions):
            file_path = os.path.join(root, file)
            for category in categories:
                if category in relative_root:
                    # First matching category wins; sets ignore duplicates
                    files_dict[category].add(file_path)
                    break

# Convert sets to sorted lists for further processing. Set iteration order can
# differ between kernel sessions, and the seeded train/validation/test split is
# only reproducible when its input order is stable.
files_list_dict = {category: sorted(files_dict[category]) for category in categories}

# Print the number of files found in each category
for category in categories:
    print(f"Number of {category} files found: {len(files_list_dict[category])}")


Number of boron files found: 2672
Number of calcium files found: 2493
Number of healthy files found: 2543
Number of iron files found: 2428
Number of magnesium files found: 2474
Number of manganese files found: 2418
Number of others files found: 2412
Number of nitrogen files found: 2360
Number of phosphorus files found: 2446
Number of potasium files found: 2475


In [None]:
# Define the function to split and copy files
def split_and_copy_files(files, train_dir, validation_dir, test_dir):
    """Split ``files`` into 70% train, 15% validation and 15% test and copy them.

    The split is made with ``train_test_split`` and ``random_state=42``: 30% of
    the files are held out first, then that hold-out is halved into the
    validation and test parts. Every file is copied into the directory of its
    split under its base name.

    Args:
        files: Iterable of source file paths.
        train_dir: Directory receiving the training files (not created here).
        validation_dir: Directory receiving the validation files (not created here).
        test_dir: Directory receiving the test files (not created here).

    Returns:
        Tuple ``(train_files, validation_files, test_files)`` holding the source
        paths that were assigned to each split.

    Raises:
        ValueError: If fewer than four files are given, because one of the
            three splits would then be empty.
    """
    files = list(files)

    # With fewer than four files one of the two train_test_split calls below
    # would be asked for an empty part; fail early with a readable message.
    min_files = 4
    if len(files) < min_files:
        raise ValueError(
            f"Need at least {min_files} files to build train/validation/test "
            f"splits, got {len(files)}"
        )

    # Split the files into 70% train, 15% validation, and 15% test
    train_files, temp_files = train_test_split(files, test_size=0.3, random_state=42)
    validation_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

    # Function to copy files to the target directory
    def copy_files(file_list, target_dir):
        # NOTE(review): the target name is the source's base name, so two
        # sources with the same file name in one split overwrite each other -
        # confirm that file names are unique within a category.
        for file_path in file_list:
            target_path = os.path.join(target_dir, os.path.basename(file_path))
            shutil.copy(file_path, target_path)

    # Copy each part into its own directory
    copy_files(train_files, train_dir)
    copy_files(validation_files, validation_dir)
    copy_files(test_files, test_dir)

    return train_files, validation_files, test_files

In [None]:
# Split each category's files and copy them into the matching split folders
for category in categories:
    # Per-category destinations, ordered train / validation / test
    train_category_dir, validation_category_dir, test_category_dir = (
        os.path.join(split_folder, category)
        for split_folder in (train_folder, val_folder, test_folder)
    )

    # Create the destinations up front; folders that already exist are kept
    for category_dir in (train_category_dir, validation_category_dir, test_category_dir):
        os.makedirs(category_dir, exist_ok=True)

    # Image paths collected for this category
    category_files = files_list_dict[category]

    # Split the files and copy every part into its own folder
    split_and_copy_files(
        category_files,
        train_category_dir,
        validation_category_dir,
        test_category_dir,
    )

print("Files have been successfully copied to training, validation, and test directories.")

Files have been successfully copied to training, validation, and test directories.


### Paths of the training, evaluation and testing sets

In [None]:
# Absolute locations of the three split folders on the mounted Drive, written
# out in full so this cell does not depend on the splitting cells above.
train_folder, val_folder, test_folder = (
    f'/content/drive/MyDrive/RCP-CNN-1/CoLeaf-augmented/{split_name}'
    for split_name in ('train', 'eval', 'test')
)