# Check GPU

In [15]:
# Check that a GPU is available in Colab.

!nvidia-smi -L || echo


GPU 0: Tesla T4 (UUID: GPU-cc38f60b-a6be-052e-3e77-0e1efdfcf1cb)


# Install Kaggle package

In [16]:
# Kaggle lets us download datasets directly using the Kaggle API.

!pip install -q kaggle

# Install split-folders

In [17]:
# This package automatically divides our dataset
# into train / validation / test folders with a given ratio.

!pip install -q split-folders==0.5.1

# Import required Python libraries

In [18]:
# - os, glob, shutil, json → for file and folder operations
# - random, numpy → for numeric calculations and reproducibility
# - tensorflow → main deep learning library (for CNN)

import os, glob, shutil, json, random
import numpy as np
import tensorflow as tf


# Set random seeds for reproducibility

In [19]:
# Seed Python's `random`, NumPy and TensorFlow with one shared value so that
# their random number generators start from the same state on every run.

SEED = 42
for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
    set_seed(SEED)


# Verify TensorFlow version and GPU availability

In [20]:
# Report the TensorFlow version and the GPUs TensorFlow can see.
# - This notebook needs TensorFlow >= 2.10, so fail early on an older install
#   instead of only printing the version.
# - A missing GPU is reported as a warning rather than an error.

print("TensorFlow version:", tf.__version__)

# Compare only (major, minor); the patch / pre-release suffix is irrelevant here.
tf_major, tf_minor = (int(part) for part in tf.__version__.split(".")[:2])
assert (tf_major, tf_minor) >= (2, 10), (
    f"TensorFlow >= 2.10 is required, found {tf.__version__}"
)

visible_gpus = tf.config.list_physical_devices('GPU')
print("GPUs visible to TensorFlow:", visible_gpus)
if not visible_gpus:
    print("WARNING: no GPU visible to TensorFlow - training will run on the CPU.")


TensorFlow version: 2.19.0
GPUs visible to TensorFlow: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Create folder paths for our project

In [21]:
# Project directory layout (everything lives under Colab's /content):
#   DATA_ROOT     - original dataset as downloaded from Kaggle
#   SPLIT_ROOT    - the dataset after splitting into train/val/test
#   ARTIFACTS_DIR - saved models, graphs, and other results

import os

DATA_ROOT = "/content/data"
SPLIT_ROOT = "/content/tea_splits"
ARTIFACTS_DIR = "/content/artifacts"

# exist_ok=True makes this cell safe to re-run when the folders already exist.
for project_dir in (DATA_ROOT, SPLIT_ROOT, ARTIFACTS_DIR):
    os.makedirs(project_dir, exist_ok=True)


# Download dataset using opendatasets

In [22]:
# This method lets you directly type your Kaggle username and key (safe for Colab).
# It will automatically download and unzip the dataset into /content/data.

!pip install -q opendatasets

import opendatasets as od

# Dataset URL
dataset_url = "https://www.kaggle.com/datasets/uthpalabandara/common-diseases-of-tea-leaves-in-sri-lanka"

# Download the dataset (you will be prompted for Kaggle username & key)
od.download(dataset_url, data_dir="/content/data")


Skipping, found downloaded files in "/content/data/common-diseases-of-tea-leaves-in-sri-lanka" (use force=True to force download)


# Verify the dataset structure

In [23]:
# Verify that the dataset was downloaded and extracted correctly.
# `ls -R` lists /content/data recursively: each class subfolder is shown
# together with every image file inside it, so expect long output.

!ls -R /content/data


/content/data:
common-diseases-of-tea-leaves-in-sri-lanka

/content/data/common-diseases-of-tea-leaves-in-sri-lanka:
'Algal Leaf'  'Bird Eye Spot'  'Gray Light'  'Red Leaf Spot'
 Anthracnose  'Brown Blight'    Healthy      'White Spot'

'/content/data/common-diseases-of-tea-leaves-in-sri-lanka/Algal Leaf':
 UNADJUSTEDNONRAW_thumb_10.jpg		 UNADJUSTEDNONRAW_thumb_45.jpg
 UNADJUSTEDNONRAW_thumb_11.jpg		 UNADJUSTEDNONRAW_thumb_46.jpg
 UNADJUSTEDNONRAW_thumb_12.jpg		 UNADJUSTEDNONRAW_thumb_47.jpg
 UNADJUSTEDNONRAW_thumb_13.jpg		 UNADJUSTEDNONRAW_thumb_48.jpg
 UNADJUSTEDNONRAW_thumb_14.jpg		 UNADJUSTEDNONRAW_thumb_49.jpg
 UNADJUSTEDNONRAW_thumb_15.jpg		'UNADJUSTEDNONRAW_thumb_4a - Copy.jpg'
 UNADJUSTEDNONRAW_thumb_16.jpg		 UNADJUSTEDNONRAW_thumb_4a.jpg
 UNADJUSTEDNONRAW_thumb_17.jpg		'UNADJUSTEDNONRAW_thumb_4b - Copy.jpg'
 UNADJUSTEDNONRAW_thumb_18.jpg		 UNADJUSTEDNONRAW_thumb_4b.jpg
 UNADJUSTEDNONRAW_thumb_19.jpg		'UNADJUSTEDNONRAW_thumb_4c - Copy.jpg'
'UNADJUSTEDNONRAW_thumb_1a - Copy.jpg'

# Flatten the folder structure

In [24]:
import os, shutil


def flatten_nested_class_dirs(root_dir):
    """Move images out of duplicated inner class folders.

    Handles a layout of the form ``<root_dir>/<Class>/<Class>/<entries>``:
    for every class folder that contains a subfolder with its own name, the
    subfolder's entries are moved up one level and the then-empty subfolder
    is removed. Class folders without such a subfolder are left untouched,
    so running this again on an already-flat dataset changes nothing.

    An entry whose name already exists in the class folder is kept under a
    new name (``name_1.ext``, ``name_2.ext``, ...) instead of aborting the
    whole operation halfway through.

    Args:
        root_dir: Directory whose immediate subdirectories are class folders.

    Returns:
        The number of entries that were moved.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist.
    """
    moved = 0
    for class_name in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_name)
        inner_path = os.path.join(class_path, class_name)
        # isdir (not exists): a plain file that happens to share the class
        # name must not be treated as a nested folder.
        if not os.path.isdir(inner_path):
            continue

        for entry in sorted(os.listdir(inner_path)):
            target = os.path.join(class_path, entry)
            stem, ext = os.path.splitext(entry)
            suffix = 0
            # shutil.move raises when the destination name is taken, so pick
            # a free name first to keep both files.
            while os.path.lexists(target):
                suffix += 1
                target = os.path.join(class_path, f"{stem}_{suffix}{ext}")
            shutil.move(os.path.join(inner_path, entry), target)
            moved += 1

        # Everything was moved out, so the folder is empty; rmdir (unlike
        # rmtree) can never delete data by accident.
        os.rmdir(inner_path)
    return moved


# Flatten nested folders (move inner images up one level)
root_dir = "/content/data/common-diseases-of-tea-leaves-in-sri-lanka"
flatten_nested_class_dirs(root_dir)


# List the class folders again to confirm the layout after flattening.
!ls /content/data/common-diseases-of-tea-leaves-in-sri-lanka


'Algal Leaf'  'Bird Eye Spot'  'Gray Light'  'Red Leaf Spot'
 Anthracnose  'Brown Blight'    Healthy      'White Spot'
