In [2]:
# mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# create directory for data
!mkdir data

# unzip files from zip folder
!unzip "/content/drive/MyDrive/szum_splits/split1" -d "data/"

In [4]:
# Python program to create
# Image Classifier using CNN

# Importing the required libraries
import cv2
import os
import numpy as np
from random import shuffle
from tqdm import tqdm

In [6]:
'''Setting up the env'''

TRAIN_DIR = 'data/split1/train'
TEST_DIR = 'data/split1/val'
IMG_SIZE = (3982, 2700)  # Size of the image
LR = 1e-3

'''Setting up the model which will help with tensorflow models'''
MODEL_NAME = 'plants-detection-{}-{}.model'.format(LR, '6conv-basic')

In [26]:
'''Creating the training data'''
def create_train_data():
    # Creating an empty list where we should store the training data
    # after a little preprocessing of the data
    training_data = []

    # tqdm is only used for interactive loading
    # loading the training data
    for label in os.listdir(TRAIN_DIR):
        path = os.path.join(TRAIN_DIR, label)
        print("label:", label)
        print("path:", path)
        for img in tqdm(os.listdir(path)):
            # print("img:", img)
            # labeling the images
            label = path.split('/')[-1]

            img_path = os.path.join(TRAIN_DIR, img)

            # loading the image from the path and then converting them into
            # grayscale for easier covnet prob
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            # final step-forming the training data list with numpy array of the images
            training_data.append([np.array(img), np.array(label)])

    # shuffling of the training data to preserve the random state of our data
    shuffle(training_data)

    # saving our trained data for further uses if required
    np.save('train_data.npy', training_data)
    return training_data

'''Processing the given test data'''
# Almost same as processing the training data but
# we dont have to label it.
def process_test_data():
    testing_data = []
    for label in os.listdir(TEST_DIR):
        path = os.path.join(TEST_DIR, label)
        print("label:", label)
        print("path:", path)
        for img in tqdm(os.listdir(path)):
            img_num = img.split('.')[0]
            path = os.path.join(TEST_DIR, img)
            img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            testing_data.append([np.array(img), img_num])

    shuffle(testing_data)
    np.save('test_data.npy', testing_data)
    return testing_data

'''Running the training and the testing in the dataset for our model'''
train_data = create_train_data()
test_data = process_test_data()

label: Rust
path: data/split1/train/Rust


100%|██████████| 403/403 [00:00<00:00, 24012.05it/s]


label: Powdery
path: data/split1/train/Powdery


100%|██████████| 400/400 [00:00<00:00, 7096.51it/s]


label: Healthy
path: data/split1/train/Healthy


100%|██████████| 422/422 [00:00<00:00, 10449.11it/s]

label:




 Rust
path: data/split1/val/Rust


100%|██████████| 50/50 [00:00<00:00, 27377.96it/s]


label: Powdery
path: data/split1/val/Powdery


100%|██████████| 50/50 [00:00<00:00, 31385.09it/s]


label: Healthy
path: data/split1/val/Healthy


100%|██████████| 53/53 [00:00<00:00, 4397.76it/s]
