In [1]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'plantvillage-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F277323%2F658267%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240310%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240310T145249Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D99be4dc1c8d45cad758dcb4d59817f8b5859c6082c5946048abd102f9eecbd6705a32b55dc10ae3ff92d07983ad3952d5eb02a5e887fa9a258c8fa2f16d4756822c6edc67ed8d533bf8f6ebac5ef2fe8bc676eebf6406bdbc5b2d50a7f5b256ec5398ea5957f9df1b23598ac486779af6219c2c07d5dbfed00fd5624c009f48e5b989fe9a6abba1e44bda06be517bb3b70c356a7da2f6c5d3a1e05cf26d1d4c613d02162d0908737d09975f68cbedc1f136566857fe5086a795b9139ccc76f988b8f7b485560da96178c208b194d296e26efa24dec9e35f416dcd8bf2ba6d293996b066cebc609295b0bf5ccde917e1d8ae315d5c9a9dfb205a22f89a77d5783'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')

Failed to load (likely expired) https://storage.googleapis.com/kaggle-data-sets/277323/658267/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240310%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240310T145249Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=99be4dc1c8d45cad758dcb4d59817f8b5859c6082c5946048abd102f9eecbd6705a32b55dc10ae3ff92d07983ad3952d5eb02a5e887fa9a258c8fa2f16d4756822c6edc67ed8d533bf8f6ebac5ef2fe8bc676eebf6406bdbc5b2d50a7f5b256ec5398ea5957f9df1b23598ac486779af6219c2c07d5dbfed00fd5624c009f48e5b989fe9a6abba1e44bda06be517bb3b70c356a7da2f6c5d3a1e05cf26d1d4c613d02162d0908737d09975f68cbedc1f136566857fe5086a795b9139ccc76f988b8f7b485560da96178c208b194d296e26efa24dec9e35f416dcd8bf2ba6d293996b066cebc609295b0bf5ccde917e1d8ae315d5c9a9dfb205a22f89a77d5783 to path /kaggle/input/plantvillage-dataset
Data source import complete.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# -*- coding: utf-8 -*-
"""Untitled3.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1_7AzFr6ROKutEDjaMCJBAWnAveDGpTxv
"""

import numpy as np
import pickle
import cv2
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
from keras.layers import BatchNormalization
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Activation, Flatten, Dropout, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow

EPOCHS = 25
INIT_LR = 1e-3
BS = 32
default_image_size = tuple((256, 256))
image_size = 0
directory_root = '/kaggle/input/plantvillage-dataset'
width=256
height=256
depth=3

def convert_image_to_array(image_dir):
    try:
        print(image_dir)
        image = cv2.imread(image_dir)
        if image is not None :
            image = cv2.resize(image, default_image_size)
            return img_to_array(image)
        else :
            return np.array([])
    except Exception as e:
        print(f"Error : {e}")
        return None

image_list, label_list = [], []
try:
    print("[INFO] Loading images ...")
    root_dir = listdir(directory_root)
    for directory in root_dir :
        # remove .DS_Store from list
        if directory == ".DS_Store" :
            root_dir.remove(directory)

    for plant_folder in root_dir :
        plant_disease_channel_list = listdir(f"{directory_root}/{plant_folder}")

        for channel_folder in plant_disease_channel_list :
            # remove .DS_Store from list
            if channel_folder == ".DS_Store" :
                plant_disease_channel_list.remove(disease_folder)

        for plant_channel_folder in plant_disease_channel_list:
            print(f"[INFO] Processing {plant_channel_folder} ...")
            plant_disease_folder_list = listdir(f"{directory_root}/{plant_folder}/{plant_channel_folder}/")

            for plant_disease_folder in plant_disease_folder_list :
                if plant_disease_folder == ".DS_Store" :
                   plant_disease_folder_list.remove(plant_disease_folder)

            for image_folder in plant_disease_folder_list:
                image_directory = listdir(f"{directory_root}/{plant_folder}/{plant_channel_folder}/{image_folder}")

                for image_directory_temp in image_directory :
                    if image_directory_temp == ".DS_Store" :
                       image_directory.remove(plant_disease_folder)

                for single_image in image_directory[:200]:
                    image_address = f"{directory_root}/{plant_folder}/{plant_channel_folder}/{plant_disease_folder}/{single_image}"

                    if image_address.endswith(".jpg") == True or image_address.endswith(".JPG") == True:
                       image_list.append(convert_image_to_array(image_address))
                       label_list.append(plant_disease_folder)

    print("[INFO] Image loading completed")
except Exception as e:
    print(f"Error : {e}")

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
/kaggle/input/plantvillage-dataset/plantvillage dataset/segmented/Tomato___Bacterial_spot/c26ad9b9-bc59-4cf1-93d3-630fba7ee460___FAM_B.Msls 4187_final_masked.jpg
/kaggle/input/plantvillage-dataset/plantvillage dataset/segmented/Tomato___Bacterial_spot/decd47b1-34d6-40f8-94d3-2fcaba5ea7d2___FAM_B.Msls 4388_final_masked.jpg
/kaggle/input/plantvillage-dataset/plantvillage dataset/segmented/Tomato___Bacterial_spot/989c0a62-1c16-48bd-8721-fb4bbf1e313f___FAM_B.Msls 4384_final_masked.jpg
/kaggle/input/plantvillage-dataset/plantvillage dataset/segmented/Tomato___Bacterial_spot/c449f5bb-9b5e-40f9-925e-63305a6f8c3f___FAM_B.Msls 1091_final_masked.jpg
/kaggle/input/plantvillage-dataset/plantvillage dataset/segmented/Tomato___Bacterial_spot/bc99eee4-c8a1-40bb-b98b-315294fbd34e___FAM_B.Msls 3867_final_masked.jpg
/kaggle/input/plantvillage-dataset/plantvillage dataset/segmented/Tomato___Bacterial_spot/cf52073e-d45d-41f5-87fd-c781ba37fdf7___FAM_B.Msls 

In [None]:
image_size = len(image_list)
print(label_list)
label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)
pickle.dump(label_binarizer,open('label_transform.pkl', 'wb'))
n_classes = len(label_binarizer.classes_)

print(label_binarizer.classes_)

np_image_list = np.array(image_list, dtype=np.float16) / 225.0

print("[INFO] Spliting data to train, test")
x_train, x_test, y_train, y_test = train_test_split(np_image_list, image_labels, test_size=0.2, random_state = 42)

aug = ImageDataGenerator(
    rotation_range=25, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2,
    zoom_range=0.2,horizontal_flip=True,
    fill_mode="nearest")

model = Sequential()
inputShape = (height, width, depth)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1
model.add(Conv2D(32, (3, 3), padding="same",input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation("softmax"))

opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
# distribution
model.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])
# train the network
print("[INFO] training network...")

history = model.fit_generator(
    aug.flow(x_train, y_train, batch_size=BS),
    validation_data=(x_test, y_test),
    steps_per_epoch=len(x_train) // BS,
    epochs=EPOCHS, verbose=1
    )

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accurarcy')
plt.plot(epochs, val_acc, 'r', label='Validation accurarcy')
plt.title('Training and Validation accurarcy')
plt.legend()

plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()

print("[INFO] Calculating model accuracy")
scores = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {scores[1]*100}")

# save the model to disk
print("[INFO] Saving model...")
pickle.dump(model,open('cnn_model.pkl', 'wb'))

['Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 'Tomato___Bacterial_spot', 

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (22654,) + inhomogeneous part.