In [None]:
# Mount Google Drive at /content/drive; the dataset paths used by later cells
# all live under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf

# Loader settings shared by the training and validation subsets.
BATCH_SIZE = 32
IMG_SIZE = 224

# Dataset locations on the mounted Drive.
input_path = '/content/drive/MyDrive/Dataset/Paddy_doctor/'
train_data_dir = input_path + 'train_images/'
test_data_dir = input_path + 'test_images/'

# Both subsets are carved out of the same directory, so every option except
# `subset` is shared between the two calls (same split fraction and seed).
split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir, subset="training", **split_options)
val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir, subset="validation", **split_options)

AUTOTUNE = tf.data.AUTOTUNE
def prepare_dataset(ds, num_classes=None):
    """One-hot encode the labels of ``ds`` and enable caching and prefetching.

    Args:
        ds: dataset yielding ``(image, label)`` pairs with integer labels.
        num_classes: depth of the one-hot vectors. When omitted, the
            notebook-level ``n_classes`` is used, so existing one-argument
            calls keep working.

    Returns:
        The mapped dataset with ``cache()`` and ``prefetch()`` applied.
    """
    depth = n_classes if num_classes is None else num_classes

    def input_preprocess(image, label):
        label = tf.one_hot(label, depth)
        return image, label

    ds = ds.map(input_preprocess, num_parallel_calls = AUTOTUNE)
    # cache() sits after map() so the one-hot conversion is stored with the data.
    ds = ds.cache().prefetch(buffer_size = AUTOTUNE)
    return ds

# Read class_names from the dataset returned by image_dataset_from_directory,
# i.e. before train_ds is rebound to the mapped/cached dataset below.
class_names = train_ds.class_names
n_classes = len(class_names)
print(class_names)
train_ds = prepare_dataset(train_ds, n_classes)
val_ds = prepare_dataset(val_ds, n_classes)

Found 10407 files belonging to 10 classes.
Using 8326 files for training.
Found 10407 files belonging to 10 classes.
Using 2081 files for validation.
['bacterial_leaf_blight', 'bacterial_leaf_streak', 'bacterial_panicle_blight', 'blast', 'brown_spot', 'dead_heart', 'downy_mildew', 'hispa', 'normal', 'tungro']


In [None]:
import matplotlib.pyplot as plt

# Preview the first nine images of one training batch in a 3x3 grid,
# each titled with the class name decoded from its one-hot label.
plt.figure(figsize=(10, 10))
first_batch = train_ds.take(1)
for images, labels in first_batch:
  for position in range(1, 10):
    picture = images[position - 1].numpy().astype("uint8")
    one_hot = labels[position - 1].numpy().tolist()
    ax = plt.subplot(3, 3, position)
    ax.imshow(picture)
    # The position of the 1 in the one-hot vector is the class index.
    ax.set_title(class_names[one_hot.index(1)])
    ax.axis("off")

In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from pathlib import Path
import shutil


# Build 5 stratified folds from train.csv and write each fold's row lists to
# tempData/<fold_no>/train.csv and tempData/<fold_no>/test.csv.
dir_loc="/content/drive/MyDrive/Dataset/Paddy_doctor/"
df = pd.read_csv(dir_loc+'train.csv')

# NOTE: despite its name, test_loc points at the train_images folder.
test_loc=dir_loc+"train_images"
temp_loc=dir_loc+"tempData"

# Stratify on the class label so every fold keeps the class proportions.
target = df.loc[:,'label']
fold_no = 1
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
for train_index, test_index in kfold.split(df, target):
  lokasi= temp_loc+"/"+str(fold_no)
  Path(lokasi).mkdir(parents=True, exist_ok=True)
  # split() yields positional indices, so select rows with iloc (position)
  # rather than loc (index label); the two only coincide for a default index.
  test = df.iloc[test_index]
  train = df.iloc[train_index]
  # index=False keeps the row index from being written as an extra unnamed
  # column that would reappear when the CSV is read back.
  test.to_csv(lokasi+"/test.csv", index=False)
  train.to_csv(lokasi+"/train.csv", index=False)
  print(fold_no)
  print(train.shape,test.shape)
  fold_no += 1

#ds = train_ds.concatenate(val_ds)
# KFOLD un balance data
#kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
#for train, test in kfold.split(np.arange(10407),np.arange(10407)):
#print(train)
#df.head()

1
(8325, 4) (2082, 4)
2
(8325, 4) (2082, 4)
3
(8326, 4) (2081, 4)
4
(8326, 4) (2081, 4)
5
(8326, 4) (2081, 4)


In [None]:
from os.path import exists as file_exists
import pandas as pd
from pathlib import Path
import shutil

# Dataset root on the mounted Drive; train.csv is read from it.
dir_loc="/content/drive/MyDrive/Dataset/Paddy_doctor/"
df = pd.read_csv(dir_loc+'train.csv')

# NOTE: despite its name, test_loc is the train_images folder; the copy
# helpers in this cell use it as the source directory for every image.
test_loc=dir_loc+"train_images"
# Parent folder of the per-fold working directories.
temp_loc=dir_loc+"tempData"

def copyfile(src,dst):
  """Copy ``src`` to ``dst`` on a best-effort basis.

  Failures are reported on stdout instead of being raised, so one bad file
  does not abort a long batch copy.

  Args:
    src: path of the file to copy.
    dst: path of the destination file.

  Returns:
    True when the file was copied, False when the copy failed.
  """
  try:
    shutil.copyfile(src, dst)
    return True
  # If source and destination are same
  except shutil.SameFileError:
    print("Source and destination represents the same file.")
  # If destination is a directory.
  except IsADirectoryError:
    print("Destination is a directory.")
  # If there is any permission issue
  except PermissionError:
    print("Permission denied.")
  # Any other ordinary error. Catch Exception rather than using a bare except
  # so KeyboardInterrupt/SystemExit can still stop a long-running copy loop,
  # and include the error so the cause is visible.
  except Exception as err:
    print("Error occurred while copying file.", err)
  return False

def _copy_split(lokasi, rows, split_name):
  """Copy the images listed in ``rows`` into ``<lokasi>/<split_name>/<label>/``.

  Source images are read from ``<test_loc>/<label>/<image_id>``. A row whose
  source file does not exist is reported and skipped. The number of rows for
  which a copy was attempted is printed at the end.
  """
  split_dir = lokasi+"/"+split_name
  Path(split_dir).mkdir(parents=True, exist_ok=True)
  made_dirs = set()  # label folders already created, to avoid one mkdir per row
  attempted = 0
  # Only these two columns are needed; plain tuples are cheaper than iterrows().
  for label, image_id in rows[['label', 'image_id']].itertuples(index=False, name=None):
    imagefile = "/".join([test_loc, label, image_id])
    label_folder = "/".join([split_dir, label])
    if label_folder not in made_dirs:
      Path(label_folder).mkdir(parents=True, exist_ok=True)
      made_dirs.add(label_folder)
    if not file_exists(imagefile):
      print(imagefile," tidak ada")
      # Nothing to copy: skip instead of triggering a second error message
      # from copyfile and counting the row as copied.
      continue
    copyfile(imagefile, "/".join([label_folder, image_id]))
    attempted += 1
  print(attempted)

def copydata_test(lokasi,test):
  """Copy the images of the ``test`` rows into ``<lokasi>/test/<label>/``.

  Args:
    lokasi: fold directory that receives the ``test`` sub-folder.
    test: DataFrame with ``label`` and ``image_id`` columns.
  """
  _copy_split(lokasi, test, "test")

def copydata_train(lokasi,train):
  """Copy the images of the ``train`` rows into ``<lokasi>/train/<label>/``.

  Args:
    lokasi: fold directory that receives the ``train`` sub-folder.
    train: DataFrame with ``label`` and ``image_id`` columns.
  """
  _copy_split(lokasi, train, "train")

# Materialise one fold on disk. The fold number (5) is hard-coded here.
lokasi= temp_loc+"/5"
# Row lists for this fold, read from the fold directory.
train=pd.read_csv(lokasi+"/train.csv")
test=pd.read_csv(lokasi+"/test.csv")
print(train.shape)
print(test.shape)
# Copy the listed images into <lokasi>/test and <lokasi>/train.
copydata_test(lokasi,test)
copydata_train(lokasi,train)

(8326, 5)
(2081, 5)
2081
8326


In [None]:
import os


def countfile(path,ext=".jpg"):
  """Return how many files under ``path`` (searched recursively) have a name ending in ``ext``."""
  return sum(
      name.endswith(ext)
      for _root, _dirs, names in os.walk(path)
      for name in names
  )

# Sanity check: total number of .jpg files anywhere under the directory `lokasi`.
print(countfile(lokasi))

10407


In [None]:
import tensorflow as tf

# Loader settings for the per-fold datasets.
# NOTE: this rebinds the notebook-wide BATCH_SIZE / IMG_SIZE names.
BATCH_SIZE = 32
IMG_SIZE = 64

# Each split gets its own name so the test split is not lost when the
# train split is loaded.
test_dataset = tf.keras.utils.image_dataset_from_directory(
    lokasi+"/test",
    label_mode='categorical',labels='inferred',
    image_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,shuffle = True)
train_dataset = tf.keras.utils.image_dataset_from_directory(
    lokasi+"/train",
    label_mode='categorical',labels='inferred',
    image_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,shuffle = True)
# Backward-compatible alias: `dataset` still ends up as the train split.
dataset = train_dataset

Found 2081 files belonging to 10 classes.
Found 8326 files belonging to 10 classes.


In [None]:
import tensorflow as tf

# Define some parameters for the data loader:
# NOTE: this rebinds the notebook-wide BATCH_SIZE / IMG_SIZE names.
BATCH_SIZE = 100
IMG_SIZE = 64

# Path of the dataset
input_path = '/content/drive/MyDrive/Dataset/Paddy_doctor/'
train_data_dir = input_path + 'train_images/'
test_data_dir = input_path + 'test_images/'

# Load every image under train_images/ (no validation split) with one-hot labels.
dataset = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    label_mode='categorical',labels='inferred',
    image_size = (IMG_SIZE, IMG_SIZE),
    batch_size = BATCH_SIZE,shuffle = True)

# Use the stable tf.data.AUTOTUNE name, as the first data-loading cell does,
# instead of the deprecated tf.data.experimental alias.
AUTOTUNE = tf.data.AUTOTUNE
dataset = dataset.cache().prefetch(buffer_size=AUTOTUNE)

#a=1
#for image_batch, labels_batch in dataset:
  #print(image_batch.shape)
  #print(labels_batch.shape)
  #print(a)
  #a=a+1

In [None]:
# Walk the validation dataset once, printing each element's type and a
# running 1-based count (the last number printed is the number of batches).
a = 0
for a, element in enumerate(val_ds, start=1):
  print(type(element))
  print(a)

In [None]:
# Persist val_ds to Drive and load it back as `new_dataset`.
# NOTE(review): the file name says "data_train" but the object saved is val_ds — confirm which split is intended.
# NOTE(review): the ".h5" suffix presumably does not make this an HDF5 file; Dataset.save uses its own on-disk format — confirm.
path ="/content/drive/MyDrive/Dataset/Paddy_doctor/tempData/data_train.h5"
val_ds.save(path)
new_dataset = tf.data.Dataset.load(path)


In [None]:
# load and evaluate a saved model
from numpy import loadtxt
from tensorflow.keras.models import load_model
import os

dir_path = '/content/drive/MyDrive/Dataset/Paddy_doctor/hasil_model'

# Evaluate every saved model file found directly inside dir_path on new_dataset.
# sorted(): os.listdir returns entries in arbitrary order, so sorting keeps the
# report order reproducible. The loop variable is deliberately not called
# `path`, so it does not overwrite the notebook-level `path` of the saved dataset.
for model_name in sorted(os.listdir(dir_path)):
    file_model = os.path.join(dir_path, model_name)
    # Skip sub-directories; only plain files are treated as saved models.
    if not os.path.isfile(file_model):
        continue
    print(file_model)
    model = load_model(file_model)
    # Index 1 is read from both metrics_names and the evaluate() result;
    # this assumes the model was compiled with at least one metric.
    score = model.evaluate(new_dataset, verbose=1)
    print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

