In [None]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # only needed to disable the GPU

import tensorflow as tf  
import tensorflow.keras as keras  # pakai Keras dari tensorflow
import os  
from tensorflow.keras.layers import Flatten, Dense, AveragePooling2D, GlobalAveragePooling2D  
from tensorflow.keras.models import Model  
from tensorflow.keras.optimizers import RMSprop, SGD  
from tensorflow.keras.callbacks import ModelCheckpoint  
from tensorflow.keras.callbacks import EarlyStopping  
from tensorflow.keras.preprocessing.image import ImageDataGenerator  
from tensorflow.keras.callbacks import CSVLogger  
from tensorflow.keras.layers import BatchNormalization  
from tensorflow.keras.models import load_model  
import numpy as np  
from pathlib import Path  
import shutil  

In [None]:
tf.config.list_physical_devices(device_type='GPU')  # list the available GPUs


In [None]:
tf.config.list_physical_devices(device_type='CPU')  # list the available CPUs


In [None]:
print(f"Tensorflow version: {tf.__version__}")  # check the TensorFlow version

In [None]:
 # Create one training and one validation directory per class for this session.
 import pathlib
 session = 'sesi-01'
 classnames = [
     'Black-grass',
     'Charlock',
     'Cleavers',
     'Common Chickweed',
     'Common wheat',
     'Fat Hen',
     'Loose Silky-bent',
     'Maize',
     'Scentless Mayweed',
     'Shepherds Purse',
     'Small-flowered Cranesbill',
     'Sugar beet',
 ]
 train_dir = session + "/train"
 valid_dir = session + "/valid"
 for class_label in classnames:
     # For each class: the training directory first, then the validation one.
     for split_root in (train_dir, valid_dir):
         class_dir = split_root + '/' + class_label
         print(class_dir)
         # parents/exist_ok make the cell safe to run again.
         pathlib.Path(class_dir).mkdir(parents=True, exist_ok=True)

In [None]:
# Copy the image files into the session directories: every 5th file goes to
# validation, the rest to training (80% training - 20% validation).

original_data_dir="original-image/plant-seedlings-classification/train"

counter = 0
for root, dirs, files in os.walk(original_data_dir):
    # Sort in place so os.walk descends into sub-directories in a fixed order.
    # Combined with the sorted file list below, every run assigns each image to
    # the same split, so re-running the cell cannot place one image in both
    # the training and the validation directory.
    dirs.sort()
    print(root)
    for file in sorted(files):
        fullfilename = os.path.join(root, file)
        # The class label is the name of the directory that holds the file.
        classname = os.path.basename(os.path.dirname(fullfilename))
        if classname not in classnames:
            # No destination directory exists for this label (e.g. a stray file
            # outside the class folders); skip it instead of failing on copy.
            print("skip " + fullfilename + " (unknown class '" + classname + "')")
            continue
        if counter % 5 == 0:  # every 5th image -> validation
            dst_filename = valid_dir + "/" + classname + "/" + file
            print("copy " + fullfilename + " -> " + dst_filename)
        else:  # remaining images -> training
            dst_filename = train_dir + "/" + classname + "/" + file
        shutil.copyfile(fullfilename, dst_filename)
        counter += 1

In [None]:
# Prepare the convolutional base: NASNetMobile without its classification top.
img_width=224
img_height=224

cnn_notop = keras.applications.nasnet.NASNetMobile(
    # Keras expects input_shape as (height, width, channels).
    input_shape=(img_height, img_width, 3),
    include_top=False,
    weights='imagenet',  # transfer learning; pass weights=None to train without it
    input_tensor=None,
    pooling=None,
)


In [None]:
# Classification head stacked on the output of the convolutional base.
x = cnn_notop.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
# One softmax unit per entry of classnames, so the head follows the class list
# instead of a hard-coded count.
predictions = Dense(len(classnames), activation='softmax')(x)
the_model = Model(cnn_notop.input, predictions)

In [None]:
# Training: compile the model, build the data generators, then fit.
learning_rate = 0.0001
logfile = session + '-train' + '.log'
batch_size=32
nbr_epochs=100
print("training  directory: "+train_dir)
print("valication directory: "+valid_dir)
# `learning_rate` is the supported keyword (`lr` is a deprecated alias).
# `decay` is omitted: it was 0.0 (no decay) and newer optimizers reject it.
optimizer = SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
the_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Callbacks: per-epoch CSV log, early stopping and checkpointing.
csv_logger = CSVLogger(logfile, append=True)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')
# NOTE(review): checkpoints are kept when val_accuracy improves, while the file
# name records val_loss and early stopping watches val_loss - confirm this mix
# is intended.
best_model_filename=session+'-weights.{epoch:02d}-{val_loss:.2f}.h5'
best_model = ModelCheckpoint(best_model_filename, monitor='val_accuracy', verbose=1, save_best_only=True)

# Augmentation is applied to the training images only; validation images are
# just rescaled.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True)
val_datagen = ImageDataGenerator(rescale=1. / 255)

print('prepare train generator')
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),  # Keras expects (height, width)
    batch_size=batch_size,
    shuffle=True,
    classes=classnames,
    class_mode='categorical')
print('prepare validation generator')
validation_generator = val_datagen.flow_from_directory(
    valid_dir,
    target_size=(img_height, img_width),  # Keras expects (height, width)
    batch_size=batch_size,
    shuffle=True,
    classes=classnames,
    class_mode='categorical')

print('fit generator')
the_model.fit(
    x=train_generator,
    epochs=nbr_epochs,
    verbose=1,
    validation_data=validation_generator,
    callbacks=[best_model, csv_logger, early_stopping]
)

In [None]:
# Prediction setup: build the test generator and load the chosen checkpoint.
batch_size=4
# Choose the checkpoint file manually; the name may differ between runs, e.g.
# 'simpleNASNet-weights.10-0.17.h5' or 'sesi-01-weights.08-0.58.h5'.
weights_path = 'sesi-01-weights.07-2.48.h5'
test_data_dir = session+'/test/'

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    directory=test_data_dir,
    target_size=(img_height, img_width),  # Keras expects (height, width)
    batch_size=batch_size,
    shuffle=False,  # keep file order fixed so filenames line up with predictions
    classes=None,
    class_mode=None)
# Take the sample count from the generator rather than hard-coding it, so it
# always matches the images actually found in the test directory.
nbr_test_samples = test_generator.samples
test_image_list = test_generator.filenames
print('Loading model and weights')
predict_model = load_model(weights_path)
# To predict with the in-memory model from the training cell instead, use:
# predict_model = the_model


In [None]:
# Print the summary of the model loaded for prediction.
predict_model.summary() 

In [None]:
print('Begin to predict for testing data ...')
# len(test_generator) is the whole number of batches the generator yields, so
# every test image is predicted even when the sample count is not a multiple of
# batch_size (a plain division would pass a fractional step count).
predictions = predict_model.predict(x=test_generator, steps=len(test_generator), verbose=1)
np.savetxt(session+'-predictions.txt', predictions)  # store prediction matrix, for later analysis if necessary

In [None]:
# Submission: write one "file,species" row per test image.
submission_file=session+'-submit.csv'
print('Begin to write submission file:'+submission_file)
# Fail before touching the file if some test images have no prediction row.
if len(predictions) < len(test_image_list):
    raise ValueError('got %d predictions for %d test images'
                     % (len(predictions), len(test_image_list)))
# Index of the highest-scoring class in each prediction row.
predicted_indices = np.argmax(predictions, axis=1)
# The with-block closes the file even if writing a row raises.
with open(submission_file, 'w') as f_submit:
    f_submit.write('file,species\n')
    for image_name, class_index in zip(test_image_list, predicted_indices):
        basename = os.path.basename(image_name)
        prediction_class = classnames[class_index]
        f_submit.write('%s,%s\n' % (basename, prediction_class))
print('Finished write submission file ..')