In [1]:
#get rid of annoying GPU warnings (and others)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold



In [2]:
from numpy.random import seed
seed(42)
import tensorflow as tf
tf.random.set_seed(42)

import matplotlib.pyplot as plt      # MATLAB like plotting routines
import random                        # for generating random numbers

from keras.models import Sequential, model_from_json  # Model type to be used

from keras.layers.core import Dense, Dropout, Activation # Types of layers to be used in our model
from keras.utils import np_utils                         # NumPy related tools
from pathlib import Path

from keras.layers import Flatten
from keras.regularizers import l2
from keras.callbacks import LearningRateScheduler
from keras.callbacks import History
from keras.optimizers import Adam

from keras import losses
from sklearn.utils import shuffle

import numpy
from sklearn.model_selection import GridSearchCV
import keras_tuner as kt

**Get the data**

In [3]:
def load_image_files(container_path, dimension=(128,128)):
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "Your own dataset"
    images = []
    flat_data = []
    target = []
    for i, direc in enumerate(folders):
        for file in direc.iterdir():
            if file.suffix.lower() not in ['.jpg', '.jpeg', '.png','.webp','.gif']:
                print(f"Skipped non-image file: {file}")
                continue
            img = cv2.imread(str(file), cv2.IMREAD_COLOR)
            if img is None:
                print(f"Failed to read image: {file}")
                continue
            img_resized = cv2.resize(img, dimension, interpolation=cv2.INTER_AREA)
            
            if img_resized.size == 0:
                print(f"Empty image: {file}")
                continue
            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(i)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)


    return Bunch(
        data=flat_data,
        target=target,
        target_names=categories,
        images=images,
        DESCR=descr
    )

In [4]:
image_dataset = load_image_files("/kaggle/input/soil-data-v3/Soil_Data_V3/Trains")
image_dataset_test = load_image_files("/kaggle/input/soil-data-v3/Soil_Data_V3/Tests")

Skipped non-image file: /kaggle/input/soil-data-v3/Soil_Data_V3/Trains/Mary/desktop.ini
Skipped non-image file: /kaggle/input/soil-data-v3/Soil_Data_V3/Trains/Sand/desktop.ini
Skipped non-image file: /kaggle/input/soil-data-v3/Soil_Data_V3/Trains/Silt/desktop.ini
Skipped non-image file: /kaggle/input/soil-data-v3/Soil_Data_V3/Tests/Chalky/desktop.ini


**Split the data**

In [5]:
X_train = image_dataset.images
X_test = image_dataset_test.images
y_train = image_dataset.target
y_test=image_dataset_test.target

In [6]:
print(X_train.shape)

(3670, 128, 128, 3)


In [7]:
from keras.models import Sequential
from keras.regularizers import l2
from keras.layers import Conv2D, AveragePooling2D, Activation, Dropout, Flatten, BatchNormalization, Dense
from keras.optimizers import Adam

def create_model(hp):
    model = Sequential()

    model.add(Conv2D(32, (3, 3), input_shape=(128, 128, 3), kernel_regularizer=l2(hp.Float('weight_decay', min_value=1e-6, max_value=1e-2)))) # Use hp.Float for weight_decay choice
    model.add(BatchNormalization(axis=-1))
    model.add(Activation('relu'))

    model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(BatchNormalization(axis=-1))
    model.add(Dropout(hp.Float('dropout_1', 0, 0.2)))  # Use hp.Float for dropout choice

    model.add(Conv2D(64, (3, 3), kernel_regularizer=l2(hp.Float('weight_decay', 1e-6, 1e-2))))  # Use hp.Float for weight_decay choice
    model.add(BatchNormalization(axis=-1))
    model.add(Activation('relu'))
    model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(BatchNormalization(axis=-1))
    model.add(Dropout(hp.Float('dropout_2', 0, 0.2)))  # Use hp.Float for dropout choice

    model.add(Conv2D(128, (3, 3), kernel_regularizer=l2(hp.Float('weight_decay', 1e-6, 1e-2))))  # Use hp.Float for weight_decay choice
    model.add(Conv2D(128, (3, 3), kernel_regularizer=l2(hp.Float('weight_decay', 1e-6, 1e-2))))
    model.add(BatchNormalization(axis=-1))
    model.add(Activation('relu'))
    model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dropout(hp.Float('dropout_3', 0,0.2)))  # Use hp.Float for dropout choice

    model.add(Dense(512))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(hp.Float('dropout_4', 0, 0.2)))  # Use hp.Float for dropout choice

    model.add(Dense(8))
    model.add(Activation('softmax'))

    # Compile the model
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-3, 1e-4, 1e-5])),
        metrics=['accuracy']
    )

    return model




In [8]:
from keras_tuner import HyperParameters

# Create a hyperparameter object
hp = HyperParameters()

In [9]:
seed = 42
numpy.random.seed(seed)
batch_size = 8
epochs = 30

from keras import callbacks
earlystopping = callbacks.EarlyStopping(monitor="val_loss",
                                        mode="min", patience=10,
                                        restore_best_weights=True)

In [10]:
model=create_model(hp)

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 batch_normalization (BatchN  (None, 126, 126, 32)     128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 126, 126, 32)      0         
                                                                 
 average_pooling2d (AverageP  (None, 63, 63, 32)       0         
 ooling2D)                                                       
                                                                 
 batch_normalization_1 (Batc  (None, 63, 63, 32)       128       
 hNormalization)                                                 
                                                        

In [13]:
from keras_tuner.tuners import RandomSearch
# Create a tuner and start the hyperparameter search
tuner = RandomSearch(
    create_model,
    objective='val_accuracy',
    max_trials=10,
    directory='my_dir',
    project_name='my_project'
)

# Perform the hyperparameter search
tuner.search(X_train, y_train, epochs=80, validation_data=(X_test, y_test))

Trial 10 Complete [00h 04m 07s]
val_accuracy: 0.8699495196342468

Best val_accuracy So Far: 0.8844696879386902
Total elapsed time: 00h 32m 48s


In [14]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best Hyperparameters:", best_hps.values)

Best Hyperparameters: {'weight_decay': 0.004892610191809718, 'dropout_1': 0.15677940817371858, 'dropout_2': 0.08969994181401919, 'dropout_3': 0.08548384513232879, 'dropout_4': 0.022332840753152984, 'learning_rate': 0.0001}


In [15]:
import time
best_model = tuner.hypermodel.build(best_hps)
start_time=time.time()
best_model.fit(X_train, y_train,batch_size=8, epochs=80, validation_data=(X_test, y_test),callbacks=[earlystopping])
end_time = time.time()
elapsed_time = end_time - start_time
print("Elapsed Time:", round(elapsed_time, 2), "seconds")

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Elapsed Time: 170.02 seconds


In [16]:
start_time=time.time()
score = best_model.evaluate(X_test, y_test)                   
print('Test score:', score[0])
print('Test accuracy:', round(score[1]*100,2))
end_time = time.time()
elapsed_time = end_time - start_time
print("Elapsed Time:", round(elapsed_time, 2), "seconds")

Test score: 0.7055201530456543
Test accuracy: 87.75
Elapsed Time: 0.63 seconds


In [None]:
from keras.models import save_model
best_model.save('optimal_cnn.h5')