In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import os
import cv2

from glob import glob

In [2]:
!pip install wolta

Collecting wolta
  Downloading wolta-0.3.5-py3-none-any.whl.metadata (960 bytes)
Collecting imblearn (from wolta)
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Downloading wolta-0.3.5-py3-none-any.whl (17 kB)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: imblearn, wolta
Successfully installed imblearn-0.0 wolta-0.3.5


In [3]:
# Dump 100 standard-normal samples to ./t.csv.
# NOTE(review): nothing else in the visible notebook reads this file - confirm it is still needed.
noise = np.random.randn(100)
noise_frame = pd.DataFrame(noise)
noise_frame.to_csv('./t.csv')

# Data Analysis

In [4]:
# Print every directory below the Kaggle input mount to reveal the dataset layout.
for walk_entry in os.walk('/kaggle/input'):
    dirname = walk_entry[0]
    print(dirname)

/kaggle/input
/kaggle/input/african-plums-quality-and-defect-assessment-data
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/unripe
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/cracked
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/rotten
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/spotted
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/bruised
/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/unaffected


In [5]:
# Entries directly under the dataset root (one folder per plum category in the
# directory listing printed earlier).
d_paths = glob(
    '/kaggle/input/african-plums-quality-and-defect-assessment-data/african_plums_dataset/african_plums/*'
)

# Flat list of every file found inside those folders.
i_paths = []
for d_path in d_paths:
    i_paths += glob('{}/*'.format(d_path))

print(len(i_paths))

4507


In [6]:
from wolta.visual_tools import dataset_size_same

# NOTE(review): presumably reports whether all images share the same pixel
# dimensions - confirm against the wolta documentation.
all_same_size = dataset_size_same(i_paths)
all_same_size

False

In [7]:
from wolta.visual_tools import dataset_ratio_same

# NOTE(review): presumably reports whether all images share the same aspect
# ratio - confirm against the wolta documentation.
all_same_ratio = dataset_ratio_same(i_paths)
all_same_ratio

False

# Image Processing and Stacking

In [8]:
from wolta.visual_tools import crop

# Square-crop every image, resize it to 128x128 and write it to
# /kaggle/working/<class folder name>/<file name>, mirroring the input layout
# so the later dataset-loading cell can read one folder per class.
for d_path in d_paths:
    d_name = os.path.basename(d_path)
    save_dir = '/kaggle/working/{}'.format(d_name)
    # exist_ok keeps this cell re-runnable once the folders already exist.
    os.makedirs(save_dir, exist_ok=True)

    # Use a dedicated name so the notebook-level `i_paths` (the full dataset
    # listing used by earlier cells) is not overwritten by a per-class list.
    class_i_paths = glob('{}/*'.format(d_path))
    for i_path in class_i_paths:
        i_name = os.path.basename(i_path)
        img = cv2.imread(i_path)

        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            print('Skipping unreadable image: {}'.format(i_path))
            continue

        # Side of the largest square that fits inside the image.
        edge = min(img.shape[0], img.shape[1])

        # NOTE(review): presumably wolta's crop returns an edge x edge region
        # of the image when get_img=True - confirm which region it keeps.
        img = crop(img, crop_width=edge, crop_height=edge, get_img=True)
        img = cv2.resize(img, (128, 128))

        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite('{}/{}'.format(save_dir, i_name), img):
            print('Failed to write image: {}/{}'.format(save_dir, i_name))

# Data Splitting

In [9]:
# Load the preprocessed images from /kaggle/working in batches of 16.
# subset='both' returns two datasets at once: 60% for training and a 40%
# hold-out that a later cell divides into validation and test parts.
# The fixed seed keeps the split the same between runs.
train_ds, test_val_ds = tf.keras.utils.image_dataset_from_directory(
    directory='/kaggle/working',
    image_size=(128, 128),
    batch_size=16,
    validation_split=0.4,
    subset='both',
    seed=123,
)

Found 4507 files belonging to 6 classes.
Using 2705 files for training.
Using 1802 files for validation.


In [10]:
# The hold-out dataset is already batched, so its cardinality is a number of
# batches, not a number of images.
test_val_ds_size = test_val_ds.cardinality().numpy()

# Half of the batches (rounded down) go to validation ...
test_val_split_size = int(test_val_ds_size) // 2
validation_ds = test_val_ds.take(test_val_split_size)

# ... and everything after them becomes the test set.
test_ds = test_val_ds.skip(test_val_split_size)

In [11]:
# Read the class labels now, while train_ds is still the object returned by
# image_dataset_from_directory (a later cell rebinds it to a cached/prefetched pipeline).
names = train_ds.class_names
print(names)

# Number of output units needed by the classifier head.
num_classes = len(names)

['bruised', 'cracked', 'rotten', 'spotted', 'unaffected', 'unripe']


In [12]:
AUTOTUNE = tf.data.AUTOTUNE

# Training data: cache, shuffle with a 1000-element buffer, then prefetch.
cached_train = train_ds.cache()
shuffled_train = cached_train.shuffle(1000)
train_ds = shuffled_train.prefetch(buffer_size=AUTOTUNE)

# Validation and test data: cache and prefetch only, no shuffling.
validation_ds, test_ds = (
    holdout.cache().prefetch(buffer_size=AUTOTUNE)
    for holdout in (validation_ds, test_ds)
)

# Model

In [13]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [14]:
# Three Conv2D + MaxPooling2D stages with 16, 32 and 64 filters.
conv_stack = []
for n_filters in (16, 32, 64):
    conv_stack.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    conv_stack.append(layers.MaxPooling2D())

# 128x128 RGB input scaled by 1/255, the conv stack, then a dense head that
# emits one raw score (no activation) per class.
model = Sequential([
    layers.Input(shape=[128, 128, 3]),
    layers.Rescaling(1./255),
    *conv_stack,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes),
])

In [15]:
# from_logits=True because the final Dense layer has no activation and
# therefore outputs unnormalised scores.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [16]:
# Print an overview of the model's layers before training.
model.summary()

In [17]:
# Train for 5 epochs, evaluating on the validation batches after each epoch;
# the returned History object is kept in `history`.
history = model.fit(train_ds, epochs=5, validation_data=validation_ds)

Epoch 1/5
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 154ms/step - accuracy: 0.4747 - loss: 1.4024 - val_accuracy: 0.5703 - val_loss: 1.2134
Epoch 2/5
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 148ms/step - accuracy: 0.6013 - loss: 1.1254 - val_accuracy: 0.5759 - val_loss: 1.1730
Epoch 3/5
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 148ms/step - accuracy: 0.6215 - loss: 1.0477 - val_accuracy: 0.5938 - val_loss: 1.1457
Epoch 4/5
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 149ms/step - accuracy: 0.6832 - loss: 0.8980 - val_accuracy: 0.5882 - val_loss: 1.1241
Epoch 5/5
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 150ms/step - accuracy: 0.7154 - loss: 0.7969 - val_accuracy: 0.6283 - val_loss: 1.1264


In [18]:
# Score the trained model on the held-out test batches; evaluate() yields the
# loss followed by the accuracy metric configured in compile().
test_metrics = model.evaluate(test_ds)
loss, acc = test_metrics

print(f"Test Accuracy: {acc * 100:.2f}%")

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - accuracy: 0.6445 - loss: 1.0587
Test Accuracy: 63.80%
