In [1]:
import pandas as pd
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

In [9]:
from google.colab import drive

# Mount Google Drive at the given path; force_remount=True asks Colab to
# redo the mount rather than reuse an existing one.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [2]:
!pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [7]:
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

## DataFrame

In [None]:
# Dataset root; the trailing slash matters because paths are built by concatenation.
directory = '../input/sorghum-id-fgvc-9/'

# Image folders for the training and test sets.
train_dir, test_dir = (directory + sub for sub in ('train_images/', 'test/'))

# Training table (columns used later: 'image', 'cultivar') and the sample submission.
train_csv, submission_csv = (
    pd.read_csv(directory + name)
    for name in ('train_cultivar_mapping.csv', 'sample_submission.csv')
)

In [None]:
# Show the training table as loaded from the CSV.
train_csv

In [None]:
# Drop rows with any missing value, then renumber the index 0..n-1.
# Later cells address rows with `train_csv.loc[i, ...]` for i = 0, 1, 2, ...;
# without the reset, a dropped row leaves a gap in the index labels and such a
# lookup raises KeyError.
train_csv = train_csv.dropna(axis=0).reset_index(drop=True)

In [None]:
# Show the table again after the rows with missing values were dropped.
train_csv

In [None]:
# Distinct cultivar names (in order of first appearance); its length is used
# later as the number of output classes.
label_class = train_csv['cultivar'].unique()
label_class

In [None]:
# Number of rows per cultivar, most frequent first.
train_csv['cultivar'].value_counts()

## Image Overview

In [None]:
# Preview the first n*n training images in an n x n grid, each titled with its cultivar.
n = 5
n_shown = min(n * n, len(train_csv))  # the table may hold fewer than n*n rows

plt.figure(figsize=(10, 10))
for i in range(n_shown):
    # Positional lookup: index labels are not guaranteed to be 0..N-1
    # (dropna() can leave gaps), so .loc[i, ...] could raise KeyError.
    row = train_csv.iloc[i]
    path = train_dir + row['image']

    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None instead
        # of raising; fail here with the offending path rather than in cvtColor.
        raise FileNotFoundError(f'Could not read image: {path}')
    # OpenCV loads images as BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.subplot(n, n, i + 1)
    plt.imshow(image)
    plt.title(row['cultivar'])
    plt.axis('off')
# Render the figure (plt.plot() with no data draws nothing and only echoes []).
plt.show()

In [None]:
# Inspect the array shape of the first training image as loaded by OpenCV.
i = 0
# Positional lookup: safe even when the index labels have gaps after dropna().
path = train_dir + train_csv.iloc[i]['image']
image = cv2.imread(path)
if image is None:
    # cv2.imread returns None for a missing/unreadable file; report the path
    # instead of failing with an AttributeError on `.shape`.
    raise FileNotFoundError(f'Could not read image: {path}')
image.shape

## TensorFlow Data

In [None]:
import tensorflow as tf
import tensorflow.keras as keras

seed = 123             # passed to the data iterators' `seed` argument
IMG_SIZE = [256, 256]  # image size used for the iterators and the model input

# Use a TPU when one can be resolved; otherwise fall back to the default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU', tpu.master())
except Exception as err:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; the reason for the fallback is printed, not hidden.
    print('TPU not available, falling back to the default strategy:', err)
    tpu = None

if tpu is not None:
    # Connect to the TPU cluster and initialise it before creating the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()
print("REPLICAS : ", strategy.num_replicas_in_sync)

In [None]:
AUTO = tf.data.AUTOTUNE
# Global batch size: 64 per replica of the active distribution strategy.
BATCH_SIZE = 64 * strategy.num_replicas_in_sync

# No `rescale` here on purpose: the Keras EfficientNet models (EfficientNetB7 is
# used as the backbone below) contain their own Rescaling layer and expect raw
# pixel values in [0, 255]. Dividing by 255 as well would feed the backbone
# inputs in [0, 1], far from what the ImageNet weights were trained on.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.1  # hold out 10% of the rows for the 'validation' subset
)

In [None]:
# Arguments shared by the training and validation iterators, kept in one place so
# the two cannot drift apart (the validation call previously omitted color_mode).
flow_kwargs = dict(
    dataframe=train_csv,
    directory=train_dir,
    x_col='image',
    y_col='cultivar',
    target_size=IMG_SIZE,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    seed=seed,
)

# Training subset: shuffled every epoch.
train_ds = datagen.flow_from_dataframe(
    subset='training',
    shuffle=True,
    **flow_kwargs
)

# Validation subset: not shuffled, so batches follow the dataframe order and any
# later predictions line up with the iterator's own labels. Validation metrics
# do not depend on the order.
valid_ds = datagen.flow_from_dataframe(
    subset='validation',
    shuffle=False,
    **flow_kwargs
)

In [None]:
# Length of the validation iterator; reused further down as the number of validation steps.
len(valid_ds)

## Model

In [None]:
# Build the model inside the distribution strategy's scope.
with strategy.scope():
    # ImageNet-pretrained EfficientNetB7 without its classification head.
    base_model = tf.keras.applications.efficientnet.EfficientNetB7(
        input_shape=[*IMG_SIZE, 3],
        weights='imagenet',
        include_top=False,
    )
    # Freeze the backbone so only the layers stacked after it are trained.
    base_model.trainable = False

    # Head: global average pooling, then one softmax output per cultivar.
    model = keras.Sequential()
    model.add(base_model)
    model.add(keras.layers.GlobalAveragePooling2D())
    model.add(keras.layers.Dense(len(label_class), activation='softmax'))

In [None]:
# Compile inside the same strategy scope the model was built in, so the optimizer
# and metric state are created under that strategy (TensorFlow's guidance is to
# create and compile the model within strategy.scope()). With the default
# strategy this behaves exactly like an unscoped compile.
with strategy.scope():
    model.compile(
        loss=keras.losses.CategoricalCrossentropy(),  # matches class_mode='categorical' (one-hot labels)
        optimizer=keras.optimizers.Adam(0.001),
        metrics=['accuracy']
    )
model.summary()

In [None]:
# Training schedule: each epoch runs len(train_ds) training steps and
# len(valid_ds) validation steps.
EPOCHS = 10
STEPS_PER_EPOCH = len(train_ds)
VALID_STEPS = len(valid_ds)

# Train the model; `history` holds the value returned by fit().
history = model.fit(
    x=train_ds,
    validation_data=valid_ds,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALID_STEPS,
)