<a href="https://colab.research.google.com/github/KamilRizatdinov/AgroHack_DS/blob/main/AgroHack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
from os import listdir
from os.path import isfile, join

import keras
import keras.layers as L
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dense, Dropout, Flatten
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, img_to_array
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
def convert_image_to_array(img, image_size):
    """Resize an image to a square and return it as a NumPy array.

    Args:
        img: Image object exposing ``resize`` (opened with ``PIL.Image.open``
            by the callers in this notebook).
        image_size: Edge length, in pixels, of the square output.

    Returns:
        ``np.array`` of the resized image, or ``None`` when resizing fails.
        On failure the error is printed and the offending image is plotted
        so it can be inspected.
    """
    # ``Image.ANTIALIAS`` was an alias of ``Image.LANCZOS`` and no longer
    # exists in recent Pillow releases. Resolve the filter outside the
    # ``try`` so that a missing attribute cannot be swallowed by the handler
    # below and silently turn every image into ``None``.
    resample = Image.LANCZOS
    try:
        return np.array(img.resize((image_size, image_size), resample))
    except Exception as e:  # not BaseException: let Ctrl-C / SystemExit through
        print("Error!")
        print(e)
        plt.imshow(np.array(img) / 255)
        return None

In [None]:
# Directories holding the training and test images.
train_dir = "/content/drive/MyDrive/lemon-dataset/images/train/"
test_dir = "/content/drive/MyDrive/lemon-dataset/images/test/"

# os.listdir() returns entries in arbitrary order; sort them so the file
# lists (and therefore the row order of anything built from them) are the
# same on every run.
images_train_filenames = sorted(
    f for f in listdir(train_dir) if isfile(join(train_dir, f))
)
images_test_filenames = sorted(
    f for f in listdir(test_dir) if isfile(join(test_dir, f))
)

In [None]:
# Edge length (pixels) every image is resized to before entering the network.
image_size = 256

annotations_dir = "/content/drive/MyDrive/lemon-dataset/annotations/"

# Load the annotation file; its "annotations" and "images" sections are
# turned into one DataFrame each.
with open(annotations_dir + "instances_default.json", "r") as annotations_file:
    data = json.load(annotations_file)

df_annotations = pd.DataFrame(data["annotations"])
df_images = pd.DataFrame(data["images"])

# image_id -> set of every category_id annotated on that image.
mapper = {}
for _, annotation in df_annotations.iterrows():
    mapper.setdefault(annotation["image_id"], set()).add(annotation["category_id"])

In [None]:
# Build the training tensors: one resized image array and one 9-element
# multi-hot label vector per row of df_images.
X_train, y_train = [], []

for index, row in df_images.iterrows():
    # The context manager closes the underlying file once the pixels have
    # been read, instead of leaving one open handle per image.
    with Image.open(join(train_dir, row["file_name"])) as im:
        im_array = convert_image_to_array(im, image_size=image_size)
    cat = np.zeros(9, dtype=int)
    # An image with no annotation rows never got an entry in `mapper`;
    # treat it as "no category present" rather than raising KeyError.
    for x in mapper.get(row["id"], ()):
        cat[x - 1] = 1  # category ids are used as 1-based positions
    X_train.append(im_array)
    y_train.append(cat)


X_train = np.array(X_train)
y_train = np.array(y_train)


X_train.shape, y_train.shape


# Test images are kept as a list of arrays, with the matching file names in
# id_test (same order) so predictions can be labelled later.
X_test = []
id_test = []

for filename in images_test_filenames:
    with Image.open(join(test_dir, filename)) as im:
        im_array = convert_image_to_array(im, image_size)
    X_test.append(im_array)
    id_test.append(filename)

In [None]:
def get_model(input_size=None, num_classes=9):
    """Build and compile the multi-label CNN classifier.

    The network is three convolutional stages (32, 64+64 and 128+128 filters,
    each convolution followed by ReLU and batch normalisation, each stage
    closed by max-pooling and dropout), a 1024-unit dense layer, and a
    sigmoid output layer with one unit per class.

    All layers come from ``tensorflow.keras.layers`` so they belong to the
    same Keras implementation as the ``keras.models.Sequential`` container
    (``keras`` is bound to ``tensorflow.keras`` by the notebook imports).

    Args:
        input_size: Edge length of the square RGB input. Defaults to the
            notebook-level ``image_size``.
        num_classes: Number of sigmoid outputs. Defaults to 9.

    Returns:
        A compiled model using binary cross-entropy, the Adam optimizer and
        two ROC-AUC metrics (``roc_auc`` and ``roc_auc_multi_label``).
    """
    if input_size is None:
        input_size = image_size

    def conv_relu_bn(filters, **conv_kwargs):
        """Return the Conv2D -> ReLU -> BatchNormalization layer triple."""
        return [
            layers.Conv2D(filters, (3, 3), padding="same", **conv_kwargs),
            layers.Activation("relu"),
            layers.BatchNormalization(axis=-1),
        ]

    model = keras.models.Sequential(
        [
            # Stage 1
            *conv_relu_bn(32, input_shape=[input_size, input_size, 3]),
            layers.MaxPooling2D(pool_size=(3, 3)),
            layers.Dropout(0.25),
            # Stage 2
            *conv_relu_bn(64),
            *conv_relu_bn(64),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Dropout(0.25),
            # Stage 3
            *conv_relu_bn(128),
            *conv_relu_bn(128),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Dropout(0.25),
            # Classifier head
            layers.Flatten(),
            layers.Dense(1024),
            layers.Activation("relu"),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            # Sigmoid (not softmax): each class is predicted independently.
            layers.Dense(num_classes),
            layers.Activation("sigmoid"),
        ]
    )

    model.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=[
            keras.metrics.AUC(name="roc_auc", curve="ROC"),
            keras.metrics.AUC(
                name="roc_auc_multi_label", curve="ROC", multi_label=True
            ),
        ],
    )

    return model

In [None]:
# Settings for the random image augmentation applied to training batches.
augmentation_options = {
    "rotation_range": 25,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "fill_mode": "nearest",
}

aug = ImageDataGenerator(**augmentation_options)

In [None]:
# Build the compiled network returned by get_model().
model = get_model()

# Print the layer-by-layer summary of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 32)      896       
_________________________________________________________________
activation (Activation)      (None, 256, 256, 32)      0         
_________________________________________________________________
batch_normalization (BatchNo (None, 256, 256, 32)      128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 85, 85, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 85, 85, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 85, 85, 64)        18496     
_________________________________________________________________
activation_1 (Activation)    (None, 85, 85, 64)        0

In [None]:
BS = 32  # mini-batch size
EPOCHS = 1  # number of passes over the training data

# Ceiling division: floor division would skip the final partial batch and
# would yield 0 steps (an invalid value) when there are fewer than BS samples.
steps_per_epoch = (len(X_train) + BS - 1) // BS

# Train on augmented batches drawn from the in-memory training arrays.
history = model.fit(
    aug.flow(X_train, y_train, batch_size=BS),
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    verbose=1,
)



In [None]:
# Stack the list of per-image arrays into a single batch tensor.
X_test = tf.stack(X_test)
# `predict_proba` is deprecated and absent from later TensorFlow releases;
# `predict` returns the same per-class sigmoid outputs.
y_preds = model.predict(X_test)

In [None]:
# Threshold every probability at 0.5 in one vectorised step, giving a 0/1
# integer matrix with one row per test image and one column per class.
predicted_labels = (np.asarray(y_preds) >= 0.5).astype(int)

# Columns are named "1".."N" after the 1-based class positions.
label_columns = [str(label) for label in range(1, predicted_labels.shape[1] + 1)]

# Build the frame from the integer matrix and add the file names as the first
# column. Concatenating the names with the labels in one NumPy array would
# coerce every label to a string.
y_preds = pd.DataFrame(predicted_labels, columns=label_columns)
y_preds.insert(0, "image_id", id_test)

In [None]:
# index=False is the documented way to omit the row index from the CSV;
# index=None only had that effect because None is falsy.
y_preds.to_csv("submission.csv", index=False)