In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will print the first file found in each directory under the input directory

import os
# Walk the Kaggle input tree and echo one sample path per directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        # Only the first entry of each directory is printed
        # (presumably to keep the output short for large image folders).
        print(os.path.join(dirname, filenames[0]))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import keras
import cv2
from PIL import Image
from multiprocessing.pool import Pool

In [None]:
# Notebook-wide settings and the training label table.
from image_preprocessing import get_train_ds

BATCH_SIZE, IMG_SIZE = 64, (150, 150)
data_dir = 'kaggle/input/ukraine-ml-bootcamp-2023/images/train_images/'

# The label column is cast to strings; it is later used as `y_col` with
# class_mode='categorical' when the image iterators are built.
df = pd.read_csv('kaggle/input/ukraine-ml-bootcamp-2023/train.csv')
df = df.astype({'class_6': str})

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Fraction of the label table held out for validation. Both generators carry
# the same value so that requesting subset="validation" from either of them
# selects the same slice of rows.
VALIDATION_SPLIT = 0.1

# Augmenting generator: random zoom/shift/shear/flip plus rescaling to [0, 1].
train_generator = ImageDataGenerator(
    zoom_range=0.25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
    rescale=1./255,
    validation_split=VALIDATION_SPLIT)

# Non-augmenting generator (rescaling only). Without a validation_split here,
# asking this generator for subset="validation" yields an empty iterator.
val_generator = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT)

In [None]:
# Arguments common to the training and validation iterators.
# batch_size is passed explicitly; otherwise the iterators fall back to the
# Keras default of 32 regardless of the BATCH_SIZE constant.
flow_kwargs = dict(
    dataframe=df,
    directory=data_dir,
    x_col='image_id',
    y_col='class_6',
    target_size=IMG_SIZE,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
)

# Training subset: augmented and reshuffled every epoch.
train_data = train_generator.flow_from_dataframe(
    shuffle=True,
    subset="training",
    **flow_kwargs,
)

# Validation subset: drawn from a rescale-only generator so the held-out
# images are not randomly augmented, and left unshuffled so evaluation is
# repeatable. The validation_split must match train_generator's (0.1) so the
# two subsets stay disjoint.
val_data = ImageDataGenerator(rescale=1./255, validation_split=0.1).flow_from_dataframe(
    shuffle=False,
    subset="validation",
    **flow_kwargs,
)

In [None]:
# Held-out iterator with raw (un-encoded) labels, built from the rescale-only
# generator.
# NOTE(review): subset="validation" selects rows according to val_generator's
# validation_split — confirm that generator is configured with one, otherwise
# the selected range is empty.
test_data = val_generator.flow_from_dataframe(
    dataframe=df,
    directory=data_dir,
    x_col='image_id',
    y_col='class_6',
    target_size=IMG_SIZE,
    color_mode='rgb',
    # Keep the rows in dataframe order so predictions line up with the labels.
    shuffle=False,
    subset="validation",
    class_mode='raw',
)

# Model definition

In [None]:
from keras.applications import xception
from keras.layers import Conv1D, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, SimpleRNN
from keras.callbacks import ReduceLROnPlateau
# Multiply the learning rate by 0.6 (down to a floor of 5e-5) when validation
# accuracy has not improved for 8 epochs.
lr_reduce = ReduceLROnPlateau(monitor='val_accuracy', factor=0.6, patience=8, verbose=1, mode='max', min_lr=5e-5)
# Keep only the checkpoint with the best validation accuracy seen so far.
checkpoint = keras.callbacks.ModelCheckpoint('model.h15', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)
# Stop once validation loss has not improved for 4 epochs. Validation loss is
# the quantity the tuning objective reports, so stopping on training loss
# would keep training after the model starts to overfit.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)

In [None]:
def hyperparameter_tuning(use_xcept, how_deep_dense, learning_rate, BATCH_SIZE, epochs):
    """Build, train and score one candidate image classifier.

    Args:
        use_xcept: When truthy, the feature extractor is an ImageNet-pretrained
            Xception (global-average pooled, first 110 layers frozen);
            otherwise a three-stage Conv2D/MaxPooling2D stack is used.
        how_deep_dense: Selects the dense head: 0 -> 256-128,
            1 -> 256-256-128, any other value -> 512-256-256-128.
            Every head ends in a 6-unit softmax layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        BATCH_SIZE: Number of images per batch drawn from the notebook-level
            ``train_data`` and ``val_data`` iterators.
        epochs: Upper bound on training epochs (the ``early_stop`` callback
            may end training sooner).

    Returns:
        The smallest ``val_loss`` recorded in the training history.
    """
    # This function is called once per trial; drop Keras' global state left
    # over from earlier models so memory does not grow across trials.
    keras.backend.clear_session()

    if use_xcept:
        # classifier_activation is omitted: it only applies when
        # include_top=True.
        backbone = xception.Xception(
            include_top=False,
            weights="imagenet",
            input_shape=[IMG_SIZE[0], IMG_SIZE[1], 3],
            pooling='avg',
        )
        # Freeze the first 110 layers; the remaining ones are fine-tuned.
        for layer in backbone.layers[:110]:
            layer.trainable = False
        model = keras.Sequential()
        model.add(backbone)
    else:
        model = keras.Sequential([
            Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
            MaxPooling2D((2, 2)),
            Conv2D(64, (3, 3), activation='relu'),
            MaxPooling2D((2, 2)),
            Conv2D(128, (3, 3), activation='relu'),
            MaxPooling2D((2, 2)),
            Flatten(),
        ])

    # Hidden-layer widths for each head variant; unknown keys use the deepest.
    dense_heads = {0: (256, 128), 1: (256, 256, 128)}
    for units in dense_heads.get(how_deep_dense, (512, 256, 256, 128)):
        model.add(Dense(units, activation='relu'))
    model.add(Dense(6, activation='softmax'))

    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])

    # fit() does not re-batch generator input, so the requested batch size has
    # to be applied to the iterators themselves.
    # NOTE(review): relies on the Keras image iterators reading their
    # `batch_size` attribute when slicing each batch — confirm on the
    # installed Keras version.
    train_data.batch_size = BATCH_SIZE
    val_data.batch_size = BATCH_SIZE

    # No batch_size / steps arguments: the iterators define the batches and
    # their length, so every epoch covers each subset once.
    history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=epochs,
        callbacks=[early_stop],
    )
    return np.min(history.history['val_loss'])

In [None]:
def objective(trial):
    """Optuna objective: sample one configuration and return its score.

    The five hyperparameters are drawn from ``trial`` in a fixed order and
    forwarded to ``hyperparameter_tuning``, whose return value is passed
    straight back to the caller.
    """
    sampled = {
        "use_xcept": trial.suggest_categorical("use_xcept", [True, False]),
        "how_deep_dense": trial.suggest_categorical("how_deep_dense", [0, 1, 2]),
        "learning_rate": trial.suggest_float("learning_rate", 3e-6, 3e-3),
        "BATCH_SIZE": trial.suggest_categorical("BATCH_SIZE", [32, 64]),
        "epochs": trial.suggest_int("epochs", 50, 150),
    }
    return hyperparameter_tuning(**sampled)

# Training the model

In [None]:
# Params = [use_xcept: True, how_deep_dense: 2, learning_rate: 0.002462842770436472, BATCH_SIZE: 64, epochs: 99]

In [None]:
import optuna

# Trials are persisted in a local SQLite file under a fixed study name.
# load_if_exists=True lets this cell be re-run: an existing study of that name
# is resumed (new trials are appended) instead of raising a duplicate-study
# error. The objective returns a validation loss, so it is minimized.
study = optuna.create_study(
    storage="sqlite:///db.sqlite3",
    study_name="xception transfer learn hyperopt7",
    direction="minimize",
    load_if_exists=True,
)
study.optimize(objective, n_trials=100)

study.best_params

In [None]:
# Plot training & validation accuracy and loss per epoch, side by side.
# NOTE(review): `history` is not assigned in any cell shown in this notebook;
# it is expected to be the History object returned by a Keras `fit` call —
# confirm it exists before running this cell.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric; the validation series uses the `val_` prefixed key.
for ax, metric, label in zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + label)
    ax.set_ylabel(label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

fig.tight_layout()
plt.show()

# Creating the predictions submission file

In [None]:
def check_model_val_accuracy():
    """Load the saved weights and prepare the test images for prediction.

    Reads ``sample_submission.csv`` once, passes its rows (as a NumPy array),
    the test image directory and ``IMG_SIZE`` to ``get_val_ds``, and hands
    both results back so the caller can fill in predictions.

    Returns:
        tuple: ``(df, images)`` where ``df`` is the sample-submission
        DataFrame and ``images`` is whatever ``get_val_ds`` returns.
    """
    from image_preprocessing import get_val_ds

    # NOTE(review): `model` is read from notebook scope — confirm it is
    # defined there before calling.
    model.load_weights('model.h15')

    # Load sample submission, change values to model forecasts
    test_data_dir = 'kaggle/input/ukraine-ml-bootcamp-2023/images/test_images/'
    df = pd.read_csv('kaggle/input/ukraine-ml-bootcamp-2023/sample_submission.csv')
    images = get_val_ds(df.to_numpy(), test_data_dir, IMG_SIZE)
    return df, images

In [None]:
# Predict one image at a time and store the arg-max class index in row `i`.
# NOTE(review): `images` and `model` are not assigned at notebook scope in the
# cells shown here — confirm they exist before running this cell.
for i, (name, cords) in enumerate(images):
    single_image_batch = np.array([cords])
    predictions = model.predict(single_image_batch, verbose=0)[0]
    # Echo the outputs for the first five images as a quick sanity check.
    if i <= 4:
        print(predictions)
    best_class = np.argmax(predictions)
    df.at[i, 'class_6'] = best_class

In [None]:
# Write the frame to submission.csv, leaving out the index column.
df.to_csv('submission.csv', index=False)