In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import datetime
import pandas as pd

from matplotlib import pyplot as plt

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Dropout
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
print(tf.__version__)

def plot_losses(history):
    """Plot loss and accuracy curves for training and validation side by side.

    Args:
        history: Mapping indexed by the keys 'loss', 'val_loss', 'accuracy'
            and 'val_accuracy'; each value is the sequence that gets plotted
            against the epoch axis (e.g. ``model.fit(...).history``).

    Side effects:
        Sets ``plt.rcParams['figure.figsize']`` (a global Matplotlib default)
        and shows the figure with ``plt.show()``.
    """
    # NOTE: this changes the default figure size for every later figure too.
    plt.rcParams['figure.figsize'] = [20, 5]
    f, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

    ax1.set_title('Losses')
    ax1.set_xlabel('epoch')
    ax1.grid()
    ax1.plot(history['loss'], label='Training loss')
    ax1.plot(history['val_loss'], label='Validation loss')
    # The legend is built only once the labelled lines exist; calling it
    # earlier finds no artists and only triggers a Matplotlib warning.
    ax1.legend()

    ax2.set_title('Accuracy')
    ax2.set_xlabel('epoch')
    ax2.grid()
    ax2.plot(history['accuracy'], label='Training accuracy')
    ax2.plot(history['val_accuracy'], label='Validation accuracy')
    ax2.legend()

    plt.show()

def create_submission(test_directory, save=True, preprocess_function=None, family="generic", name="model"):
    """Predict a class for every test image and build the submission table.

    The function relies on notebook-level globals that must already exist:
    ``model`` (used for prediction), ``train_generator`` (its
    ``class_indices`` mapping turns predicted indices back into class names),
    ``target_size`` and ``batch_size``.

    Args:
        test_directory: Directory handed to ``flow_from_directory``.
        save: When True, write the table to
            ``submissions/{family}_{name}_{timestamp}.csv``.
        preprocess_function: Optional per-image preprocessing function. When
            None, pixel values are rescaled by 1/255 instead.
        family: First component of the output file name.
        name: Second component of the output file name.

    Returns:
        pandas.DataFrame indexed by ``id`` (image base name) with a single
        ``category`` column holding the predicted class name.
    """
    if preprocess_function is not None:
        tDatagen = ImageDataGenerator(
            preprocessing_function=preprocess_function
        )
    else:
        tDatagen = ImageDataGenerator(rescale=1./255.)
    test_datagen = tDatagen.flow_from_directory(
        test_directory,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False  # keep file order so predictions line up with filenames
    )
    preds = model.predict(test_datagen)
    filenames = [os.path.basename(f) for f in test_datagen.filenames]
    pred_indices = np.argmax(preds, axis=1)
    # Invert {class name: index} from the training generator into {index: class name}.
    class_labels = {v: k for k, v in train_generator.class_indices.items()}
    pred_classes = [class_labels[i] for i in pred_indices]
    submissions = pd.DataFrame({
        "id": filenames,
        "category": pred_classes
    }).set_index('id')
    if save:
        # Only touch the filesystem when a file is actually going to be written.
        os.makedirs("submissions", exist_ok=True)
        timestamp = datetime.datetime.now().strftime("%m_%d_%H:%M")
        submission_path = f"submissions/{family}_{name}_{timestamp}.csv"
        submissions.to_csv(submission_path)
    return submissions

### Class Weighting (alternative to Under- and Oversampling)

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

def get_class_weights(generator):
    """
    Compute class weights for imbalanced datasets when using flow_from_directory.

    Args:
        generator: Keras ImageDataGenerator.flow_from_directory object
            (train_generator); its ``classes`` attribute supplies the
            per-sample class indices.

    Returns:
        dict mapping class index (int) -> "balanced" weight (float), with one
        entry per class index that occurs in ``generator.classes``.
    """
    labels = generator.classes
    classes = np.unique(labels)

    class_weights = compute_class_weight(
        class_weight="balanced",
        classes=classes,
        y=labels
    )

    # Convert NumPy scalars to built-in types so the dict prints cleanly and
    # its keys are plain ints.
    return {int(c): float(w) for c, w in zip(classes, class_weights)}

### Hyperparameters

In [None]:
# Image size (height, width) requested from the data generators and used for
# the VGG16 input shape; batch_size is the number of images per batch.
target_size = (250, 250)
batch_size = 128
# Root directory of the training images, passed to flow_from_directory.
data_dir = '/kaggle/input/animalprediction/mg-animal-prediction-25-26/train_images'

# Labels used to build the file names of saved models and submissions.
family = "Transfer"
name = "main"
# Previously saved model to load in the model cell; a falsy value skips loading.
model_path = "/kaggle/working/models/Transfer_main_10_01_13:23.keras"

### Model

In [None]:
# VGG16 with ImageNet weights and without its top (classification) layers.
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(target_size[0], target_size[1], 3))

# Dense classification head stacked on the VGG16 base; 10 softmax outputs.
model = Sequential()
model.add(vgg16)
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.1))
model.add(Dense(128, activation="relu"))
model.add(Dense(10, activation="softmax"))
# Freeze the first 13 entries of vgg16.layers; the remaining ones stay trainable.
for layer in vgg16.layers[:13]:
    layer.trainable = False
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Resume from a saved model when one is configured AND present on disk, so the
# notebook still runs from a fresh kernel where the checkpoint does not exist.
if model_path:
    if os.path.exists(model_path):
        model = keras.models.load_model(model_path)
    else:
        print(f"Checkpoint not found at {model_path!r}; using the freshly built model.")

In [None]:
# Fraction of the images held out for validation (10%).
val_split = 0.1

# Augmenting generator: used for the training subset only.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=val_split,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.4,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation images get the same preprocessing and the same split fraction but
# no random augmentation, so validation metrics are measured on undistorted
# images. The same directory and split fraction are used for both generators
# so that the training and validation subsets stay complementary.
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=val_split
)

train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    seed=2004
)

validation_generator = val_datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    seed=2004
)

In [None]:
# Adam optimizer with a learning rate of 1e-5.
opt = keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
# Show the per-class weights computed from the training generator.
print(get_class_weights(train_generator))

In [None]:
# Train for 8 epochs with class weights computed from the training generator.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=8,
    class_weight=get_class_weights(train_generator),
    verbose=1,
)

In [None]:
# Plot the per-epoch loss and accuracy curves recorded by model.fit.
plot_losses(history.history)

In [None]:
# Current timestamp (month_day_hour:minute), used in the saved file name.
timestamp = datetime.datetime.now().strftime("%m_%d_%H:%M")
# Make sure the output directory exists before saving.
os.makedirs("models", exist_ok=True)
# Fall back to default labels only when the variables were never defined
# (e.g. the configuration cell was not run). Catching NameError specifically
# keeps unrelated errors visible instead of swallowing them.
try:
    family
except NameError:
    family = "Transfer"
try:
    name
except NameError:
    name = "main"
model_path = f"models/{family}_{name}_{timestamp}.keras"

# Save the model
model.save(model_path)

In [None]:
# Predict on the test images and write the submission CSV, applying the same
# VGG16 preprocessing function that the training generator uses.
create_submission(
    "/kaggle/input/animalprediction/mg-animal-prediction-25-26/test_images",
    preprocess_function=preprocess_input,
    family=family,
    name=name,
)