In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow import keras  # Ensure Keras is imported


# Load dataset
train_dataset = image_dataset_from_directory(
    '/kaggle/input/vista-25/dataset_1/dataset_1/train',
    image_size=(224, 224),
    batch_size=32,
    validation_split=0.2,
    subset='training',
    seed=123
)

validation_dataset = image_dataset_from_directory(
    '/kaggle/input/vista-25/dataset_1/dataset_1/train',
    image_size=(224, 224),
    batch_size=32,
    validation_split=0.2,
    subset='validation',
    seed=123
)

# Define input shape explicitly
input_shape = (224, 224, 3)
inputs = layers.Input(shape=input_shape)

# Data augmentation layer
data_augmentation = models.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    layers.RandomContrast(0.2)
])

# EfficientNetV2 base model
from tensorflow.keras.applications import EfficientNetV2L
base_model = EfficientNetV2L(input_shape=input_shape, include_top=False, weights="imagenet")

# Freeze the base model initially
base_model.trainable = False

# Build the model using Functional API
x = data_augmentation(inputs)
x = base_model(x, training=False)  # Prevent batch norm updates initially
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

# Create the model
model = models.Model(inputs, outputs)

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Early stopping and model checkpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Use .keras instead of .h5
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss')

model.save("best_model.keras")  # Save model first
loaded_model = keras.models.load_model("best_model.keras")  # Then load

# Train the model
history = model.fit(
    train_dataset,
    epochs=5,
    validation_data=validation_dataset,
    callbacks=[early_stopping, model_checkpoint]
)

# Fine-tuning: Unfreeze some layers of the base model
base_model.trainable = True
for layer in base_model.layers[:100]:  # Keep some layers frozen
    layer.trainable = False

# Recompile with a lower learning rate for fine-tuning
model.compile(
    optimizer=Adam(learning_rate=0.00001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Continue training with fine-tuning
history_finetune = model.fit(
    train_dataset,
    epochs=5,
    validation_data=validation_dataset,
    callbacks=[early_stopping, model_checkpoint]
)

# Evaluate on test dataset
test_dataset = image_dataset_from_directory(
    '/kaggle/input/vista-25/dataset_1/dataset_1/train',
    image_size=(224, 224),
    batch_size=32
)

test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_acc*100:.2f}%") 

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image
import numpy as np
import pandas as pd
import os

# Define test path
test_path = "/kaggle/input/vista-25/dataset_1/dataset_1/test"

# List and sort image files
image_files = sorted([f for f in os.listdir(test_path) if f.endswith(('.jpg', '.jpeg', '.png'))])

# Batch size for processing
batch_size = 32

# Function to load and preprocess images efficiently
def load_and_preprocess_image(img_path):
    # Read the image from file
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)  # Decode image (for .jpg)
    img = tf.image.resize(img, (224, 224))  # Resize to target size
    img = img / 255.0  # Normalize
    return img

# Create a tf.data dataset to load images in batches
image_paths = [os.path.join(test_path, img_name) for img_name in image_files]
image_dataset = tf.data.Dataset.from_tensor_slices(image_paths)

# Map the preprocessing function onto the dataset
image_dataset = image_dataset.map(lambda x: load_and_preprocess_image(x), num_parallel_calls=tf.data.AUTOTUNE)

# Batch the images and prefetch for performance
image_dataset = image_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Prepare for predictions
img_ids = []
predictions = []

# Process the images in batches
for img_batch in image_dataset:
    # Predict batch of images
    preds = model.predict(img_batch)
    
    # Get labels (0 or 1 based on threshold)
    labels = (preds > 0.5).astype(int)
    
    # Append the image file names and predictions
    for i, img_name in enumerate(image_files[len(img_ids):len(img_ids) + batch_size]):
        img_ids.append(img_name)
        predictions.append(labels[i][0])

# Save predictions to CSV
df = pd.DataFrame({"image_id": img_ids, "label": predictions})
df.to_csv("submission.csv", index=False)

print("✅ Predictions saved to submission.csv")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 251ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 200ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 