In [9]:
import pandas as pd
import numpy as np
import cv2
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

# Load data from CSV files
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
sample_df = pd.read_csv('sample_submission.csv')

test_df.head()



Unnamed: 0,id
0,Qt1fGUB0Vz.jpeg
1,j4Rhioq7R3.jpeg
2,rD0hgFHJUZ.jpeg
3,aY5z1EJsJ6.jpeg
4,qZ3IoxD2TE.jpeg


In [14]:

# Define preprocessing function
def preprocess_image(image):
    target_size = (224, 224)
    if image is None:
        print(f"Error: Unable to load image")
        return None
    image = cv2.resize(image, target_size)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype('float32') / 255.0
    return image

# Initialize lists to store images and labels
train_images = []
train_labels = []
test_images = []

# Load and preprocess images from the folder
folder_path = 'images'
image_extensions = (".jpg", ".jpeg", ".png", ".webp")

# Set of train and test image IDs
train_ids = set(train_df['id'])
test_ids = set(test_df['id'])

# Process all images in the folder
all_image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions)]
valid_test_ids = []

for filename in all_image_files:
    image_path = os.path.join(folder_path, filename)
    image = cv2.imread(image_path)
    preprocessed_image = preprocess_image(image)
    
    if preprocessed_image is not None:
        if filename in train_ids:
            train_images.append(preprocessed_image)
            train_labels.append(train_df[train_df['id'] == filename]['target'].values[0])
        elif filename in test_ids:
            test_images.append(preprocessed_image)
            valid_test_ids.append(filename)

# Convert lists of images and labels to NumPy arrays
train_images = np.array(train_images)
train_labels = np.array(train_labels)
test_images = np.array(test_images)

# Verify that we have the correct number of images
print(f"Number of training images: {len(train_images)}")
print(f"Number of training labels: {len(train_labels)}")
print(f"Number of test images: {len(test_images)}")

# Define model architecture
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=15, batch_size=32)

# Ensure there are test images to predict
if len(test_images) > 0:
    # Predict labels for test images
    test_predictions = model.predict(test_images).flatten()

    # Convert predictions to binary (0 or 1)
    test_predictions = (test_predictions > 0.5).astype(int)

    # Filter the test_df to only include valid images
    filtered_test_df = test_df[test_df['id'].isin(valid_test_ids)].copy()
    filtered_test_df['target'] = test_predictions

    # Save predictions to a CSV file
    filtered_test_df[['id', 'target']].to_csv('sample_submission.csv', index=False)

    print("Model training and prediction complete. Predictions saved to 'sample_submission.csv'.")
else:
    print("No test images found. Prediction step skipped.")


Number of training images: 724
Number of training labels: 724
Number of test images: 365


  super().__init__(


Epoch 1/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 810ms/step - accuracy: 0.9290 - loss: 0.8188
Epoch 2/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 819ms/step - accuracy: 0.9164 - loss: 0.2871
Epoch 3/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 812ms/step - accuracy: 0.9355 - loss: 0.2128
Epoch 4/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 822ms/step - accuracy: 0.9369 - loss: 0.1545
Epoch 5/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 825ms/step - accuracy: 0.9838 - loss: 0.0742
Epoch 6/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 805ms/step - accuracy: 0.9938 - loss: 0.0246
Epoch 7/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 816ms/step - accuracy: 0.9974 - loss: 0.0171
Epoch 8/15
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 806ms/step - accuracy: 0.9918 - loss: 0.0347
Epoch 9/15
[1m23/23[0m [32m━━