In [17]:
import pandas as pd
import os
import numpy as np
import cv2 
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator




In [20]:
train_path = 'devset_images/devset_images'
test_path = 'testset_images/testset_images'
label_file_path = 'devset_images_gt.csv'

In [21]:
img_width, img_height = 224, 224
batch_size = 32
epochs = 20

# Load labels from CSV file
labels_df = pd.read_csv(label_file_path)
train_labels = labels_df['label'].values

# Load training images
train_images = []
train_ids = labels_df['id'].astype(str).values

for image_id in train_ids:
    image_path = None
    for extension in ['.jpg', '.png', '.gif']:
        temp_path = os.path.join(train_path, f"{image_id}{extension}")
        if os.path.exists(temp_path):
            image_path = temp_path
            break

    if image_path is not None:
        img = load_img(image_path, target_size=(img_width, img_height))
        
     # Preprocessing images
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,(img_width, img_height))
        img = img.astype('float32') / 255.0
        
        img_array = img_to_array(img)
        #train_images = np.append(train_images, img)
        train_images.append(img)

In [None]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)

# Split the training data into training and validation sets
train_images, val_images, train_labels, val_labels = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.applications import VGG16

In [None]:
vgg16 = VGG16(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
for layer in vgg16.layers:
    layer.trainable = False

In [None]:
# Create a data generator for data augmentation
data_generator = ImageDataGenerator(rotation_range=20, width_shift_range=0.1, height_shift_range=0.1,
                                    shear_range=0.2, zoom_range=0.2, horizontal_flip=True)

In [None]:
# Build the CNN model
# Build a new model on top of the pre-trained layers
# model = Sequential()
# model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)))
# model.add(Conv2D(64, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))

model = Sequential()
model.add(vgg16)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Train the model
history = model.fit(data_generator.flow(train_images, train_labels, batch_size=batch_size), 
                    steps_per_epoch=len(train_images) // batch_size,
                    epochs=epochs, validation_data=(val_images, val_labels))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
testset = '/kaggle/input/dplfall2023test/test.csv'

test_data = pd.read_csv(testset)

test_ids = test_data['image_id'].astype(str).values

test_images = []
predicted_labels = []

skipped_image = []

for image_id in test_ids:
    image_path = None
    for extension in ['.jpg', '.png', '.gif','jpeg']:
        temp_path = os.path.join(test_path, f"{image_id}{extension}")
        if os.path.exists(temp_path):
            image_path = temp_path
            break

    if image_path is not None:
        img = load_img(image_path, target_size=(img_width, img_height))
        
     # Preprocessing images
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,(img_width, img_height))
        img = img.astype('float32')
        
        img_array = img_to_array(img)
        #test_images = np.append(test_images, img)
        test_images.append(img)
test_images = np.array(test_images)

# Predict label for the image
#prediction = model.predict(np.expand_dims(img, axis=0))
prediction = model.predict(test_images)
predicted_labels = np.round(prediction).flatten()
# predicted_label = int(np.round(prediction)[0])
# predicted_labels.append(predicted_label)



In [None]:
testset = '/kaggle/input/dplfall2023test/test.csv'
test_folder = '/kaggle/input/dplfall2023/2023falldpl30xm/testset_images/testset_images'
test_data = pd.read_csv(testset)

test_ids = test_data['image_id'].astype(str).values

test_images = []
predicted_labels = []
skipped_images = []

for image_id in test_ids:
    image_path = os.path.join(test_folder, str(image_id))

    if not any(image_path.lower().endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif']):
        image_path += '.jpg'

    try:
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    except:
        image_path = image_path[:-4] + '.png'
        try:
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except:
            print(f"Image not found: {image_path}")
            skipped_images.append(image_id)
            continue

    image = cv2.resize(image, (224, 224))
    image = image.astype('float32')
    test_images.append(image)

test_images = np.array(test_images)

# Predict the labels for the test images
predictions = model.predict(test_images)
predicted_labels = np.round(predictions).flatten()

# Create a DataFrame for the predicted labels and image IDs
predicted_df = pd.DataFrame({'id': test_ids, 'label': predicted_labels})

# Print the predicted DataFrame
print(predicted_df)

# Print the skipped images
print("Skipped Images:")
for image_id in skipped_images:
    print(f'Image ID: {image_id}')

[ WARN:0@1227.821] global loadsave.cpp:248 findDecoder imread_('/kaggle/input/dplfall2023/2023falldpl30xm/testset_images/testset_images/5847540163.jpg'): can't open/read file: check file path/integrity
[ WARN:0@1228.714] global loadsave.cpp:248 findDecoder imread_('/kaggle/input/dplfall2023/2023falldpl30xm/testset_images/testset_images/9567943158.jpg'): can't open/read file: check file path/integrity


              id  label
0     3483809003    1.0
1     3712805295    0.0
2      379845620    0.0
3     7343264988    0.0
4     3843337492    0.0
...          ...    ...
1315  6452132743    0.0
1316   244899140    0.0
1317  3073018258    0.0
1318    49525361    0.0
1319   537780925    0.0

[1320 rows x 2 columns]
Skipped Images:


In [None]:
# Create a DataFrame for the predicted labels and image IDs
predicted_df = pd.DataFrame({'id': test_ids, 'label': predicted_labels})
predicted_df['label'] = predicted_df['label'].astype(int)

# Print the predicted DataFrame
print(predicted_df)

              id  label
0     3483809003      1
1     3712805295      0
2      379845620      0
3     7343264988      0
4     3843337492      0
...          ...    ...
1315  6452132743      0
1316   244899140      0
1317  3073018258      0
1318    49525361      0
1319   537780925      0

[1320 rows x 2 columns]


In [None]:
predicted_df.to_csv('test4.csv', index=False)

In [None]:
accuracy = np.mean(predicted_labels == train_labels)
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.0


  accuracy = np.mean(predicted_labels == train_labels)


In [None]:
# Save the trained model
model.save('training_model.h5')