In [1]:
import os
import sys
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split

In [None]:
path = 'AF_dataset'
train_info = pd.read_csv(f'{path}/train.csv')

X = train_info['filename']
y = train_info['class_id']

data = pd.concat([X, y], axis=1)	

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

train_data = pd.concat([X_train, y_train], axis=1)
test_data = pd.concat([X_test, y_test], axis=1)

print(y_train.value_counts())
print(y_test.value_counts())

class_id
2    190
1    189
Name: count, dtype: int64
class_id
1    48
2    47
Name: count, dtype: int64


In [8]:
test_data

Unnamed: 0,filename,class_id
191,image_id_233_.jpg,2
213,image_id_258_.jpg,1
165,image_id_201_.jpg,2
269,image_id_325_.jpg,2
246,image_id_297_.jpg,2
...,...,...
368,image_id_447_.jpg,1
72,image_id_088_.jpg,2
472,image_id_569_.jpg,2
154,image_id_186_.jpg,2


In [4]:
# test
path = 'AF_dataset'
for i, row in test_data.iterrows():
    print(row['filename'], row['class_id'])
    source_file = f'{path}/images/{row["filename"]}'
    class_name = 'pinguin' if row['class_id'] == 1 else 'turtle'
    destination_folder = f'example/test/{class_name}'
    
    os.makedirs(destination_folder, exist_ok=True)
    # Define the destination file path
    destination_file = os.path.join(destination_folder)
    
    # Copy the file to the destination folder
    shutil.copy(source_file, destination_file)
    print(source_file, destination_file)
    print('='*30)
    
    # print(sample_image['filename'], sample_image['class_id'])# 

image_id_233_.jpg 2
AF_dataset/images/image_id_233_.jpg example/test/turtle
image_id_258_.jpg 1
AF_dataset/images/image_id_258_.jpg example/test/pinguin
image_id_201_.jpg 2
AF_dataset/images/image_id_201_.jpg example/test/turtle
image_id_325_.jpg 2
AF_dataset/images/image_id_325_.jpg example/test/turtle
image_id_297_.jpg 2
AF_dataset/images/image_id_297_.jpg example/test/turtle
image_id_523_.jpg 2
AF_dataset/images/image_id_523_.jpg example/test/turtle
image_id_279_.jpg 2
AF_dataset/images/image_id_279_.jpg example/test/turtle
image_id_271_.jpg 2
AF_dataset/images/image_id_271_.jpg example/test/turtle
image_id_346_.jpg 2
AF_dataset/images/image_id_346_.jpg example/test/turtle
image_id_128_.jpg 2
AF_dataset/images/image_id_128_.jpg example/test/turtle
image_id_356_.jpg 2
AF_dataset/images/image_id_356_.jpg example/test/turtle
image_id_536_.jpg 2
AF_dataset/images/image_id_536_.jpg example/test/turtle
image_id_528_.jpg 2
AF_dataset/images/image_id_528_.jpg example/test/turtle
image_id_43

# Convolutional Neural Network

### Importing the libraries

In [57]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [58]:
tf.__version__

'2.16.2'

## Part 1 - Data Preprocessing

### Preprocessing the Training set

In [59]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

training_set = train_datagen.flow_from_directory(
        'AF_dataset/train',
        target_size=(64, 64),
        batch_size=32,
        class_mode='binary')

Found 379 images belonging to 2 classes.


### Preprocessing the Test set

In [60]:
# Do not apply data augmentation to the test set (only rescaling)
test_datagen = ImageDataGenerator(rescale=1./255)
test_set = test_datagen.flow_from_directory(
        'AF_dataset/test',
        target_size=(64, 64),
        batch_size=32,
        class_mode='binary')

Found 95 images belonging to 2 classes.


## Part 2 - Building the CNN

### Initialising the CNN

In [61]:
cnn = tf.keras.models.Sequential()

### Step 1 - Convolution

In [62]:
# Adding the convolutional layer
# input_shape=[64, 64, 3] -> 64x64 pixels, 3 channels (RGB)
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-10-22 09:53:21.325084: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-10-22 09:53:21.325134: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-10-22 09:53:21.325146: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-10-22 09:53:21.325385: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-10-22 09:53:21.325409: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


### Step 2 - Pooling

In [63]:
# Max pooling
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

### Adding a second convolutional layer

In [64]:
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

### Step 3 - Flattening

In [65]:
# flattening
cnn.add(tf.keras.layers.Flatten())

### Step 4 - Full Connection

In [66]:
# full connection
cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))

### Step 5 - Output Layer

In [67]:
# output layer
cnn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

## Part 3 - Training the CNN

### Compiling the CNN

In [68]:
# compile the CNN
cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Training the CNN on the Training set and evaluating it on the Test set

In [69]:
# train the CNN
cnn.fit(x=training_set, validation_data=test_set, epochs=25)

Epoch 1/25


2024-10-22 09:53:22.065325: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
  self._warn_if_super_not_called()


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 137ms/step - accuracy: 0.5216 - loss: 0.7632 - val_accuracy: 0.5474 - val_loss: 0.6416
Epoch 2/25
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.6795 - loss: 0.6307 - val_accuracy: 0.7053 - val_loss: 0.5495
Epoch 3/25
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.7255 - loss: 0.5547 - val_accuracy: 0.7474 - val_loss: 0.5151
Epoch 4/25
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.7906 - loss: 0.4847 - val_accuracy: 0.7263 - val_loss: 0.5218
Epoch 5/25
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - accuracy: 0.8222 - loss: 0.4381 - val_accuracy: 0.6947 - val_loss: 0.6121
Epoch 6/25
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.8311 - loss: 0.4155 - val_accuracy: 0.7474 - val_loss: 0.5309
Epoch 7/25
[1m12/12[0m [32m━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x349d4bbe0>

## Part 4 - Making a single prediction

In [90]:
# import numpy as np
# from keras.preprocessing import image

# test_image = image.load_img('AF_dataset/single_prediction/prediction_01.png', target_size=(64, 64))
# # convert into a array
# test_image = image.img_to_array(test_image)
# # add batch dimension
# test_image = np.expand_dims(test_image, axis=0)
# # result
# result = cnn.predict(test_image)
# # encode the result
# training_set.class_indices
# print(result[0][0])
# if result[0][0] == 0:
# 	prediction = 'pinguin'
# else:
# 	prediction = 'turtle'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
0.0


In [97]:
import numpy as np
import pandas as pd
import os
from keras.preprocessing import image

# Directory containing the validation images
validation_dir = 'AF_dataset/validation'

# List to store the results
results = []

# Loop over all images in the validation folder
for filename in os.listdir(validation_dir):
    # Load each image with the target size of (64, 64)
    img_path = os.path.join(validation_dir, filename)
    test_image = image.load_img(img_path, target_size=(64, 64))
    
    # Convert the image to an array and add batch dimension
    test_image = image.img_to_array(test_image)
    test_image = np.expand_dims(test_image, axis=0)
    
    # Predict the class
    result = cnn.predict(test_image)
    
    # Decode the result
    if result[0][0] == 0:
        prediction = 1  # pinguin
    else:
        prediction = 2  # turtle
    
    # Append the filename and predicted class to the results list
    results.append([filename, prediction])

# Create a DataFrame from the results
submission_df = pd.DataFrame(results, columns=['filename', 'class_id'])



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1

In [105]:
validation_data = pd.read_csv('AF_dataset/test.csv')
submission_df_2 = validation_data
merged_df = pd.merge(submission_df_2, submission_df[['filename', 'class_id']], on='filename', how='left')

# Update the 'class_id' in submission_df_2 with the values from the merged DataFrame
submission_df_2['class_id'] = merged_df['class_id']

# Print the updated DataFrame
submission_df_2.head()

# # Save the DataFrame to a CSV file
# submission_df.to_csv('submission.csv', index=False)

# print("Submission CSV file created: 'submission.csv'")

Unnamed: 0,filename,class_id
0,image_id_004_.jpg,2
1,image_id_010_.jpg,1
2,image_id_016_.jpg,2
3,image_id_024_.jpg,1
4,image_id_033_.jpg,1


In [106]:
submission_df[submission_df['filename'] == 'image_id_033_.jpg']

Unnamed: 0,filename,class_id
3,image_id_033_.jpg,1


In [108]:
# Save the DataFrame to a CSV file
submission_df_2.to_csv('submission_2.csv', index=False)

# Quick test

In [95]:
# # validation

# path = 'AF_dataset'
# validation_data = pd.read_csv('AF_dataset/test.csv')


# for i, row in validation_data.iterrows():
#     source_file = f'{path}/images/{row["filename"]}'
    
#     destination_folder = f'AF_dataset/validation'
    
#     os.makedirs(destination_folder, exist_ok=True)
#     # Define the destination file path
#     destination_file = os.path.join(destination_folder)
    
#     # Copy the file to the destination folder
#     shutil.copy(source_file, destination_file)
#     print(source_file, destination_file)
#     print('='*30)
    
#     # print(sample_image['filename'], sample_image['class_id'])# 

AF_dataset/images/image_id_004_.jpg AF_dataset/validation
AF_dataset/images/image_id_010_.jpg AF_dataset/validation
AF_dataset/images/image_id_016_.jpg AF_dataset/validation
AF_dataset/images/image_id_024_.jpg AF_dataset/validation
AF_dataset/images/image_id_033_.jpg AF_dataset/validation
AF_dataset/images/image_id_034_.jpg AF_dataset/validation
AF_dataset/images/image_id_037_.jpg AF_dataset/validation
AF_dataset/images/image_id_038_.jpg AF_dataset/validation
AF_dataset/images/image_id_043_.jpg AF_dataset/validation
AF_dataset/images/image_id_050_.jpg AF_dataset/validation
AF_dataset/images/image_id_054_.jpg AF_dataset/validation
AF_dataset/images/image_id_066_.jpg AF_dataset/validation
AF_dataset/images/image_id_073_.jpg AF_dataset/validation
AF_dataset/images/image_id_075_.jpg AF_dataset/validation
AF_dataset/images/image_id_076_.jpg AF_dataset/validation
AF_dataset/images/image_id_086_.jpg AF_dataset/validation
AF_dataset/images/image_id_090_.jpg AF_dataset/validation
AF_dataset/ima