In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
pip install keras



In [3]:
!pip install keras_preprocessing

Collecting keras_preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras_preprocessing
Successfully installed keras_preprocessing-1.1.2


In [4]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from keras_preprocessing.image import load_img
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Constants
TRAIN_DIR = "/content/drive/MyDrive/induction-task/Data/Train"
TEST_DIR = "/content/drive/MyDrive/induction-task/Data/Test"
VALID_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Helper functions
def preprocess_images(image_paths, target_size=(236, 236)):
    """Load and preprocess images."""
    features = []
    valid_paths = []
    invalid_paths = []
    for path in tqdm(image_paths, desc="Preprocessing images"):
        try:
            img = load_img(path, target_size=target_size)
            img = np.array(img)
            features.append(img)
            valid_paths.append(path)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            invalid_paths.append(path)
    features = np.array(features).reshape(-1, *target_size, 3)
    return features, valid_paths, invalid_paths

def create_dataframe(directory):
    """Create a dataframe with image paths and labels."""
    image_paths = []
    labels = []
    for label in os.listdir(directory):
        label_dir = os.path.join(directory, label)
        if os.path.isdir(label_dir):
            for image_name in os.listdir(label_dir):
                if image_name.lower().endswith(VALID_EXTENSIONS):
                    image_paths.append(os.path.join(label_dir, image_name))
                    labels.append(label)
            print(f"{label} completed.")
    return pd.DataFrame({"image": image_paths, "label": labels})

# Load and preprocess training data
print("Loading training data...")
train_df = create_dataframe(TRAIN_DIR)

print("Preprocessing training images...")
train_features, valid_train_paths, _ = preprocess_images(train_df["image"])
x_train = train_features / 255.0  # Normalize pixel values

# Encode labels
le = LabelEncoder()
train_labels = le.fit_transform(train_df["label"])
y_train = to_categorical(train_labels, num_classes=2)

# Model definition
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(236, 236, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
print("Training the model...")
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(x_train, y_train, validation_split=0.2, batch_size=32, epochs=20, callbacks=[early_stopping])

# Preprocess test images
print("Loading and preprocessing test data...")
test_images = sorted([img for img in os.listdir(TEST_DIR) if img.lower().endswith(VALID_EXTENSIONS)],
                     key=lambda x: int(x.split('_')[1].split('.')[0]))
test_paths = [os.path.join(TEST_DIR, img) for img in test_images]

x_test, valid_test_paths, invalid_test_paths = preprocess_images(test_paths)
x_test = x_test / 255.0  # Normalize test data

# Make predictions for valid images
print("Making predictions...")
predictions = model.predict(x_test)
predicted_labels = le.inverse_transform(np.argmax(predictions, axis=1))

# Create submission entries for valid and invalid images
submission_data = []
valid_image_to_label = dict(zip(valid_test_paths, predicted_labels))

for img_path in test_paths:
    img_name = os.path.basename(img_path).split('.')[0]  # Remove the file extension
    if img_path in valid_image_to_label:
        submission_data.append((img_name, valid_image_to_label[img_path]))
    else:
        submission_data.append((img_name, "Real"))  # Default label for invalid images

# Create submission file
print("Creating submission file...")
submission = pd.DataFrame(submission_data, columns=['Id', 'Label'])
submission.to_csv("submission.csv", index=False)
print("Submission file created successfully!")


Loading training data...
AI completed.
Real completed.
Preprocessing training images...


Preprocessing images: 100%|██████████| 801/801 [04:43<00:00,  2.82it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training the model...
Epoch 1/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 263ms/step - accuracy: 0.5915 - loss: 1.6614 - val_accuracy: 0.4658 - val_loss: 1.0712
Epoch 2/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 79ms/step - accuracy: 0.9076 - loss: 0.2550 - val_accuracy: 0.9814 - val_loss: 0.0600
Epoch 3/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 65ms/step - accuracy: 0.9709 - loss: 0.0672 - val_accuracy: 0.9814 - val_loss: 0.1240
Epoch 4/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 63ms/step - accuracy: 0.9935 - loss: 0.0231 - val_accuracy: 0.9752 - val_loss: 0.0652
Epoch 5/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 61ms/step - accuracy: 0.9853 - loss: 0.0349 - val_accuracy: 0.9565 - val_loss: 0.1638
Epoch 6/20
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step - accuracy: 0.9896 - loss: 0.0331 - val_accuracy: 1.0000 - val_loss: 0.0018
Epoch 7/2

Preprocessing images:  31%|███       | 62/200 [00:13<00:24,  5.60it/s]

Error loading /content/drive/MyDrive/induction-task/Data/Test/image_62.jpg: cannot identify image file <_io.BytesIO object at 0x79024015b1a0>


Preprocessing images: 100%|██████████| 200/200 [00:26<00:00,  7.54it/s]


Making predictions...
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 161ms/step
Creating submission file...
Submission file created successfully!
