In [None]:
import os
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split

In [None]:
# Image folders on the mounted Google Drive: training set first, validation set second
path_train, path_validation = (
    '/content/drive/MyDrive/NM Dataset/train_v2/train',
    '/content/drive/MyDrive/NM Dataset/validation_v2/validation',
)

In [None]:
# Label tables (one CSV per split) stored next to the image folders
train_csv_path, validation_csv_path = (
    '/content/drive/MyDrive/NM Dataset/written_name_train_v2.csv',
    '/content/drive/MyDrive/NM Dataset/written_name_validation_v2.csv',
)

In [None]:
# Read both label tables into DataFrames (training first, then validation)
train_df, validation_df = map(pd.read_csv, (train_csv_path, validation_csv_path))

In [None]:
# Draw fixed-size random subsets of each label table; random_state=42 keeps
# the selection the same on every run.
train_df_sample, validation_df_sample = (
    frame.sample(n=size, random_state=42)
    for frame, size in ((train_df, 10000), (validation_df, 3000))
)

In [None]:
def preprocess_image(image_path, target_size=(100, 100)):
    """Load an image from disk, resize it, and scale its pixel values by 1/255.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_size: Size forwarded unchanged to ``cv2.resize``
            (OpenCV interprets it as ``(width, height)``).

    Returns:
        The resized image divided by 255.0.

    Raises:
        OSError: If ``cv2.imread`` could not load the file (it returns ``None``
            instead of raising when the file is missing or cannot be decoded).
    """
    image = cv2.imread(image_path)
    if image is None:
        # Fail here with the offending path instead of letting cv2.resize
        # raise an opaque error about an empty source image.
        raise OSError(f"Could not read image '{image_path}'.")
    resized_image = cv2.resize(image, target_size)
    normalized_image = resized_image / 255.0
    return normalized_image

In [None]:
# Preprocess training images and labels.
# Rows whose image is missing or cannot be processed are skipped with a
# message, so a single bad file does not abort the whole load (the same
# policy the validation loading loop uses).
train_images = []
train_labels = []
for idx, row in train_df_sample.iterrows():
    image_path = os.path.join(path_train, row['FILENAME'])

    if not os.path.exists(image_path):
        print(f"Warning: Image '{image_path}' not found.")
        continue

    try:
        processed_image = preprocess_image(image_path)
    except Exception as e:
        print(f"Error processing image '{image_path}': {e}")
        continue

    # Append image and label together so the two lists stay index-aligned.
    train_images.append(processed_image)
    train_labels.append(row['IDENTITY'])

In [None]:
import os
import pandas as pd

# Locations of the validation image folder and its label CSV
path_validation = '/content/drive/MyDrive/NM Dataset/validation_v2/validation'
validation_csv_path = '/content/drive/MyDrive/NM Dataset/written_name_validation_v2.csv'

# Fail fast when either location is missing
if not os.path.exists(path_validation):
    raise FileNotFoundError(f"Path '{path_validation}' does not exist.")
if not os.path.exists(validation_csv_path):
    raise FileNotFoundError(f"CSV file '{validation_csv_path}' does not exist.")

# Load the label table
validation_df = pd.read_csv(validation_csv_path)

# Collected images and their labels, kept index-aligned
validation_images, validation_labels = [], []

# NOTE(review): this walks every row of the freshly read CSV, not the
# validation_df_sample subset drawn earlier — confirm that is intended.
for _, record in validation_df.iterrows():
    name = record['FILENAME']

    # Nine-character names get a '0' inserted after their first six characters.
    # NOTE(review): presumably meant to zero-pad short numeric suffixes —
    # confirm against the actual file names.
    if len(name) == 9:
        name = f"{name[:6]}0{name[6:]}"

    image_path = os.path.join(path_validation, name)

    if os.path.exists(image_path):
        # Any failure while processing is reported and the row is skipped
        try:
            loaded = preprocess_image(image_path)
            validation_images.append(loaded)
            validation_labels.append(record['IDENTITY'])
        except Exception as err:
            print(f"Error processing image '{image_path}': {err}")
    else:
        print(f"Warning: Image '{image_path}' not found.")

# Tell the user when nothing at all could be loaded
if len(validation_images) == 0:
    print("No images were loaded. Please check your file paths and the preprocess_image function.")


In [None]:
# Report how many images each loading loop collected (validation first, then train)
print(
    f"Number of validation images loaded: {len(validation_images)}",
    f"Number of train images loaded: {len(train_images)}",
    sep="\n",
)

Number of validation images loaded: 5000
Number of train images loaded: 10000


In [None]:
import numpy as np

# Turn the collected Python lists into NumPy arrays, one (images, labels) pair per split
train_images, train_labels = np.array(train_images), np.array(train_labels)
validation_images, validation_labels = (
    np.array(validation_images),
    np.array(validation_labels),
)

In [None]:
# Carve the loaded validation data into two equal halves: one stays the
# validation set, the other becomes the test set. The inputs are overwritten,
# so running this cell a second time splits the already-halved data again.
(
    validation_images,
    test_images,
    validation_labels,
    test_labels,
) = train_test_split(
    validation_images,
    validation_labels,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Show the array shapes of both halves produced by the split
for caption, array in (
    ("Validation Images Shape:", validation_images),
    ("Validation Labels Shape:", validation_labels),
    ("Test Images Shape:", test_images),
    ("Test Labels Shape:", test_labels),
):
    print(caption, array.shape)

Validation Images Shape: (2500, 100, 100, 3)
Validation Labels Shape: (2500,)
Test Images Shape: (2500, 100, 100, 3)
Test Labels Shape: (2500,)


In [None]:
import warnings

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# Label value encoded as 1; every other label is encoded as 0.
# NOTE(review): 'POSITIVE_CLASS' looks like a placeholder — the labels come
# from the IDENTITY column, so confirm this value actually occurs in the data.
POSITIVE_CLASS = 'POSITIVE_CLASS'


def encode_binary_labels(labels, positive_class=POSITIVE_CLASS):
    """Return an array holding 1 where a label equals `positive_class`, else 0."""
    return np.array([1 if label == positive_class else 0 for label in labels])


# Convert labels to numerical format
train_labels = encode_binary_labels(train_labels)
validation_labels = encode_binary_labels(validation_labels)
test_labels = encode_binary_labels(test_labels)

# If a split ends up with fewer than two classes, a constant prediction scores
# perfectly on it, so the accuracy reported below says nothing about the model.
# Warn instead of letting that pass silently.
for split_name, split_labels in (
    ('train', train_labels),
    ('validation', validation_labels),
    ('test', test_labels),
):
    if len(set(split_labels.tolist())) < 2:
        warnings.warn(
            f"The {split_name} labels contain fewer than two classes after encoding "
            f"against {POSITIVE_CLASS!r}; accuracy on this split is not informative."
        )

# Define the model architecture: three convolution layers (the first two
# followed by max pooling), then a dense head ending in one sigmoid unit.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model, monitoring the validation half after each epoch
history = model.fit(train_images, train_labels, epochs=10,
                    validation_data=(validation_images, validation_labels))

# Evaluate the model on test set
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('Test accuracy:', test_acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 1.0


In [None]:
# Persist the trained network to Google Drive. The destination is kept in a
# variable so the confirmation message reports exactly where the file went.
model_save_path = '/content/drive/MyDrive/NM Model/NM model.h5'
model.save(model_save_path)
print("Model saved successfully at:", model_save_path)

Model saved successfully at: /content/drive/MyDrive/NM Model/NM model.h5
