In [None]:
import os

# NOTE(review): presumably the square input resolution and the mini-batch
# size used further on -- neither constant is referenced in this section.
IMAGE_SIZE = 224
BATCH_SIZE = 32

# TRAIN_PATH is the path of the training-data folder. It is not assigned in
# this section, so it has to be defined before this point; the bare reference
# below fails fast with a NameError when it is not.
TRAIN_PATH

# Collect the PNG files that sit directly inside TRAIN_PATH (the pattern is
# not recursive, so files inside subfolders are not matched).
TRAIN_IMAGES = glob.glob(TRAIN_PATH + '/*.png')
DF_TRAIN = pd.DataFrame(TRAIN_IMAGES, columns=['image_path'])

# Display names for the integer class labels.
classes = {
    0: 'BLED',
    1: 'NON BLED',
}

def get_three_classes(x, y):
    """Keep the image paths whose parent folder is named ``'0'`` or ``'1'``.

    The label of every kept path comes from the name of the directory that
    directly contains it; paths located under any other folder are dropped.

    NOTE: another function with this same name is defined further down in
    this file and replaces this one once the file has run top to bottom.

    Args:
        x: Sequence of image file paths.
        y: Not used by this implementation; labels are derived from ``x``.

    Returns:
        A ``(paths, labels)`` tuple. ``paths`` is a NumPy array of the kept
        paths and ``labels`` is the one-hot encoding of their classes as
        produced by ``tf.keras.utils.to_categorical(..., num_classes=2)``.
    """
    # Folder name -> integer class label.
    folder_to_label = {'0': 0, '1': 1}

    kept_paths = []
    kept_labels = []

    for path in x:
        # The immediate parent directory of the file decides its class.
        parent_folder = os.path.basename(os.path.dirname(path))
        if parent_folder not in folder_to_label:
            continue
        kept_paths.append(path)
        kept_labels.append(folder_to_label[parent_folder])

    return (
        np.array(kept_paths),
        tf.keras.utils.to_categorical(kept_labels, num_classes=2),
    )

# Show what TRAIN_PATH contains, labelling every entry as a folder or a file.
if not os.path.exists(TRAIN_PATH):
    print("Folder not found.")
else:
    # Names of everything located directly inside TRAIN_PATH.
    file_list = os.listdir(TRAIN_PATH)

    for file_name in file_list:
        file_path = os.path.join(TRAIN_PATH, file_name)
        print(
            f"Folder: {file_name}"
            if os.path.isdir(file_path)
            else f"File: {file_name}"
        )

# The loading code further down refers to the training folder as data_path.
data_path = TRAIN_PATH

def get_three_classes(x, y):
    """Select the samples labelled 0 or 1, shuffle them, one-hot the labels.

    Despite the name, only the two classes 0 and 1 are handled.

    Args:
        x: NumPy array of samples, indexed along its first axis.
        y: NumPy array of integer labels aligned with ``x``.

    Returns:
        ``(x, y)`` restricted to classes 0 and 1, in a random order drawn
        from NumPy's global RNG, with ``y`` converted by
        ``tf.keras.utils.to_categorical``. When class 0 or class 1 does not
        occur in ``y`` a message is printed and ``(None, None)`` is returned.
    """
    present_labels = np.unique(y)

    # Both classes are required; otherwise report it and hand back nothing.
    if 0 not in present_labels or 1 not in present_labels:
        print("Classes 0 and 1 not found in the data.")
        return None, None

    # Positions of all class-0 samples followed by all class-1 samples.
    selected = np.concatenate(
        [np.where(y == 0)[0], np.where(y == 1)[0]],
        axis=0,
    )
    x, y = x[selected], y[selected]

    # Random permutation of the remaining samples (sampling every position
    # exactly once, without replacement).
    count = x.shape[0]
    shuffled = np.random.choice(range(count), count, replace=False)
    x, y = x[shuffled], y[shuffled]

    return x, tf.keras.utils.to_categorical(y)

def load_and_preprocess_data(data_path, image_size=(224, 224)):
    """Collect the readable image paths and integer labels under *data_path*.

    Every subdirectory of ``data_path`` is treated as one class whose label
    is ``int(<directory name>)``. A file is kept only when ``cv2.imread``
    manages to decode it.

    Despite the name, no pixel data is returned -- only file paths. The
    previous implementation also resized and normalised every image and then
    threw the result away; that wasted work has been removed.

    Args:
        data_path: Directory containing one subdirectory per class.
        image_size: Unused. Kept so existing calls that pass it still work.

    Returns:
        A ``(image_paths, labels)`` tuple of NumPy arrays of equal length,
        ordered by class directory name and then by file name.

    Raises:
        ValueError: If a subdirectory name cannot be parsed by ``int()``.
    """
    image_paths = []
    labels = []

    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # output order identical from run to run and across filesystems.
    for class_name in sorted(os.listdir(data_path)):
        class_path = os.path.join(data_path, class_name)
        if not os.path.isdir(class_path):
            continue  # Plain files next to the class folders are ignored.

        label = int(class_name)  # Class folders are named with class indices.
        for image_filename in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_filename)
            # cv2.imread gives None for files it cannot decode; skip those.
            if cv2.imread(image_path) is not None:
                image_paths.append(image_path)
                labels.append(label)

    return np.array(image_paths), np.array(labels)


# Seed NumPy's global RNG before anything is shuffled. get_three_classes()
# shuffles with np.random too, so seeding only right before the split (as was
# done previously) still produced a different split on every run.
np.random.seed(42)

# Gather the image paths and integer labels from the class subfolders.
x_train, y_train = load_and_preprocess_data(data_path)

# Use the get_three_classes function to filter the data
x_train, y_train = get_three_classes(x_train, y_train)

# get_three_classes() returns (None, None) when class 0 or class 1 is absent;
# stop with a clear message instead of failing on `.shape` below.
if x_train is None or y_train is None:
    raise ValueError(
        f"No usable training data under {data_path!r}: "
        "samples of both class 0 and class 1 are required."
    )

# Print the shapes
print(x_train.shape, y_train.shape)


# x_train holds image paths and y_train the matching one-hot labels,
# e.g. shapes (2618,) for x_train and (2618, 2) for y_train.

# Determine the number of samples
num_samples = len(x_train)

# Generate random indices for splitting the data
indices = np.arange(num_samples)
np.random.shuffle(indices)

# Define the split ratio (e.g., 80% training, 20% validation)
split_ratio = 0.8
split_index = int(split_ratio * num_samples)

# Split the data into training and validation sets: the first `split_index`
# shuffled positions go to training, the remainder to validation.
x_train_split = x_train[indices[:split_index]]
y_train_split = y_train[indices[:split_index]]
x_val_split = x_train[indices[split_index:]]
y_val_split = y_train[indices[split_index:]]

# Print the shapes of the splits
print("Training data shapes:")
print("x_train_split shape:", x_train_split.shape)
print("y_train_split shape:", y_train_split.shape)
print("\nValidation data shapes:")
print("x_val_split shape:", x_val_split.shape)
print("y_val_split shape:", y_val_split.shape)



# Augmentation
# Build the training tensors: every training image is read from disk, decoded
# and passed through data_augmen2 (defined elsewhere); labels are kept as-is.
augmented_X_train, augmented_y_train = [], []

for image_path, label in zip(x_train_split, y_train_split):
    # Raw file bytes -> 3-channel image tensor -> augmented image.
    image = data_augmen2(
        tf.image.decode_image(tf.io.read_file(image_path), channels=3)
    )
    augmented_X_train.append(image)
    augmented_y_train.append(label)

# Pack the per-sample lists into single tensors.
# NOTE(review): this needs every augmented image to have the same shape --
# presumably data_augmen2 resizes its input; confirm.
augmented_X_train = tf.convert_to_tensor(augmented_X_train)
augmented_y_train = tf.convert_to_tensor(augmented_y_train)

# Report the resulting tensor shapes.
print("Shape of augmented X_train:", augmented_X_train.shape)
print("Shape of augmented y_train:", augmented_y_train.shape)




# Build the validation tensors from the untouched images: each file is read
# and decoded, with no augmentation applied.
original_X_val, original_y_val = [], []

for image_path, label in zip(x_val_split, y_val_split):
    # Raw file bytes -> 3-channel image tensor.
    image = tf.image.decode_image(tf.io.read_file(image_path), channels=3)
    original_X_val.append(image)
    original_y_val.append(label)

# Pack the per-sample lists into single tensors.
# NOTE(review): images are stacked at their decoded size with no resize, so
# this assumes all validation files share one resolution -- confirm.
original_X_val = tf.convert_to_tensor(original_X_val)
original_y_val = tf.convert_to_tensor(original_y_val)

# Report the resulting tensor shapes.
print("Shape of original X_val:", original_X_val.shape)
print("Shape of original y_val:", original_y_val.shape)









