<a href="https://colab.research.google.com/github/AzazelSarie/Kaggle/blob/main/CNN_V1_TPU_Petals_to_the_Metal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow
!pip install tensorflow-addons
!pip install tensorflow-io

Collecting tensorflow-io
  Downloading tensorflow_io-0.37.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Downloading tensorflow_io-0.37.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-io
Successfully installed tensorflow-io-0.37.1


In [None]:
import tensorflow as tf
import os
import tensorflow_addons as tfa
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import regularizers

In [None]:
# Pipeline-wide constants shared by the cells below.
AUTOTUNE = tf.data.experimental.AUTOTUNE  # let tf.data pick parallelism / prefetch depth
BATCH_SIZE = 32  # number of examples per training / test batch
IMG_SIZE = 64    # images are resized to IMG_SIZE x IMG_SIZE pixels

# Schema of one labelled TFRecord example and the parser that applies it
def _parse_image_function(example_proto):
    """Parse one serialized example into a dict of scalar tensors.

    Args:
        example_proto: a scalar string tensor holding one serialized example.

    Returns:
        A dict with the keys 'image' (string), 'class' (int64) and
        'id' (string). All three features are declared without a default
        value, so each one must be present in the record.
    """
    scalar_string = tf.io.FixedLenFeature([], tf.string)
    schema = {
        'image': scalar_string,                        # encoded image bytes
        'class': tf.io.FixedLenFeature([], tf.int64),  # integer class label
        'id': scalar_string,                           # example identifier
    }
    parsed = tf.io.parse_single_example(example_proto, schema)
    return parsed

# Turn encoded JPEG bytes into a normalised, fixed-size RGB tensor
def decode_image(image):
    """Decode JPEG bytes, resize to IMG_SIZE x IMG_SIZE and scale by 1/255.

    Args:
        image: a scalar string tensor containing JPEG-encoded bytes.

    Returns:
        A 3-channel image tensor of spatial size IMG_SIZE x IMG_SIZE whose
        values have been divided by 255.0.
    """
    rgb = tf.io.decode_jpeg(image, channels=3)            # force 3 channels (RGB)
    resized = tf.image.resize(rgb, [IMG_SIZE, IMG_SIZE])  # fixed spatial size
    return resized / 255.0                                # scale 0..255 down to 0..1

# Map function: serialized example -> (image, label)
def load_and_process_image(example_proto):
    """Parse a serialized example and return its decoded image and label.

    Args:
        example_proto: a scalar string tensor holding one serialized example.

    Returns:
        A tuple ``(image, label)``: the image produced by ``decode_image``
        and the value of the record's 'class' feature.
    """
    record = _parse_image_function(example_proto)
    return decode_image(record['image']), record['class']

# Define the data augmentation function
def augment_image(image, label):
    """Apply random flips, colour jitter and a random 90-degree rotation.

    Args:
        image: an image tensor with values scaled to [0, 1] (as produced by
            ``decode_image``).
        label: the class label; passed through unchanged.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the augmented image,
        clipped back into [0, 1].
    """
    # Geometric flips
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)

    # Colour jitter
    image = tf.image.random_brightness(image, max_delta=0.2)  # Adjust brightness
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)  # Adjust contrast
    image = tf.image.random_saturation(image, lower=0.8, upper=1.2)  # Adjust saturation
    image = tf.image.random_hue(image, max_delta=0.2)  # Adjust hue

    # Rotate by k * 90 degrees, with k drawn uniformly from {0, 1, 2, 3}
    # (maxval is exclusive).
    quarter_turns = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    image = tf.image.rot90(image, k=quarter_turns)

    # Brightness / contrast shifts can push pixels outside [0, 1]; clip so the
    # augmented training images stay in the same value range as the
    # un-augmented images produced by decode_image.
    image = tf.clip_by_value(image, 0.0, 1.0)

    return image, label

# Training shards: every *.tfrec file inside the training folder.
folder_path = "/content/Flower_NN/Train"  # Update this path to your folder
file_pattern = os.path.join(folder_path, "*.tfrec")

# Dataset of matching file names (list_files shuffles the names by default).
file_paths = tf.data.Dataset.list_files(file_pattern)

def load_dataset_with_errors_ignored(file_paths):
    """Build an (image, label) dataset from TFRecord files, skipping bad records.

    Args:
        file_paths: the TFRecord file names to read (here a dataset of names).

    Returns:
        An unbatched dataset of ``(image, label)`` pairs; elements whose
        production raises an error are dropped instead of aborting iteration.
    """
    return (
        tf.data.TFRecordDataset(file_paths)
        .map(load_and_process_image, num_parallel_calls=AUTOTUNE)
        # Drop elements that fail to parse / decode rather than failing the run.
        .apply(tf.data.experimental.ignore_errors())
    )

# Training input pipeline: load -> augment -> shuffle -> batch -> prefetch.
train_dataset = (
    load_dataset_with_errors_ignored(file_paths)
    .map(augment_image, num_parallel_calls=AUTOTUNE)
    .shuffle(buffer_size=1000)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)




In [None]:
# Function to extract the class (label) from a serialized example
def extract_label(example_proto):
    """Return only the 'class' feature of one serialized example.

    The record is parsed with the shared ``_parse_image_function`` so the
    TFRecord schema is declared in a single place.

    Args:
        example_proto: a scalar string tensor holding one serialized example.

    Returns:
        The int64 class label stored in the record.
    """
    parsed_record = _parse_image_function(example_proto)
    return parsed_record['class']  # Return only the class (label)

# Function to load the labels only (without batching, to get all labels)
def load_labels(file_paths):
    """Build an unbatched dataset of class labels, skipping bad records.

    This is deliberately a separate function from
    ``load_dataset_with_errors_ignored``: redefining that name here would
    shadow the image-loading version defined with the training pipeline, and
    re-running the training-data cell afterwards would then produce labels
    instead of (image, label) pairs.

    Args:
        file_paths: the TFRecord file names to read.

    Returns:
        A dataset yielding one scalar int64 label per readable record.
    """
    dataset = tf.data.TFRecordDataset(file_paths)
    dataset = dataset.map(extract_label, num_parallel_calls=AUTOTUNE)  # Only extract the labels
    dataset = dataset.apply(tf.data.experimental.ignore_errors())  # Ignore corrupted records
    return dataset

# Get the dataset of labels with error handling
label_dataset = load_labels(file_paths)

# Collect unique labels (converted to plain Python ints)
unique_labels = {int(label.numpy()) for label in label_dataset}

# Calculate the number of unique classes
num_classes = len(unique_labels)
print(f"Number of unique classes in the dataset: {num_classes}")


Number of unique classes in the dataset: 104


In [None]:
# Sanity check: pull a single batch and show its shapes and first element.
for image_batch, label_batch in train_dataset.take(1):
    first_image, first_label = image_batch[0], label_batch[0]
    print("Image batch shape:", image_batch.shape)
    print("Label batch shape:", label_batch.shape)
    print("First image in batch:", first_image)
    print("First label in batch:", first_label)


Image batch shape: (32, 64, 64, 3)
Label batch shape: (32,)
First image in batch: tf.Tensor(
[[[0.3636907  0.15994547 0.29591978]
  [0.3678013  0.18464172 0.3032109 ]
  [0.3760225  0.19698004 0.29971683]
  ...
  [0.3636907  0.07760292 0.28319782]
  [0.3636907  0.09407142 0.30221078]
  [0.35135892 0.10644241 0.29781264]]

 [[0.3760225  0.15580875 0.31394142]
  [0.3636907  0.176414   0.3025813 ]
  [0.3678013  0.192876   0.28801462]
  ...
  [0.3678013  0.08994779 0.2968148 ]
  [0.3636907  0.08583716 0.2927042 ]
  [0.35135892 0.0899739  0.27468258]]

 [[0.3636907  0.15582836 0.29528368]
  [0.3678013  0.18052459 0.30669188]
  [0.3760225  0.19698004 0.29971683]
  ...
  [0.3678013  0.08171353 0.28730842]
  [0.37191188 0.1022926  0.31043184]
  [0.35135892 0.09820813 0.28418913]]

 ...

 [[0.34037408 0.10199614 0.        ]
  [0.35270587 0.08882963 0.        ]
  [0.36503765 0.10336486 0.        ]
  ...
  [0.2938106  0.11888525 0.251078  ]
  [0.26914704 0.09833881 0.22293344]
  [0.2732576  0.1065

In [None]:
# Function to build a CNN model
def build_model(num_classes):
    """Build and compile the CNN classifier.

    Architecture:
      * four convolutional blocks (64, 128, 256, 512 filters), each
        Conv2D(3x3, ReLU, 'same' padding, he_uniform) -> BatchNormalization
        -> MaxPooling2D(2x2);
      * Flatten;
      * six fully connected layers (1024, 512, 256, 128, 64, 32 units), each
        ReLU with L2(0.0001) kernel regularisation and followed by
        Dropout(0.2);
      * a softmax output layer with ``num_classes`` units.

    Args:
        num_classes: number of output classes.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, sparse categorical accuracy metric) that expects
        inputs of shape (IMG_SIZE, IMG_SIZE, 3) and integer labels.
    """
    conv_filters = (64, 128, 256, 512)
    dense_units = (1024, 512, 256, 128, 64, 32)

    model = models.Sequential()

    # Convolutional feature extractor; only the first layer declares the input shape.
    for block_index, filters in enumerate(conv_filters):
        conv_kwargs = dict(activation='relu', padding='same', kernel_initializer='he_uniform')
        if block_index == 0:
            conv_kwargs['input_shape'] = (IMG_SIZE, IMG_SIZE, 3)
        model.add(layers.Conv2D(filters, (3, 3), **conv_kwargs))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))

    # Flatten the feature maps for the dense classifier head
    model.add(layers.Flatten())

    # Fully connected head: progressively narrower layers, each followed by dropout
    for units in dense_units:
        model.add(layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=regularizers.l2(0.0001),
            kernel_initializer='he_uniform',
        ))
        model.add(layers.Dropout(0.2))

    # Output layer: one probability per class
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Sparse loss / metric because labels are integer class ids, not one-hot vectors
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )

    return model


In [None]:
# Validation settings. NOTE(review): VALID_IMG_SIZE is not read by any cell
# shown in this notebook (decode_image resizes with IMG_SIZE) -- confirm
# whether it is still needed.
VALID_IMG_SIZE, VALID_BATCH_SIZE = 64, 32

# Validation map function: same parsing and decoding as training, no augmentation
def load_and_process_validation_image(example_proto):
    """Parse a serialized validation example into ``(image, label)``.

    Reuses ``_parse_image_function`` and ``decode_image`` so validation images
    go through the same decoding, resizing and scaling as training images.

    Args:
        example_proto: a scalar string tensor holding one serialized example.

    Returns:
        A tuple ``(image, label)``.
    """
    record = _parse_image_function(example_proto)
    return decode_image(record['image']), record['class']

# Locate the TFRecord files for the validation dataset
valid_folder_path = "/content/Flower_NN/Val"  # Update this with your validation TFRecord path
# Build the pattern from the validation folder (not the training folder_path),
# otherwise validation metrics would be computed on the training shards.
valid_file_pattern = os.path.join(valid_folder_path, "*.tfrec")

# Load the list of files using TensorFlow's Dataset API.
# shuffle=False keeps the file order deterministic; validation needs no shuffling.
valid_file_paths = tf.data.Dataset.list_files(valid_file_pattern, shuffle=False)

def load_validation_dataset(valid_file_paths):
    """Build an unbatched (image, label) dataset from validation TFRecord files.

    Args:
        valid_file_paths: the validation TFRecord file names to read.

    Returns:
        A dataset of ``(image, label)`` pairs without augmentation; elements
        whose production raises an error are skipped.
    """
    # Read from the argument, not from the global training `file_paths`.
    dataset = tf.data.TFRecordDataset(valid_file_paths)
    dataset = dataset.map(load_and_process_validation_image, num_parallel_calls=AUTOTUNE)
    dataset = dataset.apply(tf.data.experimental.ignore_errors())  # Skip corrupted records
    return dataset

# Load, batch, and prefetch the validation dataset (no shuffling needed for validation)
valid_dataset = load_validation_dataset(valid_file_paths)
valid_dataset = valid_dataset.batch(VALID_BATCH_SIZE).prefetch(AUTOTUNE)

In [None]:
# The dataset has 104 classes (the count printed by the label-scan cell);
# update this value if the dataset changes.
num_classes = 104
model = build_model(num_classes)

# Training callbacks:
#  - cut the learning rate to 20% after 3 epochs without val_loss improvement
#    (never below 1e-4);
#  - stop after 5 epochs without val_loss improvement and restore the best weights.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# train_dataset and valid_dataset are already batched tf.data pipelines, so no
# batch_size is passed to fit(); the batch size is fixed where the datasets
# are built.
history = model.fit(
    train_dataset,
    epochs=100,
    validation_data=valid_dataset,
    callbacks=[reduce_lr, early_stop],
)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
def load_and_process_test_image(example_proto):
    """Parse a serialized test example into ``(image, id)``.

    Test records are parsed without a 'class' feature. The image goes through
    the shared ``decode_image`` helper so test images are decoded, resized and
    scaled the same way as the training images.

    Args:
        example_proto: a scalar string tensor holding one serialized example.

    Returns:
        A tuple ``(image, id)`` where ``id`` is the record's string identifier.
    """
    # Feature description for test data (no 'class' field)
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),  # Encoded image bytes
        'id': tf.io.FixedLenFeature([], tf.string)  # Example identifier
    }
    parsed_record = tf.io.parse_single_example(example_proto, feature_description)
    image = decode_image(parsed_record['image'])  # Same preprocessing as training
    return image, parsed_record['id']

# Build the unbatched test dataset with the test-specific parser
def load_test_dataset(file_paths):
    """Build an unbatched dataset of ``(image, id)`` pairs from TFRecord files.

    Args:
        file_paths: the test TFRecord file names to read.

    Returns:
        A dataset in which every record has been mapped through
        ``load_and_process_test_image``.
    """
    return tf.data.TFRecordDataset(file_paths).map(
        load_and_process_test_image, num_parallel_calls=AUTOTUNE
    )

# Assuming test data is in a TFRecord format, load the test dataset
test_file_pattern = "/content/Flower_NN/Test/*.tfrec"
# shuffle=False is required here: list_files shuffles file names by default and
# reshuffles on every iteration. The test dataset is iterated more than once
# (once for predictions, once to collect ids), so a shuffled file order would
# pair ids with predictions that belong to other images.
test_file_paths = tf.data.Dataset.list_files(test_file_pattern, shuffle=False)
test_dataset = load_test_dataset(test_file_paths)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)


In [None]:
# Run the trained model over the test set: one row of class probabilities per image
predictions = model.predict(test_dataset)

# Predicted class = index of the largest probability in each row (NumPy array)
class_index_tensor = tf.argmax(predictions, axis=1)
predicted_classes = class_index_tensor.numpy()




In [None]:
import pandas as pd

# Extract IDs from the test dataset. This relies on test_dataset yielding its
# elements in the same order as when the predictions were computed.
ids = []
for _, id_batch in test_dataset:
    # String tensors come back as bytes objects; decode them so the CSV
    # contains plain ids instead of Python literals like b'abc123'.
    ids.extend(raw_id.decode('utf-8') for raw_id in id_batch.numpy())

# Fail with a clear message if ids and predictions do not line up
if len(ids) != len(predicted_classes):
    raise ValueError(
        f"Collected {len(ids)} ids but have {len(predicted_classes)} predictions"
    )

# Create a DataFrame for submission
submission_df = pd.DataFrame({
    'id': ids,
    'label': predicted_classes  # Use predicted class indices
})

# Save to CSV
submission_df.to_csv('submission.csv', index=False)