<a href="https://colab.research.google.com/github/Jyothiraditya135/Some-Codes/blob/main/EfficientNetB0_Doodle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import json
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import torch.optim as optim
import torchvision.transforms as transforms

In [None]:
# Download the competition archive from Google Drive by file ID.
# The ID is passed positionally: gdown deprecated the `--id` option in 4.3.1
# and announced its removal for 5.0.
!gdown "1peDYAB52fFNnMmrGDT7oyJJ8a3yZjJGg"

Downloading...
From: https://drive.google.com/uc?id=1peDYAB52fFNnMmrGDT7oyJJ8a3yZjJGg
To: /content/doodle-detectives-aiclubiitm.zip
100% 114M/114M [00:01<00:00, 66.6MB/s]


In [None]:
# Extract the CSV files into the current working directory.
# -o overwrites existing files without prompting, so re-running this cell
# does not stop at unzip's interactive "replace?" question.
!unzip -o "/content/doodle-detectives-aiclubiitm.zip"

Archive:  /content/doodle-detectives-aiclubiitm.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [None]:
# Load the training table that was extracted from the competition archive.
df = pd.read_csv("/content/train.csv")

In [None]:
# Preview the first rows of the training table.
df.head()

Unnamed: 0,drawing,key_id,word,recognized
0,"[[[108, 91, 51, 5, 19, 49, 32, 4, 0, 3, 51, 11...",6351448545689600,fish,False
1,"[[[98, 89, 82, 71, 48, 16, 9, 5, 5, 12, 48, 56...",5623056384917504,potato,False
2,"[[[185, 217, 255, 255, 243, 243, 240, 235, 222...",4538232068898816,fish,True
3,"[[[79, 61, 59, 59, 65, 65, 56, 55, 83, 105, 12...",6629670151782400,flamingo,False
4,"[[[87, 108, 133, 169, 232, 242, 244, 255, 246,...",5793545258532864,bulldozer,True


In [None]:
# List the distinct values stored in the `recognized` column.
unique_values = df.loc[:, 'recognized'].unique()
print(unique_values)

[False  True]


In [None]:
# Rows whose `recognized` flag is True.
df_true = df[df['recognized']==True]
# Preview the filtered frame (the cell previously displayed the unfiltered `df`).
df_true.head()

In [None]:
# Rows whose `recognized` flag is False.
df_false = df[df['recognized']==False]
# Preview the filtered frame (the cell previously displayed the unfiltered `df`).
df_false.head()

In [None]:
# Draw a class-balanced subset: up to `num_images_per_class` rows per word.
num_images_per_class = 200
grouped = df.groupby('word')

# Collect the per-class samples first and concatenate once. Growing a frame with
# pd.concat inside the loop re-copies all previous rows on every iteration and
# starts from an empty DataFrame, which pd.concat handles as a special case.
class_samples = [
    # Cap n at the group size so a class with fewer rows contributes all of
    # them instead of making sample() raise ValueError.
    group.sample(n=min(num_images_per_class, len(group)), random_state=42)
    for _, group in grouped
]

# pd.concat([]) raises, so fall back to an empty slice of df when there are no groups.
selected_df = (pd.concat(class_samples) if class_samples else df.iloc[:0]).reset_index(drop=True)

In [None]:
# Stack the balanced subset on top of every recognized drawing. `selected_df` was
# sampled from all rows of `df`, so recognized rows it already contains appear
# twice in `dff`.
dff = pd.concat([selected_df, df_true], ignore_index=True)
# Shuffle with a fixed seed. Note that this rebinds `df`: from here on it is the
# training subset, no longer the raw table read from train.csv.
df = dff.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
def categorical_focal_loss(gamma=2., alpha=.25):
    """Build a categorical focal loss for one-hot encoded targets.

    For every class the term is ``alpha * (1 - p) ** gamma * (-y * log(p))``;
    the terms are summed over the class (last) axis.

    Args:
        gamma: Exponent applied to ``1 - p``.
        alpha: Constant factor applied to every class term.

    Returns:
        A ``loss(y_true, y_pred)`` callable that can be passed to ``model.compile``.
        Its result has the shape of the inputs without the last axis.
    """
    def categorical_focal_loss_fixed(y_true, y_pred):
        # Renormalize so every prediction vector sums to 1 along the class axis.
        # A new tensor is built instead of using `/=`, so the caller's argument
        # is never modified in place.
        y_pred = y_pred / tf.reduce_sum(y_pred, axis=-1, keepdims=True)
        # Clip away exact 0 and 1 so that log() stays finite.
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * tf.math.log(y_pred)
        loss = alpha * tf.pow(1 - y_pred, gamma) * cross_entropy
        # Reduce over the same (last) axis that was used for the normalization;
        # a hard-coded axis=1 only matches it for rank-2 inputs.
        return tf.reduce_sum(loss, axis=-1)
    return categorical_focal_loss_fixed

In [None]:
# Class bookkeeping: one integer id per distinct word, numbered in the order in
# which the words first appear in df.
unique_labels = df['word'].unique()
num_classes = len(unique_labels)
label_to_index = dict(zip(unique_labels, range(num_classes)))
df['label_id'] = df['word'].map(label_to_index)

# ImageNet-pretrained EfficientNetB0 without its classification head; every
# backbone layer is frozen so that only the new head is trained.
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False

# New head: global average pooling followed by a softmax over the classes.
classifier_layers = [
    base_model,
    GlobalAveragePooling2D(),
    Dense(num_classes, activation='softmax'),
]
model = tf.keras.Sequential(classifier_layers)

# Adam optimizer with the categorical focal loss (gamma=5, alpha=0.81).
model.compile(
    optimizer='adam',
    loss=categorical_focal_loss(gamma=5., alpha=.81),
    metrics=['accuracy'],
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [None]:
# # Function to preprocess JSON representations and return an image array
# def preprocess_json(json_data):
#     image_data = json.loads(json_data)
#     # Assuming JSON representation contains grayscale pixel values in the 'data' field
#     image_array = np.array(image_data['data'])
#     image_array = image_array.reshape(224, 224, 1)  # Add a single channel for grayscale
#     image_array = np.repeat(image_array, 3, axis=2)  # Convert to RGB format
#     image_array = img_to_array(image_array)
#     image_array = np.expand_dims(image_array, axis=0)  # Add batch dimension
#     return image_array

from PIL import Image, ImageDraw
import numpy as np
import json

def vector_to_numpy(drawing, side=256):
    """Render a stroke drawing with ``vector_to_image`` and return the pixels as a NumPy array.

    Args:
        drawing: Drawing in the form accepted by ``vector_to_image``.
        side: Canvas size forwarded to ``vector_to_image``.

    Returns:
        numpy.ndarray built from the rendered image.
    """
    return np.array(vector_to_image(drawing, side))

# torchvision preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize the three channels with the mean/std constants below.
# NOTE(review): nothing in the visible notebook references `transform`; the Keras
# data generators build their inputs with PIL and img_to_array instead.
transform = transforms.Compose([
    #transforms.Grayscale(num_output_channels=3),  # Convert grayscale to RGB
    transforms.Resize((224, 224)),  # Resize the image to the input size of EfficientNet
    transforms.ToTensor(),  # Convert the image to a tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize the image
])

def vector_to_image(drawing, side=256):
    """Rasterize a stroke-vector drawing into a 224x224 RGB PIL image.

    Args:
        drawing: JSON text, or the already-decoded list, of strokes. Each stroke
            is ``[xs, ys]``; any further per-stroke channels (for example
            ``[xs, ys, ts]``) are ignored.
        side: Edge length in pixels of the square white canvas the strokes are
            drawn on before the final resize. The bounding box of the drawing is
            centred on the canvas; parts of a drawing that is wider or taller
            than ``side`` fall outside the canvas.

    Returns:
        PIL image with 1-pixel black strokes on a white background, always
        resized to (224, 224).

    Raises:
        ValueError: If the drawing contains no points.
    """
    # Accept both the JSON text stored in the CSV and an already-decoded list.
    if isinstance(drawing, (str, bytes, bytearray)):
        drawing = json.loads(drawing)
    min_x, min_y, max_x, max_y = calculate_bounding_box(drawing)

    # Translation that centres the bounding box on the canvas.
    shift_x = (side - (max_x - min_x + 1)) // 2 - min_x
    shift_y = (side - (max_y - min_y + 1)) // 2 - min_y

    # White RGB canvas with the strokes drawn in black.
    image = Image.new('RGB', (side, side), color='white')
    draw = ImageDraw.Draw(image)
    for stroke in drawing:
        # Index rather than unpack so strokes carrying extra channels still work.
        xs, ys = stroke[0], stroke[1]
        points = [(x + shift_x, y + shift_y) for x, y in zip(xs, ys)]
        draw.line(points, fill='black', width=1)

    # Final size matches the 224x224 input of the EfficientNet model.
    return image.resize((224, 224))


def calculate_bounding_box(drawing):
    """Return ``(min_x, min_y, max_x, max_y)`` over every point of every stroke.

    Args:
        drawing: Decoded list of strokes; element 0 of a stroke holds its x
            coordinates and element 1 its y coordinates. Further elements are ignored.

    Raises:
        ValueError: If the drawing contains no points.
    """
    # Flatten once instead of scanning the strokes separately for each extreme.
    xs = [x for stroke in drawing for x in stroke[0]]
    ys = [y for stroke in drawing for y in stroke[1]]
    if not xs or not ys:
        raise ValueError("cannot compute a bounding box for a drawing with no points")
    return min(xs), min(ys), max(xs), max(ys)

def image_data_generator(df, batch_size, target_size=(224, 224)):
    """Yield ``(images, one_hot_labels)`` training batches forever.

    The frame is reshuffled before every pass; within a pass, consecutive slices
    of ``batch_size`` rows are rendered (the final slice may be shorter).

    Args:
        df: Frame with a ``drawing`` column (stroke JSON) and a ``label_id`` column.
        batch_size: Number of rows per batch.
        target_size: Size every rendered image is resized to.

    Yields:
        Tuple of the stacked image arrays and the one-hot labels, encoded with
        the notebook-level ``num_classes``.
    """
    row_count = len(df)
    while True:
        # New row order for every pass over the data.
        reordered = shuffle(df)

        for start in range(0, row_count, batch_size):
            chunk = reordered.iloc[start:start + batch_size]
            rendered = [
                img_to_array(vector_to_image(raw_drawing).resize(target_size))
                for raw_drawing in chunk['drawing']
            ]
            images = np.array(rendered)
            labels = to_categorical(chunk['label_id'], num_classes=num_classes)
            yield images, labels

In [None]:
# Define batch size and number of epochs
batch_size = 200
epochs = 5

# Create the custom data generator
train_generator = image_data_generator(df, batch_size=batch_size)
# Number of full-size batches in one pass over df
steps_per_epoch = len(df) // batch_size

# Train the model (compiled with the categorical focal loss) using the custom data generator
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs)

# Save the trained model for future use.
# NOTE(review): despite the file name, no contrastive loss is used in the visible notebook.
model.save('efficientnet_grayscale_contrastive_model.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(


In [None]:
# df = pd.read_csv("/content/train.csv")
# df = df[df['recognized']==False]
# df.head()

In [None]:
# # Get unique labels from the DataFrame
# unique_labels = df['word'].unique()
# num_classes = len(unique_labels)

# # Create a mapping from label strings to integers
# label_to_index = {label: index for index, label in enumerate(unique_labels)}

# # Add a new column with integer labels
# df['label_id'] = df['word'].map(label_to_index)

In [None]:
# # Define batch size and number of epochs
# batch_size = 200
# epochs = 2

# # Create the custom data generator
# train_generator = image_data_generator(df, batch_size=batch_size)
# steps_per_epoch = len(df) // batch_size

In [None]:
# from tensorflow.keras.models import load_model
# pretrained_model = load_model('/content/efficientnet_grayscale_contrastive_model.h5')

# # Freeze the layers of the pretrained_model model
# for layer in pretrained_model.layers:
#     layer.trainable = False

# # Get unique labels from the DataFrame
# unique_labels = df['word'].unique()
# num_classes = len(unique_labels)

# # Create a mapping from label strings to integers
# label_to_index = {label: index for index, label in enumerate(unique_labels)}

# # Add a new column with integer labels
# df['label_id'] = df['word'].map(label_to_index)

# full_model = tf.keras.Sequential([
#      pretrained_model,
#      Dense(num_classes, activation='softmax')
#   ])

# # Compile the model
# full_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# full_model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=epochs)
# full_model.save('updated_model.h5')

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# # Assuming 'model' is your trained model
# model.save('/content/drive/MyDrive/my_model.keras')

In [None]:
# from tensorflow.keras.models import load_model

# # Load the model from the local file
# model = load_model('efficientnet_grayscale_contrastive_model.h5')

In [None]:
# Rebuild the word-to-class-id mapping from df. Ids follow the order in which the
# words first appear in df, so df must be in the same row order that was used
# when the model was trained.
unique_labels = df['word'].unique()
num_classes = len(unique_labels)
label_to_index = dict(zip(unique_labels, range(num_classes)))

# Integer class id for every row
df['label_id'] = df['word'].map(label_to_index)

In [None]:
# Load the test drawings and the sample submission file extracted from the archive.
test_df = pd.read_csv("/content/test.csv")
ss = pd.read_csv("/content/sample_submission.csv")

In [None]:
# Preview the first rows of the test table.
test_df.head()

Unnamed: 0,drawing,key_id
0,"[[[99, 98, 102, 104, 96, 101], [39, 43, 41, 36...",4635484858875904
1,"[[[149, 149, 164, 163, 155, 131, 127, 91, 78, ...",5954439485587456
2,"[[[124, 80, 66, 54, 35, 22, 17, 17, 26, 44, 50...",4703897211895808
3,"[[[24, 31, 23, 21, 36, 56, 87, 119, 142, 162, ...",4614697670148096
4,"[[[73, 73, 78, 103, 114, 120, 122, 112, 97, 84...",5669598810603520


In [None]:
# Reverse lookup: integer class id -> word.
inv_map = {class_id: word for word, class_id in label_to_index.items()}

In [None]:
def image_test_generator(df, batch_size, target_size=(224, 224)):
    """Yield image batches for inference, in row order, restarting after the last row.

    Args:
        df: Frame with a ``drawing`` column holding stroke JSON.
        batch_size: Number of rows per batch (the final batch of a pass may be shorter).
        target_size: Size every rendered image is resized to.

    Yields:
        numpy.ndarray stacking the image arrays of one batch. The generator never
        terminates, so the consumer has to bound the number of batches it reads.
    """
    row_count = len(df)
    while True:
        for start in range(0, row_count, batch_size):
            chunk = df.iloc[start:start + batch_size]
            rendered = [
                img_to_array(vector_to_image(raw_drawing).resize(target_size))
                for raw_drawing in chunk['drawing']
            ]
            yield np.array(rendered)

In [None]:
# Predict on the test set in fixed-size batches (requires the DataFrame 'test_df').

# Define batch size for testing
batch_size_test = 200

# Create a custom data generator for testing data
test_generator = image_test_generator(test_df, batch_size=batch_size_test)

# Number of full batches. Floor division leaves the last
# len(test_df) % batch_size_test rows unpredicted here; a later cell predicts
# those tail rows separately.
steps_test = len(test_df) // batch_size_test

# Generate predictions for testing data
predictions = model.predict(test_generator, steps=steps_test)

# 'predictions' now holds the model outputs for the rows covered by the full batches.



In [None]:
# Predict the tail rows that the batched pass did not cover. That pass runs
# len(test_df) // batch_size_test full batches, so every row from `tail_start`
# onwards is still missing. Deriving the bounds replaces hard-coded row numbers
# that were only valid for one particular test-set size.
tail_start = (len(test_df) // batch_size_test) * batch_size_test
batch_samples = test_df.iloc[tail_start:]
X_batch = []
for json_data in batch_samples['drawing']:
    image = vector_to_image(json_data)
    image = image.resize((224, 224))  # Resize to (224, 224)
    image_array = img_to_array(image)
    X_batch.append(image_array)
if X_batch:
    X_batch = np.array(X_batch)
    preds = model.predict(X_batch)
else:
    # len(test_df) is an exact multiple of the batch size: nothing is left to
    # predict, so provide empty arrays that still concatenate cleanly.
    X_batch = np.empty((0, 224, 224, 3), dtype='float32')
    preds = np.empty((0, num_classes), dtype='float32')



In [None]:
# Append the tail predictions after the batched ones, keeping test_df row order.
# NOTE: not idempotent - running this cell again appends `preds` a second time.
predictions = np.concatenate((predictions, preds))

In [None]:
# Ensure one row per sample and one column per class. The column count comes from
# num_classes rather than a literal, so a change in the label set cannot make the
# reshape silently regroup values across rows.
predictions = predictions.reshape(-1, num_classes)
print(predictions)

[[1.2077507e-02 6.1553053e-04 7.4834619e-03 ... 6.4066780e-04
  2.7721971e-02 6.0905437e-03]
 [1.4810668e-02 1.0589069e-03 2.2331015e-03 ... 1.1512469e-02
  7.5245771e-04 2.6997871e-04]
 [3.4910417e-04 1.3711048e-02 2.3388922e-05 ... 6.8243546e-04
  9.0662164e-05 1.0217065e-03]
 ...
 [1.4785310e-03 4.5104025e-04 7.3764436e-03 ... 1.2666466e-02
  8.7801553e-03 1.0578039e-04]
 [8.2819804e-04 4.1304063e-03 1.0434696e-03 ... 4.6770616e-05
  1.8442508e-03 2.1816771e-03]
 [2.1875616e-04 2.0464313e-04 2.5929508e-04 ... 2.0161344e-04
  1.7846655e-04 1.5804740e-03]]


In [None]:
def get_strings(soft_arr):
    """Return the three highest-scoring labels of one sample as a space-separated string.

    Args:
        soft_arr: Class scores for a single sample, either 1-D ``(num_classes,)``
            or 2-D ``(1, num_classes)``. For a 2-D input only the first row is used.

    Returns:
        str: The labels for the best, second and third score, looked up in
        ``inv_map`` and joined by single spaces.
    """
    # Only the first row is ever read, so rank just that row instead of
    # argsorting the whole array; atleast_2d also lets 1-D input through.
    scores = np.atleast_2d(soft_arr)[0]
    top_indices = np.argsort(-1 * scores)[:3]
    return " ".join(f"{inv_map[index]}" for index in top_indices)

In [None]:
# Wrap every prediction row as a single-row 2-D array, the form get_strings indexes.
pr = [row.reshape(1, -1) for row in predictions]

In [None]:
# Top-3 label string for every test sample, in prediction order.
result = [get_strings(row) for row in pr]

In [None]:
# Store the label strings in the submission frame; `result` must have one entry per row of `ss`.
ss['word'] = result

In [None]:
# Write the submission. NOTE: this overwrites the sample_submission.csv that was
# extracted from the archive and read in earlier.
ss.to_csv("/content/sample_submission.csv", index=False)