In [1]:
import tensorflow as tf

import os
import shutil
import glob

import zipfile
import pandas as pd
from tqdm import tqdm

In [2]:
# Folders the competition archives are extracted into (each has a matching .zip)
ORG_TRAIN_DIR = "COMP90086_2021_Project_train"
ORG_TEST_DIR = "COMP90086_2021_Project_test"

# Image sub-folders inside each extracted archive; the trailing slash is part of the value
train_img_dir = f"{ORG_TRAIN_DIR}/train/"
test_img_dir = f"{ORG_TEST_DIR}/test/"

# Root of the per-class folder tree that the training images are copied into
TRAIN_DIR = "./DATASET"

In [3]:
def extract_zip_files(dir):
    """
    Unpack the archive ``<dir>.zip`` into the folder ``dir``.

    dir: path of the destination folder; the archive is expected to sit
         next to it under the same name with a ``.zip`` suffix.
    """
    archive_path = dir + ".zip"
    with zipfile.ZipFile(archive_path, mode="r") as archive:
        archive.extractall(path=dir)

In [4]:
# Unpack both competition archives (train first, then test)
for archive_dir in (ORG_TRAIN_DIR, ORG_TEST_DIR):
    extract_zip_files(archive_dir)

In [5]:
# (x, y) coordinate pair -> integer class label; filled in by create_label
labels = dict()
# integer class label -> (x, y) coordinate pair; the inverse of `labels`
label_to_coordinate = dict()

In [6]:
def append_ext(filename):
    """
    Return ``filename`` with the ``.jpg`` extension added to the end.
    """
    jpg_suffix = ".jpg"
    return filename + jpg_suffix

def create_label(x, y):
    """
    Label-encode a coordinate pair.

    The first time a given (x, y) pair is seen it is assigned the next
    unused integer label (0, 1, 2, ...) and recorded in both module-level
    lookup tables: ``labels`` (pair -> label) and ``label_to_coordinate``
    (label -> pair). Later calls with the same pair return the same label.

    x, y: the coordinate values; the pair must be hashable as a tuple.
    Returns the integer label for (x, y).
    """
    coordinate = (x, y)

    if coordinate not in labels:
        # Labels are handed out consecutively from 0, so the next free label
        # is simply the number of pairs registered so far. This avoids
        # rebuilding the key list (and relying on dict order) on every insert.
        label = len(labels)
        labels[coordinate] = label
        label_to_coordinate[label] = coordinate

    return labels[coordinate]

In [7]:
# Load the tables listing the image names (plus x/y coordinates for training)
df_train = pd.read_csv(os.path.join(ORG_TRAIN_DIR, 'train.csv'))
df_test = pd.read_csv(os.path.join(ORG_TEST_DIR, 'imagenames.csv'))

# Add ".jpg" to every id so it can be used directly as an image file name
for frame in (df_train, df_test):
    frame["id"] = frame["id"].apply(append_ext)

# Turn each (x, y) coordinate pair into an integer class label
df_train["label"] = df_train.apply(
    lambda row: create_label(row.x, row.y),
    axis=1,
)

df_train.head()

Unnamed: 0,id,x,y,label
0,IMG2744_1.jpg,-9.380678,3.58272,0
1,IMG2744_2.jpg,-9.380678,3.58272,0
2,IMG2744_3.jpg,-9.380678,3.58272,0
3,IMG2744_4.jpg,-9.380678,3.58272,0
4,IMG2744_5.jpg,-9.380678,3.58272,0


In [8]:
# Class label of every training image, in row order
y_train = df_train['label'].tolist()

# Distinct labels: their count sizes the classifier output layer,
# and the list drives the per-class folder creation
unique_labels = set(y_train)
n_classes = len(unique_labels)
CLASS = list(unique_labels)

In [9]:
# Segregating our images into folder structure based on their location:
# every image is copied to TRAIN_DIR/<label>/ so the directory name is the class label.

# Group the image names by label once up front, instead of rescanning every
# row of df_train for each class (which costs classes x rows pandas lookups).
ids_by_label = df_train.groupby('label')['id'].apply(list).to_dict()

for C_ID in tqdm(range(len(CLASS))):
    DEST = os.path.join(TRAIN_DIR, str(C_ID))

    # exist_ok makes the cell safe to re-run without a separate existence check
    os.makedirs(DEST, exist_ok=True)

    for image_id in ids_by_label.get(C_ID, []):
        SRC = os.path.join(train_img_dir, image_id)
        shutil.copy(SRC, DEST)

100%|██████████████████████████████████████████████████████████████████████████████| 1499/1499 [01:43<00:00, 14.47it/s]


# Model Building

In [10]:
# Libraries needed for building the model
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

import numpy as np
import matplotlib.pyplot as plt

# Preprocess the data using a data generator

In [11]:
# Splitting the generator into 80:20 with image size (256, 256)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Validation images must only be rescaled: augmenting them makes val_loss
# noisy, and val_loss is what early stopping monitors. Using the same
# validation_split keeps the two subsets complementary.
valid_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# NOTE: flow_from_directory numbers the classes by alphanumeric order of the
# folder names ('0', '1', '10', '100', ...), so a class index is NOT the
# integer folder label; use `train_datagener.class_indices` to map between them.
train_datagener = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(256, 256),
    batch_size=64,
    subset='training'
)

# Held-out 20% used as validation data during training
test_datagener = valid_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(256, 256),
    batch_size=64,
    subset='validation'
)

Found 6000 images belonging to 1499 classes.
Found 1500 images belonging to 1499 classes.


In [12]:
# Drop any graph/state left over from a previous run of this cell
keras.backend.clear_session()

# Small baseline network: one strided convolution, one pooling step,
# then a softmax classifier with one unit per location class.
cnn = Sequential([
    Conv2D(
        filters=64,
        kernel_size=3,
        strides=2,
        padding='same',
        activation='relu',
        input_shape=(256, 256, 3),
    ),
    MaxPool2D(pool_size=2, strides=2),
    Flatten(),
    Dense(n_classes, activation='softmax'),
])

cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 128, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 262144)            0         
_________________________________________________________________
dense (Dense)                (None, 1499)              392955355 
Total params: 392,957,147
Trainable params: 392,957,147
Non-trainable params: 0
_________________________________________________________________


In [13]:
def manhanttan_dist(y_true, y_pred):
    """
    Accumulate absolute differences between paired entries of y_true and y_pred.

    The two inputs are iterated in lockstep, |true - pred| is summed over all
    pairs, and the total is divided by ``y_true.shape[1]``.

    y_true must expose a ``shape`` with at least two dimensions.
    NOTE(review): the divisor is the size of the second axis, not the number
    of pairs that were summed - confirm this is the intended normalisation.
    """
    total_abs_diff = sum(
        abs(expected - predicted)
        for expected, predicted in zip(y_true, y_pred)
    )

    return total_abs_diff / y_true.shape[1]

In [14]:
# Configure training: Adam with default settings, cross-entropy over the
# softmax outputs, and accuracy reported alongside the loss
cnn.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

In [15]:
# Early stopping the model based on the validation loss: training halts once
# val_loss has not improved for 5 consecutive epochs.
# restore_best_weights rolls the model back to the epoch with the lowest
# val_loss; otherwise the model keeps the weights from the last epoch, which
# is `patience` epochs past the best one.
es = EarlyStopping(monitor="val_loss",
                   patience=5,
                   restore_best_weights=True,
                   verbose=1)

In [17]:
# Fit for at most 10 epochs, validating on the held-out subset after each one;
# the early-stopping callback may end training sooner
history = cnn.fit(
    train_datagener,
    validation_data=test_datagener,
    epochs=10,
    callbacks=[es],
    verbose=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 00008: early stopping


In [18]:
# Folder holding the test images; an alias of test_img_dir
TEST_DIR = test_img_dir

In [21]:
test_image_data = []

# Storing the test images needed to predict, in the row order of df_test
for image_name in tqdm(df_test['id']):
    img = keras.preprocessing.image.load_img(
        os.path.join(TEST_DIR, image_name),
        # load_img expects (height, width); the channel count is not part of it
        target_size=(256, 256),
    )
    # Same 1/255 rescaling that the training generator applies
    img = keras.preprocessing.image.img_to_array(img) / 255
    test_image_data.append(img)

# Stack the per-image arrays into a single batch for cnn.predict
predict_x = np.array(test_image_data)

100%|██████████████████████████████████████████████████████████████████████████████| 1200/1200 [00:14<00:00, 84.95it/s]


In [23]:
# Predicting the test images
# `pred` holds one row of softmax outputs per test image (the network ends
# in a softmax Dense layer with n_classes units).
pred = cnn.predict(predict_x)

In [24]:
predictions = []

# flow_from_directory numbers classes by alphanumeric order of the folder
# names ('0', '1', '10', '100', ...), so the argmax of a softmax row is NOT
# the integer label that create_label assigned. Translate
# class index -> folder name -> integer label before the coordinate lookup.
class_index_to_label = {
    class_index: int(folder_name)
    for folder_name, class_index in train_datagener.class_indices.items()
}

# Converting label encoded to coordinates
for class_index in tqdm(np.argmax(pred, axis=1)):
    label = class_index_to_label[int(class_index)]
    predictions.append(label_to_coordinate[label])

100%|███████████████████████████████████████████████████████████████████████████| 1200/1200 [00:00<00:00, 58255.57it/s]


In [25]:
# Re-read the original test image names (without the ".jpg" suffix added to
# df_test) and start an empty submission table with the same column layout
sample = pd.read_csv(ORG_TEST_DIR + '/imagenames.csv')

submission = pd.DataFrame(columns=sample.columns)
submission['id'] = sample['id']

In [26]:
# One row per test image: each predicted (x, y) tuple is split into 'x' and 'y' columns
split_x_y = pd.DataFrame(predictions, columns=['x', 'y'])

In [27]:
# Copy the predicted coordinates into the submission table, x first then y
for coord in ('x', 'y'):
    submission[coord] = split_x_y[coord]

In [28]:
# Write the submission table to disk without the DataFrame index column
submission.to_csv("./cnn_prediction.csv", index=False)