In [3]:
!nvidia-smi

Tue May 23 16:22:56 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   65C    P8    11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [8]:
from google.colab import drive
import os
import zipfile

# Attach Google Drive so files stored there are visible under /content/drive
drive.mount('/content/drive')

# Archive holding the competition data, expected in the Drive root
zip_path = '/content/drive/MyDrive/multi-label-classification-competition-2023.zip'

if not os.path.exists(zip_path):
    # Nothing to unpack: report it and carry on without raising
    print("The specified zip file does not exist.")
else:
    # Unpack next to the archive, straight into the Drive root
    unzip_dir = '/content/drive/MyDrive/'
    with zipfile.ZipFile(zip_path) as zip_ref:
        zip_ref.extractall(path=unzip_dir)
        print("Files have been unzipped successfully.")

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')
import re
import pandas as pd
from io import StringIO
datafolderpath ="/content/drive/MyDrive/COMP5329S1A2Dataset/data"
testcsvpath = "/content/drive/MyDrive/COMP5329S1A2Dataset/test.csv"
traincsvpath = "/content/drive/MyDrive/COMP5329S1A2Dataset/train.csv"


def _read_caption_csv(csv_path):
    """Read one of the dataset CSVs whose Caption column contains raw quotes.

    Every double quote that is neither directly after a comma nor the last
    character before the line break is prefixed with '/', and '/' is then
    passed to pandas as the escape character so those quotes do not end the
    field. After parsing, all remaining double quotes are stripped from the
    Caption column.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read; it must contain a 'Caption' column.

    Returns
    -------
    pandas.DataFrame
        The parsed frame with the cleaned 'Caption' column moved to the end.
    """
    with open(csv_path) as file:
        escaped_text = ''.join(
            re.sub(r'([^,])"(\s*[^\n])', r'\1/"\2', line) for line in file
        )
    frame = pd.read_csv(StringIO(escaped_text), escapechar="/")
    cleaned_caption = frame['Caption'].str.replace('\"', '')
    # drop + join keeps the original behaviour of placing Caption last
    return frame.drop(columns = 'Caption').join(cleaned_caption)


# load data to dataframe (same parsing rules for both splits)
df_train_origin = _read_caption_csv(traincsvpath)
df_test = _read_caption_csv(testcsvpath)
print(df_train_origin)
print(df_test)

         ImageID  Labels                                            Caption
0          0.jpg       1   Woman in swim suit holding parasol on sunny day.
1          1.jpg    1 19  A couple of men riding horses on top of a gree...
2          2.jpg       1  They are brave for riding in the jungle on tho...
3          3.jpg  8 3 13  a black and silver clock tower at an intersect...
4          4.jpg   8 3 7   A train coming to a stop on the tracks out side.
...          ...     ...                                                ...
29991  29995.jpg   8 1 2  A picture of a truck that is in the middle of ...
29992  29996.jpg       1  A plate topped with a pizza being cut with a s...
29993  29997.jpg       1          A man riding a snowboard on top of  snow.
29994  29998.jpg       1   This photo shows people skiing in the mountains.
29995  29999.jpg       1  Two young men playing soccer and fighting for ...

[29996 rows x 3 columns]
        ImageID                                            Cap

In [4]:
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from keras.models import Model
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.models import Sequential
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import string
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import backend as K
import tensorflow.keras.backend as K
# Initialize stemmer
# One PorterStemmer instance is created at module level; preprocess_caption
# calls its stem() method on every token it keeps.
stemmer = PorterStemmer()

# Cache for the English stop-word set; filled on first use so that defining
# this cell does not itself require the NLTK corpus to be loaded.
_STOP_WORDS_CACHE = None


def _english_stop_words():
    """Return the English stop words as a frozenset, loading them only once."""
    global _STOP_WORDS_CACHE
    if _STOP_WORDS_CACHE is None:
        _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE


# Function to preprocess captions
def preprocess_caption(caption):
    """Normalise a caption into a space-separated string of stemmed tokens.

    Steps, in order: lowercase, tokenize with ``word_tokenize``, drop tokens
    that occur inside ``string.punctuation``, drop English stop words, stem
    each remaining token with the module-level ``stemmer``.

    Parameters
    ----------
    caption : str
        Raw caption text.

    Returns
    -------
    str
        The surviving stemmed tokens joined by single spaces (empty string
        when nothing survives).
    """
    # The stop-word list is loop-invariant: fetch it once (cached across
    # calls) instead of re-reading it for every token, and use a set so each
    # membership test is O(1).
    stop_words = _english_stop_words()

    tokens = word_tokenize(caption.lower())
    kept = [
        stemmer.stem(token)
        for token in tokens
        # `in string.punctuation` is a substring test on the punctuation
        # string, exactly as in the original filter.
        if token not in string.punctuation and token not in stop_words
    ]
    return ' '.join(kept)


def f1_score(y_true, y_pred):
    """F1 metric computed with Keras backend ops over all elements passed in.

    Values are clipped to [0, 1] and rounded before counting, so the counts
    are taken over every entry of the tensors at once (no per-label or
    per-sample averaging). ``K.epsilon()`` is added to each denominator to
    avoid division by zero.

    Parameters
    ----------
    y_true : tensor
        Ground-truth indicator values.
    y_pred : tensor
        Predicted scores, same shape as ``y_true``.

    Returns
    -------
    tensor
        Scalar ``2 * P * R / (P + R + eps)``.
    """
    def _count_ones(values):
        # Number of entries that round to 1 after clipping to [0, 1]
        return K.sum(K.round(K.clip(values, 0, 1)))

    eps = K.epsilon()

    hits = _count_ones(y_true * y_pred)
    n_predicted = _count_ones(y_pred)
    n_actual = _count_ones(y_true)

    precision = hits / (n_predicted + eps)
    recall = hits / (n_actual + eps)

    return 2 * (precision * recall) / (precision + recall + eps)

# Run the same caption normalisation over both splits (train first, then test).
# NOTE(review): the Caption column is overwritten in place, so re-running this
# cell applies preprocess_caption to already-processed text.
for caption_frame in (df_train_origin, df_test):
    caption_frame['Caption'] = caption_frame['Caption'].apply(preprocess_caption)

# Turn the whitespace-separated label strings into a multi-hot matrix
label_lists = df_train_origin['Labels'].str.split()
mlb = MultiLabelBinarizer()
binary_labels = mlb.fit_transform(label_lists)

# TF-IDF over the processed training captions, capped at 1000 terms,
# materialised as a dense array
vectorizer = TfidfVectorizer(max_features=1000)
caption_tfidf = vectorizer.fit_transform(df_train_origin['Caption'])
caption_features = caption_tfidf.toarray()

# ImageNet-pretrained ResNet50, truncated at the 'avg_pool' layer so that it
# outputs that layer's activations instead of class scores
base_model = ResNet50(weights='imagenet')
pooled_output = base_model.get_layer('avg_pool').output
feature_extractor = Model(inputs=base_model.input, outputs=pooled_output)

# Load images from disk chunk by chunk and push each chunk through a model
def extract_features_batch(img_paths,model, batch_size=1024):
    """Compute model outputs for a sequence of image files.

    Images are loaded at 224x224, stacked per chunk of ``batch_size`` paths,
    passed through ``preprocess_input`` and then ``model.predict``. The
    per-chunk outputs are accumulated in order.

    Parameters
    ----------
    img_paths : sequence
        Image file paths; must support ``len`` and slicing.
    model : object
        Anything exposing ``predict(batch)``.
    batch_size : int, optional
        Number of images loaded and predicted together (default 1024).

    Returns
    -------
    numpy.ndarray
        One row per input path, in input order (an empty array when
        ``img_paths`` is empty).
    """
    def _load_pixels(path):
        # Decode one file and convert it to an array at the fixed input size
        return img_to_array(load_img(path, target_size=(224, 224)))

    collected = []
    for start in range(0, len(img_paths), batch_size):
        chunk = img_paths[start:start + batch_size]
        pixels = np.array([_load_pixels(path) for path in chunk])
        outputs = model.predict(preprocess_input(pixels))
        collected.extend(outputs)
    return np.array(collected)

# Resolve every training ImageID to a file inside the data folder, then run
# the images through the truncated ResNet50
train_image_paths = [
    os.path.join(datafolderpath, img_path)
    for img_path in df_train_origin['ImageID']
]
image_features = extract_features_batch(train_image_paths, feature_extractor)

# Place caption features and image features side by side (columns), one row
# per training example
final_features = np.concatenate((caption_features, image_features), axis=1)



In [5]:
# Define the model: one hidden ReLU layer with dropout on top of the
# concatenated caption + image features, and one sigmoid output per label
# column of binary_labels
model = Sequential()
model.add(Dense(1024, activation='relu', input_shape=(final_features.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(binary_labels.shape[1], activation='sigmoid'))

# Compile the model
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and triggers a warning (or an error on newer Keras releases).
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=[f1_score])

# Define callbacks
# The checkpoint writes the best-val_loss weights to 'model.h5'.
checkpoint = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)
# restore_best_weights=True asks EarlyStopping to roll the in-memory model
# back to its best-val_loss weights when it halts training; without it the
# model used afterwards carries the weights of the last (worse) epoch, since
# 'model.h5' is never loaded back.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model; the History object is kept so the cell does not end on a
# bare repr and the curves stay available for inspection
history = model.fit(final_features, binary_labels, epochs=20, batch_size=32, validation_split=0.2, callbacks=[checkpoint, early_stopping])

  super().__init__(name, **kwargs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


<keras.callbacks.History at 0x7f7130432020>

In [6]:
# Vectorise the test captions with the TF-IDF vocabulary fitted on the
# training captions (transform only, no re-fit)
test_caption_features = vectorizer.transform(df_test['Caption']).toarray()

# Extract features from test images
test_image_paths = [os.path.join(datafolderpath, img_path) for img_path in df_test['ImageID']]
test_image_features = extract_features_batch(test_image_paths, feature_extractor)

# Combine test features in the same column order used for training
test_final_features = np.concatenate([test_caption_features, test_image_features], axis=1)

# Make predictions on the test data
predictions = model.predict(test_final_features)
# Convert predictions to binary
binary_predictions = (predictions > 0.5).astype(int)

# Edge case: a row in which no score exceeds 0.5 would be written with an
# empty Labels field. For those rows fall back to the single highest-scoring
# label so every image gets at least one prediction.
# NOTE(review): assumes every image carries at least one label, as in the
# training rows shown above - confirm against the competition rules.
unlabelled_rows = np.flatnonzero(binary_predictions.sum(axis=1) == 0)
binary_predictions[unlabelled_rows, predictions[unlabelled_rows].argmax(axis=1)] = 1

# Convert binary predictions to labels
predicted_labels = mlb.inverse_transform(binary_predictions)

# Prepare submission dataframe: one space-separated label string per image
submission_df = pd.DataFrame({
    'ImageID': df_test['ImageID'],
    'Labels': [' '.join(map(str, labels)) for labels in predicted_labels]
})

# Write submission dataframe to CSV
submission_df.to_csv('submission.csv', index=False)


