In [1]:
import pickle
import os

# Process data

In [2]:
from components.DataProcessor import DataProcessor


# Input CSVs and output directory for the data-processing step.
# All dataset paths are relative to the notebook's working directory.
file_path = 'COMP90086_2023_TLLdataset/train.csv'
test_file_path = 'COMP90086_2023_TLLdataset/test_candidates.csv'
save_path = 'dictionaries'
dataset_base_path = 'COMP90086_2023_TLLdataset'

# Run the DataProcessor stages in order. According to the recorded cell output,
# this writes train_dict.pkl, val_dict.pkl and test_dict.pkl under `save_path`.
# NOTE(review): each stage presumably relies on state set by the previous one,
# so keep the call order as is -- confirm against components/DataProcessor.
processor = DataProcessor(file_path, test_file_path, save_path, dataset_base_path)
processor.load_data()
processor.split_data()
processor.generate_dictionaries()
processor.save_dictionaries()
processor.process_test_candidates()


Dictionaries saved to 'dictionaries/train_dict.pkl' and 'dictionaries/val_dict.pkl'
Dictionaries saved to dictionaries/test_dict.pkl


In [4]:
def load_pkl(file_path):
    """Return the object stored in the pickle file at ``file_path``.

    Args:
        file_path: Path of the pickle file to read (opened in binary mode).

    Returns:
        Whatever object was pickled into the file.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        produced by a trusted source.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

In [5]:
# Read back the three dictionaries written by the data-processing step.
train_data = load_pkl('dictionaries/train_dict.pkl')
val_data = load_pkl('dictionaries/val_dict.pkl')
test_data = load_pkl('dictionaries/test_dict.pkl')

# Report how many entries each dictionary holds.
for size_label, split_dict in (
    ('train data size: ', train_data),
    ('val data size: ', val_data),
    ('test data size: ', test_data),
):
    print(size_label, len(split_dict))

train data size:  1600
val data size:  400
test data size:  2000


In [5]:
# print('train data sample: ', train_data)

# Extract features

In [6]:
from components.FeatureExtractor import FeatureExtractor

# Output directory for the extracted features and the backbone models to run.
base_output_dir = "feat"
models_to_train = ["resnet50", "resnet101", "resnet152", "densenet201", "vgg16", "mobilenet"]

# Load the train/val dictionaries from 'dictionaries/', the directory the
# data-processing step of this notebook writes to. This cell used to read from
# 'saved_dictionaries/', a directory this notebook never writes, so a fresh
# Restart & Run All could not find the files.
train_data = load_pkl('dictionaries/train_dict.pkl')
val_data = load_pkl('dictionaries/val_dict.pkl')

# For every model, extract and save features for both splits. The set name
# ("train_data" / "val_data") is passed through to the extractor together with
# the corresponding dictionary.
for model_name in models_to_train:
    extractor = FeatureExtractor(model_name, base_output_dir)
    for set_name, data_dict in [("train_data", train_data), ("val_data", val_data)]:
        extractor.extract_and_save_features(data_dict, set_name)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5


# Analysis

## Load the features

In [15]:
def load_saved_features(model_name, set_name, base_output_dir="feat"):
    """Read back the pickled features for one model / data-set combination.

    The file is expected at
    ``<base_output_dir>/<model_name>/<set_name>/<set_name>_features.pkl``.

    Args:
        model_name: Name of the model sub-directory (e.g. ``"resnet50"``).
        set_name: Name of the data-set sub-directory; also the filename prefix.
        base_output_dir: Root directory of the saved features.

    Returns:
        The object that was pickled into the features file.
    """
    pickle_name = f"{set_name}_features.pkl"
    pickle_path = os.path.join(base_output_dir, model_name, set_name, pickle_name)
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)


# Choose which saved feature file to inspect: model "resnet50", set "train_data".
set_name = "train_data"
model_name = "resnet50"
base_output_dir = "feat"

loaded_features = load_saved_features(
    model_name=model_name,
    set_name=set_name,
    base_output_dir=base_output_dir,
)