In [1]:
# imports
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import cv2
import tensorflow as tf
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow import data as tf_data
import time
from sklearn.metrics import confusion_matrix, precision_score, recall_score, log_loss
import numpy as np
from typing import List, Dict, Any, Union

from utils import print_progress_bar
print("Modules imported")

Modules imported


In [2]:
# variables
all_labels = ['nature', 'country', 'city']
path = "../datasets/all_data/entropy_results_short.json"
parallel_jobs = 5

# hyperparameters
test_part = 0.05
epochs = 2
batch_size = 64
learning_rate = 0.01

# reproducibility: seed the global TensorFlow and NumPy generators so that
# weight initialisation and dataset shuffling are repeatable across runs
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# check gpu
devices = tf.config.list_physical_devices()
print("Available devices:")
for device in devices:
    print(device.name)
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured identically on every visible GPU.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Raised when the TensorFlow runtime is already initialised, e.g. when
        # this cell is re-run in a live kernel; the earlier setting still holds.
        print(f"Could not change GPU memory growth: {err}")
    print(f"The model will run on GPU: {physical_devices[0].name}")
else:
    print("No GPU found, the model will run on CPU.")

Available devices:
/physical_device:CPU:0
/physical_device:GPU:0
The model will run on GPU: /physical_device:GPU:0


In [3]:
# heads
class SelfAttention(tf.keras.layers.Layer):
    """Scaled dot-product self-attention built from three dense projections.

    The input is projected to queries, keys and values of width
    ``embed_size // heads``. No per-head split is performed: ``heads`` only
    determines the projection width, so the output's last dimension is
    ``embed_size // heads``.

    Attention is computed over the second-to-last axis of the input. For a
    rank-2 input ``(N, features)`` this means the N rows attend to each other.

    Args:
        embed_size: Size of the embedding; must be divisible by ``heads``.
        heads: Divisor applied to ``embed_size`` to get the projection width.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``,
            ``dtype``, ...), forwarded to the base class.

    Raises:
        AssertionError: If ``embed_size`` is not divisible by ``heads``.
    """

    def __init__(self, embed_size, heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            self.head_dim * heads == embed_size
        ), "Embedding size needs to be divisible by heads"

        self.wq = tf.keras.layers.Dense(self.head_dim)
        self.wk = tf.keras.layers.Dense(self.head_dim)
        self.wv = tf.keras.layers.Dense(self.head_dim)

    def call(self, inputs):
        """Return ``softmax(Q K^T / sqrt(d_k)) V`` for the projected inputs."""
        queries = self.wq(inputs)
        keys = self.wk(inputs)
        values = self.wv(inputs)

        scores = tf.matmul(queries, keys, transpose_b=True)

        # Cast the scale to the dtype of the scores instead of a fixed float32,
        # so the division also works when the layer computes in float16.
        key_depth = tf.cast(tf.shape(keys)[-1], scores.dtype)
        attention_weights = tf.nn.softmax(scores / tf.math.sqrt(key_depth), axis=-1)

        return tf.matmul(attention_weights, values)

    def get_config(self):
        """Return the constructor arguments so Keras can re-create the layer."""
        config = super().get_config()
        config.update({"embed_size": self.embed_size, "heads": self.heads})
        return config

In [13]:
# model
class EntropyClassifier(tf.keras.Model):
    """Classifier over multi-resolution entropy features.

    The model consumes a 5-tuple ``(lvl0, lvl1, lvl2, lvl3, dwt)``. Each stream
    has its own input stage; the stage outputs are flattened and concatenated
    per sample, passed through ``SelfAttention`` and a dense layer, and finally
    mapped to one probability per entry of ``possible_labels``.
    """

    def __init__(self, possible_labels):
        """Create all sub-layers.

        Args:
            possible_labels: Sequence of class names; its length sets the
                width of the softmax output and its order sets the class index.
        """
        super(EntropyClassifier, self).__init__()

        self.possible_labels = possible_labels
        self.debug = False  # when True, call() prints every intermediate shape

        self.dwt_input_layer = tf.keras.layers.Dense(10, activation='relu')
        self.lvl0_input_layer = tf.keras.layers.Dense(17, activation='relu')
        # One single-filter convolution per input channel for levels 1-3.
        self.lvl1_input_layers = [tf.keras.layers.Conv2D(1, (2, 2), activation='relu') for _ in range(17)]

        self.lvl2_input_layers = [tf.keras.Sequential([
            tf.keras.layers.Conv2D(1, (2, 2), activation='relu'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
        ]) for _ in range(17)]

        self.lvl3_input_layers = [tf.keras.Sequential([
            tf.keras.layers.Conv2D(1, (2, 2), activation='relu'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))
        ]) for _ in range(17)]

        # Flattened per-sample width of each stage, assuming the per-sample
        # shapes declared in format_dataset: (1,1,17), (2,2,17), (4,4,17),
        # (8,8,17) and (1,1,10).
        dwt_output_size = 10
        lvl0_output_size = 17
        lvl1_output_size = 17   # 2x2 conv on a 2x2 grid -> 1x1, times 17 channels
        lvl2_output_size = 17   # 4x4 -> conv 3x3 -> pool 1x1, times 17 channels
        lvl3_output_size = 153  # 8x8 -> conv 7x7 -> pool 3x3, times 17 channels

        embed_size = dwt_output_size + lvl0_output_size + lvl1_output_size + lvl2_output_size + lvl3_output_size # = 214

        heads = 1  # Choose based on your specific requirements or experimentation
        assert embed_size % heads == 0, "Embedding size needs to be divisible by heads"

        self.self_attention = SelfAttention(embed_size, heads)

        self.fc_layer = tf.keras.layers.Dense(128, activation='relu')
        self.output_layer = tf.keras.layers.Dense(len(possible_labels), activation='softmax')

    @staticmethod
    def _apply_per_channel(layers, inputs):
        """Run the i-th layer on the i-th channel of ``inputs`` and concatenate on the last axis."""
        return tf.concat(
            [layer(inputs[:, :, :, idx:idx + 1]) for idx, layer in enumerate(layers)],
            axis=-1,
        )

    def call(self, inputs, training=False):
        """Forward pass.

        Args:
            inputs: 5-tuple ``(lvl0, lvl1, lvl2, lvl3, dwt)``. The level 1-3
                tensors may be rank 3 (one unbatched sample) or rank 4
                (batched).
            training: Accepted for Keras compatibility; it is currently not
                forwarded to any sub-layer.

        Returns:
            Tensor of shape ``(batch, len(possible_labels))`` holding softmax
            probabilities.
        """
        lvl0_inputs, lvl1_inputs, lvl2_inputs, lvl3_inputs, dwt_inputs = inputs

        # For an unbatched sample the leading size-1 axis of the dwt tensor
        # doubles as the batch axis, so this evaluates to 1.
        batch_size = tf.shape(dwt_inputs)[0]

        # Ensure inputs have a batch dimension
        if len(lvl1_inputs.shape) == 3:
            lvl1_inputs = tf.expand_dims(lvl1_inputs, axis=0)
        if len(lvl2_inputs.shape) == 3:
            lvl2_inputs = tf.expand_dims(lvl2_inputs, axis=0)
        if len(lvl3_inputs.shape) == 3:
            lvl3_inputs = tf.expand_dims(lvl3_inputs, axis=0)

        dwt_output = self.dwt_input_layer(dwt_inputs)
        lvl0_output = self.lvl0_input_layer(lvl0_inputs)

        lvl1_output = self._apply_per_channel(self.lvl1_input_layers, lvl1_inputs)
        lvl2_output = self._apply_per_channel(self.lvl2_input_layers, lvl2_inputs)
        lvl3_output = self._apply_per_channel(self.lvl3_input_layers, lvl3_inputs)

        stage_outputs = (dwt_output, lvl0_output, lvl1_output, lvl2_output, lvl3_output)
        concatenated_output = tf.concat(
            [tf.reshape(stage, [batch_size, -1]) for stage in stage_outputs], axis=-1
        )

        # Give every sample its own length-1 sequence before attention. Feeding
        # the rank-2 (batch, features) tensor directly would make the samples
        # of a batch attend to each other, so a prediction would depend on what
        # else happens to be in the batch. For a batch of one the result is the
        # same as before.
        attention_input = tf.expand_dims(concatenated_output, axis=1)
        attention_output = tf.squeeze(self.self_attention(attention_input), axis=1)
        fc_output = self.fc_layer(attention_output)
        final_output = self.output_layer(fc_output)

        if self.debug:
            print(dwt_output.shape)
            print(lvl0_output.shape)
            print(lvl1_output.shape)
            print(lvl2_output.shape)
            print(lvl3_output.shape)
            print(concatenated_output.shape)
            print(attention_output.shape)
            print(fc_output.shape)
            print(final_output.shape)

        return final_output

    def train_model(self, dataset, epochs=100, batch_size=64, lr=0.01):
        """Train with Adam and categorical cross-entropy in a manual loop.

        Args:
            dataset: Iterable of ``{'input': ..., 'label': ...}`` items as
                accepted by ``format_dataset``.
            epochs: Number of passes over the data.
            batch_size: Number of samples per optimisation step.
            lr: Adam learning rate.

        After each epoch the mean of the per-batch losses is printed; nothing
        is printed for an epoch that produced no batches.
        """
        train_dataset = format_dataset(dataset).batch(batch_size)
        num_classes = len(self.possible_labels)

        criterion = CategoricalCrossentropy(from_logits=False)
        optimizer = Adam(learning_rate=lr)

        for epoch in range(epochs):
            print(f'Starting epoch {epoch+1}/{epochs}')
            batch_losses = []
            for data, target in train_dataset:
                # One-hot encode the whole batch of integer labels in one op.
                target = tf.one_hot(target, num_classes, dtype=tf.float32)

                with tf.GradientTape() as tape:
                    output = self(data, training=True)
                    loss = criterion(target, output)
                gradients = tape.gradient(loss, self.trainable_variables)
                optimizer.apply_gradients(zip(gradients, self.trainable_variables))
                batch_losses.append(float(loss.numpy()))

            if batch_losses:
                mean_loss = sum(batch_losses) / len(batch_losses)
                print(f'Epoch {epoch+1} completed, Loss: {mean_loss}')

    def predict(self, inputs):
        """Classify a single sample.

        Note that this replaces ``tf.keras.Model.predict`` with a
        single-sample variant.

        Args:
            inputs: 5-tuple as for ``call``; the first tensor must be rank 4
                with a leading batch dimension of 1.

        Returns:
            ``(label, probabilities)`` where ``label`` is the most likely
            class name and ``probabilities`` maps every class name to its
            probability.

        Raises:
            ValueError: If the first input is not a rank-4 batch of size 1.
        """
        if len(inputs[0].shape) != 4 or inputs[0].shape[0] != 1:
            raise ValueError("Input batch size should be 1")

        # The output layer already applies softmax, so the row is used as is;
        # applying softmax a second time would flatten the distribution.
        probabilities = self(inputs, training=False)[0].numpy()
        # Reduce over the class axis to a plain int usable as a list index.
        max_index = int(probabilities.argmax())
        label_prob_dict = dict(zip(self.possible_labels, probabilities))

        return str(self.possible_labels[max_index]), label_prob_dict



In [5]:
def process_entry(entry):
    """Turn one metadata entry into the tensors the model consumes.

    Every item of ``entry['entropy_results']`` is either the ``'dwt'`` result,
    stored as a single tensor, or a per-level result whose i-th element is
    appended to level ``i``. The per-level pieces are then concatenated along
    the last axis and the dwt tensor is reshaped to ``[1, 1, 10]``.

    Returns:
        ``{'input': {0: ..., 1: ..., 2: ..., 3: ..., 'dwt': ...}, 'label': ...}``
        with the input keys in exactly that order.
    """
    label = entry['label']
    level_parts = {0: [], 1: [], 2: [], 3: []}
    dwt_values = []

    for measurement in entry['entropy_results']:
        if measurement['method'] == 'dwt':
            dwt_values = tf.convert_to_tensor(measurement['result'], dtype=tf.float32)
            continue
        for level, level_values in enumerate(measurement['result']):
            level_parts[level].append(tf.convert_to_tensor(level_values, dtype=tf.float32))

    # Levels first, dwt last: the key order is part of the returned structure.
    machine_input = {
        level: tf.concat(parts, axis=-1) for level, parts in level_parts.items()
    }
    machine_input['dwt'] = tf.reshape(dwt_values, [1, 1, 10])

    return {'input': machine_input, 'label': label}


def format_dataset(dataset):
    """Build a shuffled ``tf.data.Dataset`` of ``(inputs, label)`` pairs.

    Args:
        dataset: Iterable of items with an ``'input'`` mapping (keys ``0``,
            ``1``, ``2``, ``3`` and ``'dwt'``) and a ``'label'`` that is one of
            ``'nature'``, ``'country'`` or ``'city'``.

    Returns:
        An unbatched dataset whose elements are
        ``((lvl0, lvl1, lvl2, lvl3, dwt), label)`` with an int32 label,
        shuffled with a buffer that covers the whole dataset.

    Raises:
        KeyError: If an item has an unknown label or lacks an input key.
    """
    label_num = {'nature': 0, 'country': 1, 'city': 2}
    input_keys = (0, 1, 2, 3, 'dwt')

    formatted_dataset = [
        (
            tuple(item['input'][key] for key in input_keys),
            label_num[item['label']],
        )
        for item in dataset
    ]

    tensor_dataset = tf.data.Dataset.from_generator(
        lambda: iter(formatted_dataset),
        output_signature=(
            (
                tf.TensorSpec(shape=(1, 1, 17), dtype=tf.float32),
                tf.TensorSpec(shape=(2, 2, 17), dtype=tf.float32),
                tf.TensorSpec(shape=(4, 4, 17), dtype=tf.float32),
                tf.TensorSpec(shape=(8, 8, 17), dtype=tf.float32),
                tf.TensorSpec(shape=(1, 1, 10), dtype=tf.float32)
            ),
            tf.TensorSpec(shape=(), dtype=tf.int32),
        )
    )
    # The shuffle buffer must be positive, so an empty dataset uses 1. Sizing
    # it from the materialised list also lets `dataset` be any iterable.
    return tensor_dataset.shuffle(buffer_size=max(len(formatted_dataset), 1))


def process_json(path, test_part, parallel_jobs=4):
    """Load the JSON metadata, convert every entry and split off a test set.

    Args:
        path: Path of the JSON file; iterating the parsed document must yield
            one entry per sample.
        test_part: Size of the test set. A ``float`` is a fraction of the
            processed entries; an ``int`` or numeric ``str`` is an absolute
            number of entries.
        parallel_jobs: Number of worker threads running ``process_entry``.

    Returns:
        ``(dataset, test_set, num_classes, dataset_length)`` where ``dataset``
        is the training part, ``num_classes`` is ``len(all_labels)`` and
        ``dataset_length`` is the size of the training part.

    Raises:
        ValueError: If ``test_part`` has an unsupported type or is negative.

    Note:
        Entries are collected in completion order, so the order of the result
        (and therefore which entries land in the test set) is not guaranteed
        to match the file order.
    """
    with open(path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    dataset = []

    start_time = time.time()
    total = len(metadata)

    with ThreadPoolExecutor(max_workers=parallel_jobs) as executor:
        futures = [executor.submit(process_entry, entry) for entry in metadata]
        for done, future in enumerate(as_completed(futures), start=1):
            result = future.result()
            if result is not None:
                dataset.append(result)
            print_progress_bar('Processed entry', done, total, start_time)

    if isinstance(test_part, float):
        test_size = int(test_part * len(dataset))
    elif isinstance(test_part, (int, str)) and not isinstance(test_part, bool):
        test_size = int(test_part)
    else:
        raise ValueError("Incompatible format for 'test_part'.")
    if test_size < 0:
        raise ValueError("'test_part' must not be negative.")

    # Split by a positive index: slicing with -test_size breaks for 0, where
    # dataset[-0:] is the whole list and dataset[:-0] is empty.
    split_at = max(len(dataset) - test_size, 0)
    test_set = dataset[split_at:]
    dataset = dataset[:split_at]

    num_classes = len(all_labels)
    dataset_length = len(dataset)

    return dataset, test_set, num_classes, dataset_length


In [6]:
# load data
# Pass the configured worker count explicitly; otherwise the function's own
# default would silently override `parallel_jobs`.
dataset, test_set, num_classes, dataset_length = process_json(
    path, test_part, parallel_jobs=parallel_jobs
)
print('\nDataset processed.')
print(f"Total number of entries in the dataset: {dataset_length}")
print(f"Total number of entries in the test set: {len(test_set)}")
print(f"Number of classes: {num_classes}")
print(f"Dataset length: {dataset_length}")

Processed entry: ██████████████████████████████████████████████████ | Completed: 256/256 100.0% | Time elapsed: 00:01/00:01 | Time left: ~= 00:00
Dataset processed.
Total number of entries in the dataset: 244
Total number of entries in the test set: 12
Number of classes: 3
Dataset length: 244


In [7]:
# Inspect the first training sample: its label and the shape of every input.
sample = dataset[0]
sample_inputs = sample['input']
print(sample['label'])
for inp in sample_inputs:
    val = sample_inputs[inp]
    print(f'level: {inp}. shape: {val.shape}')
# The tuple keeps the dict's insertion order, which is the order the model
# unpacks its inputs in.
machine_inputs = tuple(sample_inputs.values())

nature
level: 0. shape: (1, 1, 17)
level: 1. shape: (2, 2, 17)
level: 2. shape: (4, 4, 17)
level: 3. shape: (8, 8, 17)
level: dwt. shape: (1, 1, 10)


In [14]:
# model creation
model = EntropyClassifier(all_labels)
# The file name records the epoch count and the training-set size of this run.
# NOTE(review): '.pth' is the conventional PyTorch extension - confirm it is
# what the code that saves this Keras model expects.
file_name = f"EntropyClassifier_e={epochs}_ds={dataset_length}.pth"
print('Model created')

Model created


In [15]:
# Smoke test: push the single unbatched sample through the forward pass.
# call() is invoked directly here, i.e. not through the model(...) wrapper.
o = model.call(machine_inputs)
class_probabilities = o.numpy().tolist()
print(class_probabilities)

[[0.3379935622215271, 0.49024030566215515, 0.17176614701747894]]
