In [1]:
import os
import json
import pandas as pd
import numpy as np

import tensorflow as tf
from datetime import datetime as dt

from hmc.utils.dir import create_dir
from hmc.model.arguments import  build
from hmc.dataset import Dataset

2024-05-28 20:58:42.574248: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from hmc.model import build_model

In [8]:
# Set Python-level TensorFlow log verbosity.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

# Set C++ runtime log verbosity. TF_CPP_MIN_LOG_LEVEL expects 0-3, where 0 shows
# every message. The Python constant tf.compat.v1.logging.DEBUG is 10, which is
# not a valid value for this variable, so the most verbose level is spelled out.
# NOTE(review): this variable is probably only honoured when set before
# `import tensorflow` runs — confirm and move it above the imports if so.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'

# Location of the prepared training sample on disk.
base_path = "/mnt/disks/data/fma/trains"
sample_id = "hierarchical_tworoots_dev"

train_path = os.path.join(base_path, sample_id)
tfrecords_path = os.path.join(train_path, 'tfrecords')
metadata_path = os.path.join(train_path, "metadata.json")
labels_path = os.path.join(train_path, "labels.json")

# Directory of the training run whose weights are loaded later; derived from
# train_path so it cannot drift from base_path / sample_id.
model_path = os.path.join(train_path, "2024_05_28_20_45_02")

# Run settings, exposed with attribute access (args.batch_size, ...).
args = pd.Series({
    "batch_size": 32,
    "epochs": 10,
    "dropout": 0.5,
    'patience': 1,
    'max_queue_size': 64,
    "labels_path": labels_path,
    "metadata_path": metadata_path,
    "trainset_pattern": os.path.join(tfrecords_path, 'train'),
    "testset_pattern": os.path.join(tfrecords_path, 'test'),
    "valset_pattern": os.path.join(tfrecords_path, 'val'),
    "model_path": model_path
})


In [9]:
# Path of the 'best_binary.keras' file inside the selected model run directory.
binary_model = os.path.join(args.model_path, 'best_binary.keras')

In [10]:
# Read the dataset metadata and echo it so the run configuration is visible.
with open(args.metadata_path, 'r') as metadata_file:
    metadata = json.load(metadata_file)
print(metadata)

# Read the label dictionaries.
with open(args.labels_path, 'r') as labels_file:
    labels = json.load(labels_file)

# Per-level sizes, keyed 'level1' .. 'level<max_depth>', taken from the
# labels file's 'label_<n>_count' entries.
levels_size = {
    f'level{depth}': labels[f'label_{depth}_count']
    for depth in range(1, metadata['max_depth'] + 1)
}

# Keyword arguments forwarded to build_model.
params: dict = dict(
    levels_size=levels_size,
    sequence_size=metadata['sequence_size'],
    dropout=args.dropout,
)

print(params)
model = build_model(**params)

{'sequence_size': 1280, 'max_depth': 4, 'levels_size': [2, 30, 16], 'val_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots_dev/tfrecords/val', 'train_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots_dev/tfrecords/train', 'test_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots_dev/tfrecords/test', 'val_csv': '/mnt/disks/data/fma/trains/hierarchical_tworoots_dev/val.csv', 'train_csv': '/mnt/disks/data/fma/trains/hierarchical_tworoots_dev/train.csv', 'test_csv': '/mnt/disks/data/fma/trains/hierarchical_tworoots_dev/test.csv', 'trainset_count': 16791, 'validationset_count': 2007, 'testset_count': 4814}
{'levels_size': {'level1': 2, 'level2': 30, 'level3': 16, 'level4': 3}, 'sequence_size': 1280, 'dropout': 0.5}


  super().__init__(**kwargs)


In [11]:
# Build the test split through the project's Dataset helper.
# NOTE(review): build(df=True) presumably returns a pandas DataFrame with a
# 'features' column (the following cells use df_test.features) — confirm.
df_test = Dataset(args.testset_pattern, args.epochs, args.batch_size, levels_size).build(df=True)

2024-05-28 21:00:35.249234: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [12]:
# Replace each entry of the 'features' column with entry['features'] converted via .tolist().
# NOTE(review): the column is overwritten in place, so re-running this cell would
# index the already-converted values with 'features' and likely fail — confirm,
# and consider writing to a new column instead.
df_test['features'] = df_test.features.apply(lambda x: x['features'].tolist())

In [13]:
# Load the weights stored at binary_model into the freshly built model.
model.load_weights(binary_model)

  saveable.load_own_variables(weights_store.get(inner_path))


In [14]:
# Print the model summary.
model.summary()

In [15]:
# Collect the 'features' column values into a plain Python list.
X_test = df_test.features.values.tolist()

In [16]:
# Example of a manual adjustment, if needed: reshape to one flat row per sample.
X_test = np.reshape(X_test, (len(X_test), -1))
print(f"Formato de X_test após reshape: {X_test.shape}")

Formato de X_test após reshape: (4813, 1280)


In [17]:
# Show the shape of the test matrix.
print(X_test.shape)

(4813, 1280)


In [18]:
# Take the first 10 rows as a small batch for a quick prediction check.
x = X_test[:10]

In [25]:
# Run the model on the small batch.
predictions = model.predict(x)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [26]:
def apply_threshold(probabilities, threshold=0.5):
    """
    Convert probabilities into binary values by applying a threshold.

    Args:
        probabilities: NumPy array (or any object whose comparison result has
            ``astype``); plain lists, tuples and scalars are also accepted.
        threshold: Cut-off value; entries greater than or equal to it become 1.

    Returns:
        Integer-typed result with 1 where ``probabilities >= threshold`` and 0
        elsewhere.
    """
    # Plain Python sequences/scalars cannot be compared element-wise and have no
    # .astype, so they are converted first; array-like inputs are left untouched.
    if not hasattr(probabilities, "astype"):
        probabilities = np.asarray(probabilities)
    return (probabilities >= threshold).astype(int)

In [35]:
# Threshold used to convert probabilities into binary values
threshold = 0.1

# Convert the predictions into binary values
binary_predictions = [apply_threshold(pred, threshold) for pred in predictions]

In [37]:
# Display the raw model outputs for the small batch.
predictions

array([[1.4518132e-05, 2.1069400e-02, 3.5019927e-02],
       [4.6158344e-09, 1.9732465e-03, 1.3553320e-03],
       [5.5970736e-07, 1.3791395e-02, 6.6899634e-03],
       [1.7297678e-05, 2.0084914e-02, 3.5353202e-02],
       [8.2595608e-07, 8.0164019e-03, 1.3581984e-02],
       [3.7599701e-08, 3.3872083e-03, 3.1058753e-03],
       [8.4460817e-07, 1.4028621e-02, 7.2846934e-03],
       [3.5263032e-15, 2.7031587e-05, 7.5980370e-06],
       [2.8109765e-05, 3.8213134e-02, 2.5922984e-02],
       [2.6840280e-10, 8.8994810e-04, 4.3478422e-04]], dtype=float32)

In [20]:
def binary_to_indices(binary_labels):
    """
    Convert a binary label vector into the indices of the active classes.

    Args:
        binary_labels: Array-like of 0/1 values (NumPy array, list or tuple).

    Returns:
        NumPy array with the positions (along the first axis) whose value is 1,
        or the list ``[-1]`` when no position is set.
    """
    # Without the conversion a plain list compares as a whole (`[...] == 1` is
    # the scalar False), so every list input would be reported as "no labels".
    indices = np.where(np.asarray(binary_labels) == 1)[0]
    return indices if len(indices) > 0 else [-1]

def translate_proba(results):
    """
    Return, for every array in ``results``, its maximum along axis 1.

    Args:
        results: Iterable of 2-D array-likes.

    Returns:
        List with one 1-D array of row-wise maxima per input array.
    """
    row_maxima = []
    for level_scores in results:
        row_maxima.append(np.max(level_scores, axis=1))
    return row_maxima



In [None]:

class Predictor:
    """Turn per-level model outputs into hierarchy-consistent labels.

    For every level ``n`` from 1 to ``max_depth`` the label dictionary must
    provide the entries ``label_<n>`` (key -> class index),
    ``label_<n>_inverse`` (class index -> key) and ``label_<n>_name``
    (key -> display name).

    NOTE(review): keys of level ``n`` are assumed to be the '-'-joined path
    from the root (e.g. "a-b-c" at level 3, whose parent key is "a-b") —
    confirm against the code that writes the labels file.
    """

    def __init__(self, labels, max_depth):
        """Store the label dictionary and precompute the lookup tables.

        Args:
            labels: Label dictionary as described on the class.
            max_depth: Number of hierarchy levels to use.
        """
        self.labels = labels
        self.max_depth = max_depth
        # One index -> key lookup per level; position 0 is level 1.
        self.inverse_labels = [
            self._index_lookup(labels[f'label_{level}_inverse'])
            for level in range(1, max_depth + 1)
        ]
        self.labels_index = []
        self.create_path()

    @staticmethod
    def _index_lookup(inverse):
        """Return ``inverse`` keyed by ``int`` so argmax results can index it.

        JSON object keys are always strings, so a mapping loaded from a JSON
        file has keys "0", "1", ...; non-dict inputs (e.g. lists) are returned
        unchanged.
        """
        if isinstance(inverse, dict):
            return {int(index): key for index, key in inverse.items()}
        return inverse

    def create_path(self):
        """Build ``self.labels_index``, the parent -> children index tables.

        After the call ``self.labels_index[0]`` is an array with all level-1
        class indices, and ``self.labels_index[d]`` (``d >= 1``) is a dict that
        maps a class index of level ``d`` to the array of class indices of
        level ``d + 1`` that belong to it.

        Raises:
            KeyError: If a key's parent is missing from the previous level.
        """
        # Rebuilt from scratch so calling this twice does not duplicate entries.
        self.labels_index = [np.array(list(self.labels['label_1'].values()))]

        for level in range(2, self.max_depth + 1):
            parents = self.labels[f'label_{level - 1}']
            children = {}
            for key, value in self.labels[f'label_{level}'].items():
                # The parent key is the child's key without its last component.
                parent_key = "-".join(key.split("-")[:-1])
                children.setdefault(parents[parent_key], []).append(value)
            self.labels_index.append(
                {parent: np.array(indices) for parent, indices in children.items()}
            )

    def normalize(self, predictions):
        """Zero out scores that contradict the class chosen one level above.

        Level 1 is left as is. For each deeper level, every score that does not
        belong to a child of the previous level's argmax is set to 0, so the
        argmax of each level follows a single path through the hierarchy.

        Args:
            predictions: Sequence of ``max_depth`` 2-D arrays, one per level,
                each indexed as ``[sample, class]``.

        Returns:
            Tuple of ``max_depth`` new arrays; the inputs are not modified.

        Raises:
            ValueError: If the number of arrays differs from ``max_depth``.
            KeyError: If a chosen class has no children at the next level.
        """
        levels = [np.array(level, copy=True) for level in predictions]
        if len(levels) != self.max_depth:
            raise ValueError(
                f"expected {self.max_depth} prediction arrays, got {len(levels)}"
            )

        for depth in range(1, len(levels)):
            # Uses the already-filtered previous level, so choices stay consistent.
            chosen_parents = levels[depth - 1].argmax(axis=1)
            children = self.labels_index[depth]
            scores = levels[depth]
            for row, parent in enumerate(chosen_parents):
                blocked = np.ones(scores[row].shape, dtype=bool)
                blocked[children[int(parent)]] = False
                scores[row, blocked] = 0

        return tuple(levels)

    def translate_predictions(self, predictions):
        """Map each level's argmax class index to its label key.

        Args:
            predictions: Sequence of 2-D arrays, one per level, in level order.

        Returns:
            List with one list of label keys per level.
        """
        return [
            [self.inverse_labels[level][int(index)] for index in scores.argmax(1)]
            for level, scores in enumerate(predictions)
        ]

    def predict_as_df(self, predictions):
        """Summarise predictions as a DataFrame with one row per sample.

        Args:
            predictions: Sequence of ``max_depth`` 2-D arrays, one per level.

        Returns:
            DataFrame with the columns ``classe<n>`` (label key) and
            ``proba<n>`` (score of the chosen class) for each level ``n``,
            followed by ``full_name``.
        """
        results = self.normalize(predictions)
        cats = self.translate_predictions(results)
        # Row-wise maximum of each filtered level, computed here so the class
        # does not depend on helpers defined in other cells.
        probas = [np.amax(level, axis=1) for level in results]

        columns = {}
        for level, (cat, prob) in enumerate(zip(cats, probas), start=1):
            columns[f'classe{level}'] = cat
            columns[f'proba{level}'] = prob

        df = pd.DataFrame(columns)
        df['full_name'] = self.create_full_name(df)

        return df

    def create_full_name(self, df):
        """Build a readable "name -> name -> ..." path for every row.

        The deepest level's key is split on '-' and each prefix is looked up in
        the matching ``label_<n>_name`` table.

        Args:
            df: DataFrame holding a ``classe<max_depth>`` column of label keys.

        Returns:
            List of strings, one per row, with the level names joined by " -> ".
        """
        full_name = []

        for cat in df[f'classe{self.max_depth}']:
            values = cat.split("-")
            names = [self.labels['label_1_name'][values[0]]]
            for level in range(2, self.max_depth + 1):
                names.append(self.labels[f'label_{level}_name']["-".join(values[:level])])

            full_name.append(" -> ".join(names))

        return full_name

In [61]:
# List the keys available in the labels dictionary.
labels.keys()

dict_keys(['label_1', 'label_1_name', 'label_1_inverse', 'label_1_count', 'label_2', 'label_2_name', 'label_2_inverse', 'label_2_count', 'label_3', 'label_3_name', 'label_3_inverse', 'label_3_count', 'label_4', 'label_4_name', 'label_4_inverse', 'label_4_count'])

In [57]:
# NOTE(review): `pred` has no module-level definition in the visible cells (it
# only appears as a comprehension variable, which does not leak into the
# notebook namespace), so this presumably relies on a deleted or out-of-order
# cell — confirm, or inspect `predictions[0]` instead.
pred[0]

array([3.9826529e-05, 1.4984391e-02, 1.1820159e-02], dtype=float32)