In [2]:
import os
import json
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import classification_report, f1_score

from hmc.dataset import HMCDataset

In [4]:
from hmc.model import ClassificationModel

In [5]:
# Root of the training artefacts plus the experiment and model run under evaluation.
base_path = "/mnt/disks/data/fma/trains"
sample_id = "hierarchical_tworoots"
model_id = "20240807_193356"

# Every other location is derived from the experiment directory.
train_path = os.path.join(base_path, sample_id)
torch_path, metadata_path, labels_path, model_path = (
    os.path.join(train_path, *parts)
    for parts in (
        ("torch",),
        ("metadata.json",),
        ("labels.json",),
        ("hmc_torch_effnet", model_id),
    )
)

# Run settings gathered in a Series so they can be read as attributes (args.batch_size, ...).
args = pd.Series(
    dict(
        batch_size=32,
        epochs=10,
        dropout=0.5,
        patience=1,
        max_queue_size=64,
        labels_path=labels_path,
        metadata_path=metadata_path,
        trainset_pattern=os.path.join(torch_path, "train"),
        testset_pattern=os.path.join(torch_path, "test"),
        valset_pattern=os.path.join(torch_path, "val"),
        model_path=model_path,
    )
)


In [5]:
# Location of the weights file inside the model run directory; this path is
# later handed to model.load_weights.
binary_model = os.path.join(args.model_path, 'best-weights.pth')

In [6]:
# Read the dataset metadata and the label vocabulary referenced by args.
with open(args.metadata_path, 'r') as metadata_file:
    metadata = json.load(metadata_file)
print(metadata)

with open(args.labels_path, 'r') as labels_file:
    labels = json.load(labels_file)

# Only these two metadata entries are forwarded to the model builder.
params: dict = {key: metadata[key] for key in ('levels_size', 'sequence_size')}

print(params)
# NOTE(review): build_model is not imported in any visible cell (only
# ClassificationModel is) - confirm where it comes from before a fresh-kernel run.
model = build_model(**params)

{'sequence_size': 1280, 'max_depth': 4, 'levels_size': [2, 29, 15, 2], 'val_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots/tfrecords/val', 'train_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots/tfrecords/train', 'test_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots/tfrecords/test', 'val_torch_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots/torch/val', 'train_torch_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots/torch/train', 'test_torch_path': '/mnt/disks/data/fma/trains/hierarchical_tworoots/torch/test', 'val_csv': '/mnt/disks/data/fma/trains/hierarchical_tworoots/val.csv', 'train_csv': '/mnt/disks/data/fma/trains/hierarchical_tworoots/train.csv', 'test_csv': '/mnt/disks/data/fma/trains/hierarchical_tworoots/test.csv', 'trainset_count': 18209, 'validationset_count': 2007, 'testset_count': 4814}
{'levels_size': [2, 29, 15, 2], 'sequence_size': 1280}
size 1792 of level 2
size 1792 of level 3
size 1792 of level 4


2024-08-08 19:18:38.182051: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9681 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:65:00.0, compute capability: 7.5


In [7]:
# Build the test split through HMCDataset. Later cells use the result as a
# pandas DataFrame with `features` and `labels` columns; presumably that is
# what build(df=True) returns - TODO confirm against hmc.dataset.
df_test = HMCDataset(args.testset_pattern, args.epochs, args.batch_size, labels['levels_size']).build(df=True)

2024-08-08 19:18:40.247933: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [8]:
def _unwrap_features(item):
    """Return the plain-list feature vector stored under item['features'].

    Rows that are already plain lists are returned unchanged, so re-running
    this cell on an already-converted frame does not raise.
    """
    if isinstance(item, list):
        return item
    return item['features'].tolist()


# Replace each row's wrapper object by its feature vector as a Python list.
df_test['features'] = df_test.features.apply(_unwrap_features)

In [9]:
# Load the weights stored at `binary_model` into the model built earlier.
model.load_weights(binary_model)

  saveable.load_own_variables(weights_store.get(inner_path))


In [10]:
# Print the model summary.
model.summary()

In [11]:
# Gather the per-row feature lists of the test frame into one Python list.
X_test = df_test.features.values.tolist()

In [12]:
# Example of a manual adjustment, if needed: force a 2-D layout with one row
# per sample and all remaining dimensions flattened.
X_test = np.asarray(X_test).reshape(len(X_test), -1)
print(f"Formato de X_test após reshape: {X_test.shape}")

Formato de X_test após reshape: (4814, 1280)


In [13]:
# Check the shape of the test matrix (rows are samples after the reshape above).
print(X_test.shape)

(4814, 1280)


In [14]:
# First ten rows of the test matrix.
# NOTE(review): `x` is not used by any visible cell - confirm it is still needed.
x = X_test[:10]

In [15]:
# Run inference on the whole test matrix. Later cells index the result per
# level (presumably one probability array per hierarchy level - TODO confirm).
predictions = model.predict(X_test)

[1m  1/151[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 152ms/step

[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step


In [16]:
def apply_threshold(probabilities, threshold=0.5):
    """
    Convert probabilities into binary values using a cut-off.

    Args:
        probabilities: Array-like of probabilities (NumPy array, nested
            list, ...).
        threshold: Values greater than or equal to this become 1, all
            others become 0.

    Returns:
        NumPy integer array with the same shape as ``probabilities``.
    """
    # np.asarray lets plain Python lists through; an ndarray is used as-is.
    return (np.asarray(probabilities) >= threshold).astype(int)

In [28]:
# Per-level cut-offs used to turn probabilities into binary labels, keyed by
# the position of the level in `predictions`.
threshold = {
    0: 0.7,
    1: 0.4,
    2: 0.4,
    3: 0.4,
}

# Binarize each level's predictions with that level's cut-off.
binary_predictions = [apply_threshold(pred, threshold[level]) for level, pred in enumerate(predictions)]

In [74]:
# Binary prediction vector of the first sample at the second level (index 1).
binary_predictions[1][0].tolist()

[1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [58]:
def create_report_metrics(y_pred, y_true, target_names):
    """
    Build a metrics table from sklearn's ``classification_report``.

    Args:
        y_pred: Predicted labels.
        y_true: Ground-truth labels.
        target_names: Display names passed to the report as ``target_names``.

    Returns:
        DataFrame holding the transposed report dictionary, i.e. one row per
        top-level entry of the report.
    """
    report_options = {
        'output_dict': True,
        'zero_division': 0,
        'target_names': target_names,
    }
    report = classification_report(y_true=y_true, y_pred=y_pred, **report_options)

    # The report dict is keyed by entry first; transposing turns those
    # entries into rows.
    return pd.DataFrame(report).T

In [59]:
def create_reports(results, y_true, labels, max_depth):
    """
    Compute a weighted F1 score and a metrics table for every hierarchy level.

    Args:
        results: Sequence indexed by level (0-based) with that level's binary
            predictions.
        y_true: Iterable of per-sample mappings; each maps ``'level<k>'``
            (1-based) to an object with ``.tolist()`` holding the true labels.
        labels: Mapping ``'level<k>'`` -> mapping whose keys are used as the
            target names of that level.
        max_depth: Number of levels to evaluate.

    Returns:
        Tuple ``(reports, fscore)``: ``reports`` maps the 0-based level index
        to the table from ``create_report_metrics``; ``fscore`` is a list with
        one single-element list per level holding the weighted F1 score.
    """
    fscore = [[] for _ in range(max_depth)]
    reports = {}
    for i in range(max_depth):
        level_name = f'level{i+1}'
        y_test_bin = [label[level_name].tolist() for label in y_true]
        # Ground truth must be passed as y_true: the 'weighted' average weighs
        # each class by its support in y_true, so swapping the arguments would
        # weigh classes by how often they were predicted instead.
        level_f1 = f1_score(
            y_true=y_test_bin,
            y_pred=results[i],
            average='weighted',
            zero_division=0,
        )
        fscore[i].append(level_f1)
        reports[i] = create_report_metrics(results[i], y_test_bin, list(labels[level_name].keys()))

    return reports, fscore


In [60]:
# Keys of the first level's label mapping (used as target names in the reports).
list(labels['level1'])

['15', '12']

In [77]:
# Inspect the 'level4' label vector of the first test row.
df_test.labels.iloc[0]['level4']

array([0, 0])

In [61]:
def generete_md(binary_predictions, df_test, labels, report_prefix='report-tworoots'):
    """
    Write one markdown classification report per hierarchy level.

    Args:
        binary_predictions: Sequence with one binary prediction array per
            level, in level order; each must provide ``.tolist()``.
        df_test: Object exposing ``labels``, an iterable of per-sample
            mappings from ``'level<k>'`` (1-based) to an object with
            ``.tolist()`` holding the true labels.
        labels: Mapping ``'level<k>'`` -> mapping whose keys are used as the
            target names of that level.
        report_prefix: Path prefix of the generated files; level ``k`` is
            written to ``'<report_prefix>-<k>.md'``.

    Returns:
        None. The reports are written to disk.
    """
    for idx, binary_label in enumerate(binary_predictions, start=1):
        level_name = f'level{idx}'

        y_test_bin = [label[level_name].tolist() for label in df_test.labels]

        # Reuse the shared helper so the markdown files and the in-notebook
        # tables are produced with the same report settings.
        df_report = create_report_metrics(
            binary_label.tolist(),
            y_test_bin,
            list(labels[level_name].keys()),
        )

        markdown = df_report.to_markdown()

        # Explicit encoding keeps the output independent of the platform default.
        with open(f'{report_prefix}-{idx}.md', 'w', encoding='utf-8') as f:
            f.write(markdown)
        

    

In [62]:
# Take the number of levels from the dataset metadata ('max_depth') instead of
# a hard-coded 4, so the evaluation follows the hierarchy the data was built with.
reports, fscore = create_reports(binary_predictions, df_test.labels, labels, metadata['max_depth'])

In [64]:
# Show the metrics table of the second level (reports is keyed by 0-based level index).
reports[1]

Unnamed: 0,precision,recall,f1-score,support
236,0.229219,0.07872,0.117193,1156.0
27,0.0,0.0,0.0,138.0
297,0.0,0.0,0.0,186.0
58,0.0,0.0,0.0,163.0
25,0.0,0.0,0.0,15.0
66,0.0,0.0,0.0,223.0
495,0.0,0.0,0.0,234.0
182,0.0,0.0,0.0,159.0
183,0.0,0.0,0.0,312.0
286,0.0,0.0,0.0,48.0


In [35]:
# generete_md requires the label vocabulary as its third argument; calling it
# without `labels` raises a TypeError.
generete_md(binary_predictions, df_test, labels)