# Notebook de Exploración de la Información

In [1]:
import os
import requests
                                                
## download the dataset
# Directory of the raw data files
_data_root = '../data/covertype'

# Path to the raw training data
_data_filepath = os.path.join(_data_root, 'covertype_train.csv')

# Download data
os.makedirs(_data_root, exist_ok=True)

if not os.path.isfile(_data_filepath):
    #https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/
    url = 'https://docs.google.com/uc?export= \
    download&confirm={{VALUE}}&id=1lVF1BCWLH4eXXV_YOJzjR7xZjj-wAGj9'
    r = requests.get(url, allow_redirects=True, stream=True)
    open(_data_filepath, 'wb').write(r.content)

In [2]:
import pandas as pd

# Cargar el archivo directamente
df = pd.read_csv(_data_filepath)

# Mostrar las primeras filas
df.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Wilderness_Area,Soil_Type,Cover_Type
0,2991,119,7,67,11,1015,233,234,133,1570,Commanche,C7202,1
1,2876,3,18,485,71,2495,192,202,144,1557,Commanche,C7757,1
2,3171,315,2,277,9,4374,213,237,162,1052,Rawah,C7745,0
3,3087,342,13,190,31,4774,193,221,166,752,Rawah,C7745,0
4,2835,158,10,212,41,3596,231,242,141,3280,Rawah,C4744,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116203 entries, 0 to 116202
Data columns (total 13 columns):
 #   Column                              Non-Null Count   Dtype 
---  ------                              --------------   ----- 
 0   Elevation                           116203 non-null  int64 
 1   Aspect                              116203 non-null  int64 
 2   Slope                               116203 non-null  int64 
 3   Horizontal_Distance_To_Hydrology    116203 non-null  int64 
 4   Vertical_Distance_To_Hydrology      116203 non-null  int64 
 5   Horizontal_Distance_To_Roadways     116203 non-null  int64 
 6   Hillshade_9am                       116203 non-null  int64 
 7   Hillshade_Noon                      116203 non-null  int64 
 8   Hillshade_3pm                       116203 non-null  int64 
 9   Horizontal_Distance_To_Fire_Points  116203 non-null  int64 
 10  Wilderness_Area                     116203 non-null  object
 11  Soil_Type                           116

Selección de Caractarísticas

In [4]:
df.dtypes.value_counts()

int64     11
object     2
dtype: int64

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split

X = df.drop(columns = 'Cover_Type').select_dtypes(np.number)
y = df['Cover_Type']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

# Verificar tamaños
print("Tamaño del conjunto de entrenamiento:", X_train.shape)
print("Tamaño del conjunto de prueba:", X_test.shape)
print("Número de etiquetas a clasificar:", y_train.nunique())

Tamaño del conjunto de entrenamiento: (81342, 10)
Tamaño del conjunto de prueba: (34861, 10)
Número de etiquetas a clasificar: 7


## Versión Inicial Clasificador

Se van a utilizar solo las primeras `10` características

In [6]:
from sklearn.feature_selection import SelectKBest

sk = SelectKBest(k = 10)

X_new = sk.fit_transform(X_train, y_train)

X_new.shape

(81342, 10)

In [7]:
pd.DataFrame({'feature':sk.feature_names_in_, 'score':sk.scores_}).sort_values(by = 'score', ascending = False).head(10)

Unnamed: 0,feature,score
0,Elevation,21867.474316
5,Horizontal_Distance_To_Roadways,1444.611542
2,Slope,1109.94481
9,Horizontal_Distance_To_Fire_Points,1042.029883
6,Hillshade_9am,451.350852
7,Hillshade_Noon,420.486309
3,Horizontal_Distance_To_Hydrology,346.658711
4,Vertical_Distance_To_Hydrology,176.843658
8,Hillshade_3pm,124.270371
1,Aspect,64.218787


## Procesamiento con TFX

In [8]:
import os
import tfx.v1 as tfx  # Usamos la API v1 de TFX
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.orchestration.metadata import sqlite_metadata_connection_config

2025-03-04 23:32:22.078358: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-04 23:32:22.136111: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-04 23:32:22.137832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-03-04 23:32:22.137840: I tensorflow/compiler/xla/stream_executor/cuda/cudar

In [9]:
# Directorio de salida para TFX (donde se guardarán los artefactos)
output_dir = "tfx_pipeline/"
metadata_path = os.path.join(output_dir, "metadata.db")


In [10]:
# Crear contexto de ejecución interactivo
context = InteractiveContext(metadata_connection_config=sqlite_metadata_connection_config(metadata_path))



In [11]:
_data_filepath

'../data/covertype/covertype_train.csv'

In [12]:
from tfx.components import CsvExampleGen

example_gen = CsvExampleGen(input_base='../data/')

In [13]:
from tfx.proto import example_gen_pb2

input_config = example_gen_pb2.Input(splits=[
    example_gen_pb2.Input.Split(name="train", pattern="covertype_train.csv")
])

example_gen = tfx.components.CsvExampleGen(
    input_base="../data/covertype/",
    input_config=input_config
)

context.run(example_gen)






0,1
.execution_id,12
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x77eadb608070.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']../data/covertype/['input_config']{  ""splits"": [  {  ""name"": ""train"",  ""pattern"": ""covertype_train.csv""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:train,num_files:1,total_bytes:6405459,xor_checksum:1741060275,sum_checksum:1741060275"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']../data/covertype/['input_config']{  ""splits"": [  {  ""name"": ""train"",  ""pattern"": ""covertype_train.csv""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:train,num_files:1,total_bytes:6405459,xor_checksum:1741060275,sum_checksum:1741060275"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],../data/covertype/
['input_config'],"{  ""splits"": [  {  ""name"": ""train"",  ""pattern"": ""covertype_train.csv""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:train,num_files:1,total_bytes:6405459,xor_checksum:1741060275,sum_checksum:1741060275"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


In [14]:
example_gen

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']../data/covertype/['input_config']{  ""splits"": [  {  ""name"": ""train"",  ""pattern"": ""covertype_train.csv""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:train,num_files:1,total_bytes:6405459,xor_checksum:1741060275,sum_checksum:1741060275"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x77eadb8f4130.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12) at 0x77eb2d5e96a0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],../data/covertype/
['input_config'],"{  ""splits"": [  {  ""name"": ""train"",  ""pattern"": ""covertype_train.csv""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:train,num_files:1,total_bytes:6405459,xor_checksum:1741060275,sum_checksum:1741060275"


||1# import logging
logging.getLogger().setLevel(logging.INFO)

# Crear componente StatisticsGen (usa los datos de CsvExampleGen)
statistics_gen = tfx.components.StatisticsGen(
    examples=example_gen.outputs["examples"]
)

# Ejecutar el componente
context.run(statistics_gen)


In [16]:
import tensorflow_data_validation as tfdv

# Obtener la ubicación del dataset transformado en TFRecords
stats_uri = statistics_gen.outputs["statistics"].get()[0].uri

# Obtener la ruta del archivo dentro del directorio de estadísticas
stats_file = os.path.join(stats_uri, "Split-train", "FeatureStats.pb")

In [17]:
import os
import glob

# Obtener la URI de salida de StatisticsGen
stats_uri = statistics_gen.outputs["statistics"].get()[0].uri

# Listar los archivos generados dentro del directorio de estadísticas
stats_files = glob.glob(os.path.join(stats_uri, "Split-train", "*"))
print("Archivos generados por StatisticsGen:", stats_files)


Archivos generados por StatisticsGen: ['/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/StatisticsGen/statistics/13/Split-train/FeatureStats.pb']


In [18]:
# Verificar si el archivo existe
if os.path.exists(stats_file):
    print(f"Cargando estadísticas desde: {stats_file}")
    stats = tfdv.load_statistics(stats_file)
    tfdv.visualize_statistics(stats)
else:
    print("⚠️ Archivo de estadísticas no encontrado.")

Cargando estadísticas desde: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/StatisticsGen/statistics/13/Split-train/FeatureStats.pb
Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:root:File /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/StatisticsGen/statistics/13/Split-train/FeatureStats.pb did not look like a TFRecord. Try reading as a plain file.


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 1: invalid continuation byte

In [19]:
stats_file

'/tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/StatisticsGen/statistics/13/Split-train/FeatureStats.pb'

In [20]:
import tensorflow as tf

dataset = tf.data.TFRecordDataset([stats_file])
for record in dataset.take(1):
    print("Registro leído:", record)


2025-03-04 23:46:41.364134: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2025-03-04 23:46:41.364152: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2025-03-04 23:46:41.364168: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist
2025-03-04 23:46:41.364378: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


DataLossError: {{function_node __wrapped__IteratorGetNext_output_types_1_device_/job:localhost/replica:0/task:0/device:CPU:0}} corrupted record at 0 (Is this even a TFRecord file?) [Op:IteratorGetNext]

In [21]:
import tensorflow as tf
import os

# Ubicación de un TFRecord generado por CsvExampleGen
example_uri = example_gen.outputs["examples"].get()[0].uri
tfrecord_file = os.path.join(example_uri, "Split-train", "part-00000-of-00001")

print("Intentando leer TFRecord:", tfrecord_file)
dataset = tf.data.TFRecordDataset([tfrecord_file])
for record in dataset.take(1):
    print("Registro leído:", record)


Intentando leer TFRecord: /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12/Split-train/part-00000-of-00001


NotFoundError: {{function_node __wrapped__IteratorGetNext_output_types_1_device_/job:localhost/replica:0/task:0/device:CPU:0}} /tmp/tfx-interactive-2025-03-04T23_32_32.954257-dwiz4_u6/CsvExampleGen/examples/12/Split-train/part-00000-of-00001; No such file or directory [Op:IteratorGetNext]