### 1. Setup

In [1]:
# Impor library standar dan pengecekan versi
import sys
import sklearn
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
import pandas as pd

# Impor untuk plotting
import matplotlib as mpl
import matplotlib.pyplot as plt

# --- Pengecekan Versi ---
assert sys.version_info >= (3, 5), "Dibutuhkan Python 3.5 atau lebih baru"
assert sklearn.__version__ >= "0.20", "Dibutuhkan Scikit-Learn 0.20 atau lebih baru"
assert tf.__version__ >= "2.0", "Dibutuhkan TensorFlow 2.0 atau lebih baru"

# --- Pengaturan untuk Hasil yang Stabil dan Plot ---
np.random.seed(42)
tf.random.set_seed(42)

mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# --- Direktori untuk Menyimpan Gambar ---
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "data"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Fungsi untuk menyimpan gambar plot."""
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Menyimpan gambar:", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

print("Setup selesai.")

2025-06-20 16:30:31.005657: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750411831.026482   58469 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750411831.032915   58469 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-20 16:30:31.053199: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Setup selesai.


### 2. Data API

In [2]:
# Bagian ini mendemonstrasikan dasar-dasar penggunaan `tf.data.Dataset` untuk membuat pipeline data yang efisien.

print("\n--- Memulai Bagian Data API ---")

# Membuat dataset dari tensor di memori
X = tf.range(10)
dataset = tf.data.Dataset.from_tensor_slices(X)
print("Dataset dari from_tensor_slices(tf.range(10)):", dataset)

# Cara lain yang ekuivalen
dataset = tf.data.Dataset.range(10)
print("Dataset dari range(10):", dataset)

# Iterasi melalui dataset
print("\nItem dalam dataset awal:")
for item in dataset:
    print(item)


--- Memulai Bagian Data API ---
Dataset dari from_tensor_slices(tf.range(10)): <_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>
Dataset dari range(10): <_RangeDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>

Item dalam dataset awal:
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(5, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)
tf.Tensor(9, shape=(), dtype=int64)


I0000 00:00:1750411838.526482   58469 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4057 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5
2025-06-20 16:30:38.826957: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


#### Chaining (merangkai) transformasi pada dataset
repeat() mengulang data, batch() mengelompokkan data

In [3]:
dataset = dataset.repeat(3).batch(7)
print("\nDataset setelah repeat(3) dan batch(7):")
for item in dataset:
    print(item)

# Transformasi lainnya seperti map, apply, filter
dataset = dataset.map(lambda x: x * 2)  # Mengalikan setiap elemen dengan 2
print("\nDataset setelah map(lambda x: x * 2):")
for item in dataset:
    print(item)

# Mengembalikan batch menjadi elemen individual
dataset = dataset.unbatch()
print("\nDataset setelah unbatch():")
for item in dataset.take(5): # Hanya menampilkan 5 item pertama
    print(item)

# Menyaring dataset
dataset = dataset.filter(lambda x: x < 10)  # Hanya menyimpan item < 10
print("\nDataset setelah filter(lambda x: x < 10):")
for item in dataset.take(5):
    print(item)


Dataset setelah repeat(3) dan batch(7):
tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int64)
tf.Tensor([7 8 9 0 1 2 3], shape=(7,), dtype=int64)
tf.Tensor([4 5 6 7 8 9 0], shape=(7,), dtype=int64)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int64)
tf.Tensor([8 9], shape=(2,), dtype=int64)

Dataset setelah map(lambda x: x * 2):
tf.Tensor([ 0  2  4  6  8 10 12], shape=(7,), dtype=int64)
tf.Tensor([14 16 18  0  2  4  6], shape=(7,), dtype=int64)
tf.Tensor([ 8 10 12 14 16 18  0], shape=(7,), dtype=int64)
tf.Tensor([ 2  4  6  8 10 12 14], shape=(7,), dtype=int64)
tf.Tensor([16 18], shape=(2,), dtype=int64)


2025-06-20 16:30:38.864240: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence



Dataset setelah unbatch():
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)

Dataset setelah filter(lambda x: x < 10):
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)


2025-06-20 16:30:38.928981: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


#### Mengacak (shuffle) dataset

In [4]:
# Mengacak (shuffle) dataset
tf.random.set_seed(42)
dataset = tf.data.Dataset.range(10).repeat(3)
# buffer_size penting untuk pengacakan yang baik
dataset = dataset.shuffle(buffer_size=5, seed=42).batch(7)
print("\nDataset setelah shuffle() dan batch():")
for item in dataset:
    print(item)

W0000 00:00:1750411839.172870   58646 gpu_backend_lib.cc:579] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  ./cuda_sdk_lib
  ipykernel_launcher.runfiles/cuda_nvcc
  ipykern/cuda_nvcc
  
  /usr/local/cuda
  /home/ardi/miniconda3/lib/python3.12/site-packages/tensorflow/python/platform/../../../nvidia/cuda_nvcc
  /home/ardi/miniconda3/lib/python3.12/site-packages/tensorflow/python/platform/../../../../nvidia/cuda_nvcc
  /home/ardi/miniconda3/lib/python3.12/site-packages/tensorflow/python/platform/../../cuda
  .
You can choose the search directory by setting xla_gpu_cuda_data_dir in HloModule's DebugOptions.  For most apps, setting the environment variable XLA_FLAGS=--xla_gpu_cuda_data_dir=/path/to/cuda will work.



Dataset setelah shuffle() dan batch():
tf.Tensor([0 1 6 5 7 3 9], shape=(7,), dtype=int64)
tf.Tensor([8 2 1 0 4 6 4], shape=(7,), dtype=int64)
tf.Tensor([7 2 5 9 2 1 3], shape=(7,), dtype=int64)
tf.Tensor([4 3 8 7 9 5 0], shape=(7,), dtype=int64)
tf.Tensor([8 6], shape=(2,), dtype=int64)


#### Pipeline Input dari File CSV

In [5]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# --- Memuat dan Mempersiapkan Dataset California Housing ---
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Melakukan penskalaan (scaling) pada fitur
scaler = StandardScaler()
scaler.fit(X_train)
X_mean = scaler.mean_
X_std = scaler.scale_

#### Menyimpan Dataset ke Beberapa File CSV

In [6]:
def save_to_multiple_csv_files(data, name_prefix, header=None, n_parts=10):
    housing_dir = os.path.join("datasets", "housing")
    os.makedirs(housing_dir, exist_ok=True)
    path_format = os.path.join(housing_dir, f"my_{name_prefix}_{{:02d}}.csv")

    filepaths = []
    m = len(data)
    for file_idx, row_indices in enumerate(np.array_split(np.arange(m), n_parts)):
        part_csv = path_format.format(file_idx)
        filepaths.append(part_csv)
        with open(part_csv, "wt", encoding="utf-8") as f:
            if header is not None:
                f.write(header)
                f.write("\n")
            for row_idx in row_indices:
                f.write(",".join([repr(col) for col in data[row_idx]]))
                f.write("\n")
    return filepaths

In [7]:
# Menggabungkan fitur dan target, lalu menyimpannya
train_data = np.c_[X_train, y_train]
valid_data = np.c_[X_valid, y_valid]
test_data = np.c_[X_test, y_test]
header_cols = housing.feature_names + ["MedianHouseValue"]
header = ",".join(header_cols)

train_filepaths = save_to_multiple_csv_files(train_data, "train", header, n_parts=20)
valid_filepaths = save_to_multiple_csv_files(valid_data, "valid", header, n_parts=10)
test_filepaths = save_to_multiple_csv_files(test_data, "test", header, n_parts=10)

print("\nDataset telah disimpan ke dalam beberapa file CSV.")
print("Contoh path file training:", train_filepaths[0])


Dataset telah disimpan ke dalam beberapa file CSV.
Contoh path file training: datasets/housing/my_train_00.csv


#### Membangun Pipeline Input

In [8]:
# 1. Membuat dataset dari path file
filepath_dataset = tf.data.Dataset.list_files(train_filepaths, seed=42)

In [9]:
# 2. Membaca file secara paralel dengan `interleave`
n_readers = 5
dataset = filepath_dataset.interleave(
    lambda filepath: tf.data.TextLineDataset(filepath).skip(1),
    cycle_length=n_readers)

print("\n5 baris pertama dari dataset yang digabungkan:")
for line in dataset.take(5):
    print(line.numpy())


5 baris pertama dari dataset yang digabungkan:
b'np.float64(4.7361),np.float64(7.0),np.float64(7.464968152866242),np.float64(1.1178343949044587),np.float64(846.0),np.float64(2.694267515923567),np.float64(34.49),np.float64(-117.27),np.float64(1.745)'
b'np.float64(3.6641),np.float64(17.0),np.float64(5.577142857142857),np.float64(1.1542857142857144),np.float64(511.0),np.float64(2.92),np.float64(40.85),np.float64(-121.07),np.float64(0.808)'
b'np.float64(4.5909),np.float64(16.0),np.float64(5.475877192982456),np.float64(1.0964912280701755),np.float64(1357.0),np.float64(2.9758771929824563),np.float64(33.63),np.float64(-117.71),np.float64(2.418)'
b'np.float64(3.6875),np.float64(44.0),np.float64(4.524475524475524),np.float64(0.993006993006993),np.float64(457.0),np.float64(3.195804195804196),np.float64(34.04),np.float64(-118.15),np.float64(1.625)'
b'np.float64(2.3),np.float64(25.0),np.float64(5.828178694158075),np.float64(0.9587628865979382),np.float64(909.0),np.float64(3.1237113402061856),np.f

In [10]:
# 3. Fungsi untuk pra-pemrosesan (parsing dan scaling)
n_inputs = 8  # Jumlah fitur input
@tf.function
def preprocess(line):
    defs = [0.] * n_inputs + [tf.constant([], dtype=tf.float32)]
    fields = tf.io.decode_csv(line, record_defaults=defs)
    x = tf.stack(fields[:-1])
    y = tf.stack(fields[-1:])
    return (x - X_mean) / X_std, y

# Contoh hasil pra-pemrosesan
print("\nHasil pra-pemrosesan satu baris data:")
print(preprocess(b'4.2083,44.0,5.3232,0.9171,846.0,2.3370,37.47,-122.2,2.782'))


Hasil pra-pemrosesan satu baris data:
(<tf.Tensor: shape=(8,), dtype=float32, numpy=
array([ 0.16579157,  1.216324  , -0.05204565, -0.39215982, -0.5277444 ,
       -0.2633488 ,  0.8543046 , -1.3072058 ], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([2.782], dtype=float32)>)


In [11]:
# 4. Fungsi pabrik untuk membuat pipeline data lengkap
def csv_reader_dataset(filepaths, repeat=1, n_readers=5,
                       n_read_threads=None, shuffle_buffer_size=10000,
                       n_parse_threads=5, batch_size=32):
    dataset = tf.data.Dataset.list_files(filepaths).repeat(repeat)
    dataset = dataset.interleave(
        lambda filepath: tf.data.TextLineDataset(filepath).skip(1),
        cycle_length=n_readers, num_parallel_calls=n_read_threads)
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.map(preprocess, num_parallel_calls=n_parse_threads)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(1) # prefetch untuk kinerja yang lebih baik

In [12]:
# Membuat set data training, validasi, dan testing
train_set = csv_reader_dataset(train_filepaths, repeat=None)
valid_set = csv_reader_dataset(valid_filepaths)
test_set = csv_reader_dataset(test_filepaths)

#### Melatih Model dengan Pipeline Data

In [13]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=X_train.shape[1:]),
    keras.layers.Dense(1),
])

model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Mengurangi epoch agar berjalan lebih cepat
epochs = 5
batch_size = 32
print(f"\nMelatih model selama {epochs} epoch...")
history = model.fit(train_set, steps_per_epoch=len(X_train) // batch_size, epochs=epochs,
                    validation_data=valid_set)

print("\nEvaluasi model pada test set:")
loss = model.evaluate(test_set, steps=len(X_test) // batch_size)
print("Loss pada test set:", loss)


Melatih model selama 5 epoch...
Epoch 1/5


2025-06-20 16:30:43.445609: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: INVALID_ARGUMENT: Field 0 in record 0 is not a valid float: np.float64(5.0)
	 [[{{function_node __inference_preprocess_290}}{{node DecodeCSV}}]]
	 [[IteratorGetNext]]
2025-06-20 16:30:43.445720: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 8240808479259398707
2025-06-20 16:30:43.445769: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 14422924542292116735


InvalidArgumentError: Graph execution error:

Detected at node DecodeCSV defined at (most recent call last):
<stack traces unavailable>
Detected at node DecodeCSV defined at (most recent call last):
<stack traces unavailable>
2 root error(s) found.
  (0) INVALID_ARGUMENT:  Field 0 in record 0 is not a valid float: np.float64(5.0)
	 [[{{node DecodeCSV}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_2]]
  (1) INVALID_ARGUMENT:  Field 0 in record 0 is not a valid float: np.float64(5.0)
	 [[{{node DecodeCSV}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_multi_step_on_iterator_704]

### 3. Format Biner TFRecord
Bagian ini menjelaskan cara menggunakan format TFRecord yang efisien untuk menyimpan dan membaca data.

In [None]:
# --- Menulis dan Membaca File TFRecord Sederhana ---
with tf.io.TFRecordWriter("my_data.tfrecord") as f:
    f.write(b"Ini adalah record pertama")
    f.write(b"Dan ini adalah record kedua")

filepaths = ["my_data.tfrecord"]
dataset = tf.data.TFRecordDataset(filepaths)
print("\nMembaca dari file TFRecord:")
for item in dataset:
    print(item)

#### Kompresi File TFRecord

In [None]:
# --- Kompresi File TFRecord ---
options = tf.io.TFRecordOptions(compression_type="GZIP")
with tf.io.TFRecordWriter("my_compressed.tfrecord", options) as f:
    f.write(b"Ini adalah record terkompresi")

dataset = tf.data.TFRecordDataset(["my_compressed.tfrecord"], compression_type="GZIP")
print("\nMembaca dari file TFRecord terkompresi:")
for item in dataset:
    print(item)

#### TensorFlow Protobufs (tf.train.Example)

In [None]:
# Menggunakan tf.train.Example untuk menyimpan data terstruktur
BytesList = tf.train.BytesList
FloatList = tf.train.FloatList
Int64List = tf.train.Int64List
Feature = tf.train.Feature
Features = tf.train.Features
Example = tf.train.Example

person_example = Example(
    features=Features(
        feature={
            "name": Feature(bytes_list=BytesList(value=[b"Alice"])),
            "id": Feature(int64_list=Int64List(value=[123])),
            "emails": Feature(bytes_list=BytesList(value=[b"a@b.com", b"c@d.com"]))
        }))

with tf.io.TFRecordWriter("my_contacts.tfrecord") as f:
    f.write(person_example.SerializeToString())
print("\nContoh tf.train.Example berhasil ditulis ke file.")

In [None]:
feature_description = {
    "name": tf.io.FixedLenFeature([], tf.string, default_value=""),
    "id": tf.io.FixedLenFeature([], tf.int64, default_value=0),
    "emails": tf.io.VarLenFeature(tf.string), # Fitur dengan panjang variabel
}

print("\nMembaca dan mem-parsing tf.train.Example:")
for serialized_example in tf.data.TFRecordDataset(["my_contacts.tfrecord"]):
    parsed_example = tf.io.parse_single_example(serialized_example,
                                                feature_description)
    print(parsed_example)

In [None]:
# Mengubah sparse tensor menjadi dense tensor
print("\nEmail (dense):", tf.sparse.to_dense(parsed_example["emails"], default_value=b"").numpy())

In [None]:
img = load_sample_images()["images"][0]
data = tf.io.encode_jpeg(img)
example_with_image = Example(features=Features(feature={
    "image": Feature(bytes_list=BytesList(value=[data.numpy()]))
}))
serialized_example_img = example_with_image.SerializeToString()

In [None]:
# Membaca dan mendekode gambar
feature_description_img = { "image": tf.io.VarLenFeature(tf.string) }
parsed_example_img = tf.io.parse_single_example(serialized_example_img, feature_description_img)
decoded_img_tensor = tf.io.decode_image(parsed_example_img["image"].values[0])

print("\nUkuran tensor gambar yang didekode:", decoded_img_tensor.shape)

### 4. Preprocessing dengan Features API (Feature Columns)

In [None]:
# Memuat data housing lagi (versi dengan fitur kategorikal)
housing = load_housing_data()
print("Data housing (head):\n", housing.head())

#### Membuat Feature Columns

In [None]:
# 1. Numeric Column dengan normalisasi
age_mean, age_std = X_mean[1], X_std[1]
housing_median_age = tf.feature_column.numeric_column(
    "housing_median_age", normalizer_fn=lambda x: (x - age_mean) / age_std)

In [None]:
# 2. Bucketized Column
median_income = tf.feature_column.numeric_column("median_income")
bucketized_income = tf.feature_column.bucketized_column(
    median_income, boundaries=[1.5, 3., 4.5, 6.])

In [None]:
# 3. Categorical Vocabulary Column
ocean_prox_vocab = ['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN']
ocean_proximity = tf.feature_column.categorical_column_with_vocabulary_list(
    "ocean_proximity", ocean_prox_vocab)

In [None]:
# 4. Crossed Column
bucketized_age = tf.feature_column.bucketized_column(
    housing_median_age, boundaries=[-1., -0.5, 0., 0.5, 1.])
age_and_ocean_proximity = tf.feature_column.crossed_column(
    [bucketized_age, ocean_proximity], hash_bucket_size=100)

In [None]:
# 5. Indicator Column (untuk one-hot encoding)
ocean_proximity_one_hot = tf.feature_column.indicator_column(ocean_proximity)

In [None]:
# 6. Embedding Column (untuk representasi padat dari fitur kategorikal)
ocean_proximity_embed = tf.feature_column.embedding_column(ocean_proximity, dimension=2)


#### Menggunakan Feature Columns untuk Parsing dan Pelatihan

In [None]:
median_house_value = tf.feature_column.numeric_column("median_house_value")
columns = [housing_median_age, median_house_value]
feature_descriptions = tf.feature_column.make_parse_example_spec(columns)
print("\nDeskripsi fitur yang dihasilkan:\n", feature_descriptions)

In [None]:
# Tulis subset data ke TFRecord untuk demonstrasi
with tf.io.TFRecordWriter("my_data_with_features.tfrecords") as f:
    for x, y in zip(X_train[:, 1:2], y_train):
        example = Example(features=Features(feature={
            "housing_median_age": Feature(float_list=FloatList(value=[x])),
            "median_house_value": Feature(float_list=FloatList(value=[y]))
        }))
        f.write(example.SerializeToString())

In [None]:
# Fungsi untuk mem-parsing example
def parse_examples(serialized_examples):
    examples = tf.io.parse_example(serialized_examples, feature_descriptions)
    targets = examples.pop("median_house_value")
    return examples, targets

In [None]:
# Buat dataset
dataset = tf.data.TFRecordDataset(["my_data_with_features.tfrecords"])
dataset = dataset.repeat().shuffle(10000).batch(batch_size).map(parse_examples)

In [None]:
# Buat model menggunakan `DenseFeatures` layer
columns_without_target = [housing_median_age]
model = keras.models.Sequential([
    keras.layers.DenseFeatures(feature_columns=columns_without_target),
    keras.layers.Dense(1)
])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
print("\nMelatih model dengan DenseFeatures layer:")
model.fit(dataset, steps_per_epoch=len(X_train) // batch_size, epochs=5)

### 5. TF Transform

In [None]:
if tft is not None:
    # pra-pemrosesan data yang perlu di-fit terlebih dahulu.

    print("\n--- Memulai Bagian TF Transform ---")
    
    def preprocess_tft(inputs):
        """Fungsi pra-pemrosesan untuk TF Transform."""
        median_age = inputs["housing_median_age"]
        ocean_proximity = inputs["ocean_proximity"]
        
        # tft.scale_to_z_score akan menghitung mean dan std dev dari seluruh dataset
        standardized_age = tft.scale_to_z_score(median_age) 
        # tft.compute_and_apply_vocabulary akan membuat vocabulary dari seluruh dataset
        ocean_proximity_id = tft.compute_and_apply_vocabulary(ocean_proximity)
        
        return {
            "standardized_median_age": standardized_age,
            "ocean_proximity_id": ocean_proximity_id
        }
    print("Fungsi pra-pemrosesan TF Transform telah didefinisikan (tidak dijalankan).")

### 6. TensorFlow Datasets (TFDS)

In [None]:
if tfds is not None:
    # Bagian ini menunjukkan cara mudah memuat dataset standar menggunakan library TFDS.

    print("\n--- Memulai Bagian TensorFlow Datasets (TFDS) ---")

    # Memuat dataset MNIST
    datasets = tfds.load(name="mnist", batch_size=32, as_supervised=True)
    mnist_train = datasets["train"].repeat().prefetch(1)
    
    # Menampilkan beberapa gambar
    print("\nContoh gambar dari dataset MNIST yang dimuat via TFDS:")
    plt.figure(figsize=(6, 3))
    for images, labels in mnist_train.take(1):
        for index in range(5):
            plt.subplot(1, 5, index + 1)
            image = images[index, ..., 0]
            label = labels[index].numpy()
            plt.imshow(image, cmap="binary")
            plt.title(label)
            plt.axis("off")
    plt.show()

    # Melatih model pada dataset dari TFDS
    keras.backend.clear_session()
    np.random.seed(42)
    tf.random.set_seed(42)
    
    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=[28, 28, 1]),
        keras.layers.Lambda(lambda images: tf.cast(images, tf.float32)), # Cast ke float
        keras.layers.Dense(10, activation="softmax")
    ])
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                  metrics=["accuracy"])
    
    print("\nMelatih model pada dataset MNIST dari TFDS:")
    model.fit(mnist_train, steps_per_epoch=60000 // 32, epochs=5)

In [None]:
if hub is not None:
    # Bagian ini menunjukkan cara menggunakan model pre-trained dari TensorFlow Hub.
    
    print("\n--- Memulai Bagian TensorFlow Hub ---")

    keras.backend.clear_session()
    np.random.seed(42)
    tf.random.set_seed(42)

    # Menggunakan layer embedding teks dari TF Hub
    hub_layer = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim50/2",
                               output_shape=[50], input_shape=[], dtype=tf.string)

    model = keras.Sequential([
        hub_layer,
        keras.layers.Dense(16, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    
    print("\nRingkasan model dengan layer dari TF Hub:")
    model.summary()

    # Contoh penggunaan layer
    sentences = tf.constant(["It was a great movie", "The actors were amazing"])
    embeddings = hub_layer(sentences)
    print("\nContoh output embeddings dari TF Hub layer:")
    print(embeddings)