In [1]:
"""
This script loads a previously trained Sequence-to-Sequence (Seq2Seq) model with an
Encoder-Decoder architecture and an attention mechanism.
The model is used to decode, i.e. generate, a sentence from an input sequence
using beam search with several penalties, followed by post-processing of the output.

Main steps:
1. Load the model and the tokenizers.
2. Define the support functions (loss, attention, masking, and decoding).
3. Run beam search with length, repetition, and required-token penalties.
4. Post-process the output to produce a clean, natural-looking sentence.
"""

# --- Inisialisasi Awal ---
import os
# Mengabaikan log TensorFlow (1 = INFO, 2 = WARNING, 3 = ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import warnings
# Mengabaikan peringatan dari library
warnings.filterwarnings("ignore", category=UserWarning)

import json, re, pickle
from collections import Counter
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, GRU, Bidirectional, Dense, Concatenate, Lambda, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# --- File path configuration ---
# Directory that holds the saved model and the two pickled tokenizers.
INPUT_DIR = "/kaggle/input/seq2seq-kaggle/tensorflow2/default/1"
MODEL_PATH = os.path.join(INPUT_DIR, "seq2seq_model.keras")
TOK_ENC_PATH = os.path.join(INPUT_DIR, "tokenizer_enc.pkl")
TOK_DEC_PATH = os.path.join(INPUT_DIR, "tokenizer_dec.pkl")

# --- Decoding hyperparameters ---
BEAM_WIDTH = 5  # default number of hypotheses beam_search_decoder keeps after each step
TOP_K = 5  # default number of next-token candidates expanded per hypothesis
LENGTH_PENALTY_ALPHA = 0.8  # default exponent used by length_penalty
REPETITION_PENALTY = 1.2  # subtracted from a token's log-probability once per earlier occurrence
NO_REPEAT_NGRAM_SIZE = 3  # an n-gram of this size may not occur twice in one hypothesis
REQUIRED_TOKEN_PENALTY = -100.0  # added to the final score for every required token that is missing

# --- Fixed model parameters (taken from the training run) ---
max_enc_len = 7  # maxlen passed to pad_sequences for the encoder input
max_dec_len = 12  # maxlen passed to pad_sequences for the decoder input

# --- Load the tokenizers ---
print("Loading tokenizers...")
try:
    # NOTE: pickle.load can execute arbitrary code embedded in the file, so these
    # .pkl files must come from a trusted source.
    with open(TOK_ENC_PATH, "rb") as f:
        enc_tok = pickle.load(f)
    with open(TOK_DEC_PATH, "rb") as f:
        dec_tok = pickle.load(f)
    print("Tokenizers loaded successfully.")
except FileNotFoundError:
    # Print a hint about the expected location, then re-raise so the cell stops.
    print(f"Error: Tokenizer files not found. Searched in '{INPUT_DIR}'")
    print("Pastikan file .pkl ada dan path INPUT_DIR sudah benar.")
    raise

# --- Derive vocabulary information from the tokenizers ---
# +1 leaves room for index 0, which this script treats as padding (see below).
num_enc_tokens = len(enc_tok.word_index) + 1
num_dec_tokens = len(dec_tok.word_index) + 1
# Reverse lookup (id -> word) used to turn decoded ids back into text.
rev_dec_index = {v: k for k, v in dec_tok.word_index.items()}
rev_dec_index[0] = "<pad>"
# .get returns None when the marker is absent from the decoder vocabulary;
# nothing here checks for that case.
start_tok = dec_tok.word_index.get("<start>")
end_tok = dec_tok.word_index.get("<end>")

print(f"Encoder Vocab: {num_enc_tokens} | Decoder Vocab: {num_dec_tokens}")
print(f"Max Seq Lens (Enc/Dec): {max_enc_len} / {max_dec_len}")

# --- Support functions required to load the model ---
LABEL_SMOOTH = 0.1  # Label-smoothing value (must match the one used during training)

def smooth_sparse_cce(y_true, y_pred):
    """
    Sparse categorical cross-entropy with label smoothing, averaged over
    non-padding positions.

    `y_true` holds integer token ids with a trailing axis of size 1 (it is
    squeezed on the last axis); positions whose id is 0 are treated as padding
    and contribute nothing to the loss. `y_pred` is passed to
    `categorical_crossentropy` with its default settings.
    """
    labels = tf.squeeze(tf.cast(y_true, tf.int32), axis=-1)
    vocab_size = tf.cast(num_dec_tokens, tf.float32)

    # Smoothed targets: scale the one-hot vector down and spread LABEL_SMOOTH
    # uniformly over the vocabulary.
    one_hot = tf.one_hot(labels, depth=num_dec_tokens)
    smoothed = one_hot * (1.0 - LABEL_SMOOTH) + LABEL_SMOOTH / vocab_size

    per_token_loss = tf.keras.losses.categorical_crossentropy(smoothed, y_pred)

    # 1.0 for real tokens, 0.0 for padding.
    not_padding = tf.cast(tf.not_equal(labels, 0), tf.float32)
    masked_total = tf.reduce_sum(per_token_loss * not_padding)
    token_count = tf.reduce_sum(not_padding)
    # The epsilon avoids a division by zero when every position is padding.
    return masked_total / (token_count + 1e-9)

def create_encoder_mask(inputs):
    """Return a float32 tensor that is 1.0 where `inputs` is non-zero and 0.0 where it is 0 (padding)."""
    is_real_token = tf.not_equal(inputs, 0)
    return tf.cast(is_real_token, tf.float32)

def dot_attention_fn(args):
    """
    Dot-product attention of the decoder outputs over the encoder outputs.

    `args` is a 3-element sequence: decoder outputs, encoder outputs, and the
    encoder mask (1.0 for real tokens, 0.0 for padding). Returns the context
    tensor, i.e. the attention-weighted sum of the encoder outputs.
    Presumably the outputs are (batch, steps, units) and the mask is
    (batch, enc_steps) - TODO confirm against the model definition.
    """
    decoder_states, encoder_states, encoder_mask = args

    # Masked encoder positions get a large negative score so that softmax
    # assigns them (practically) zero weight.
    additive_mask = (1.0 - tf.expand_dims(encoder_mask, axis=1)) * -1e9

    raw_scores = tf.matmul(decoder_states, encoder_states, transpose_b=True)
    weights = tf.nn.softmax(raw_scores + additive_mask, axis=-1)
    return tf.matmul(weights, encoder_states)

# --- Load the trained model ---
print("Loading pre-trained model...")
# The saved model refers to these functions by name, so they have to be handed
# to load_model for deserialization.
custom_objects = {
    "smooth_sparse_cce": smooth_sparse_cce,
    "create_encoder_mask": create_encoder_mask,
    "dot_attention_fn": dot_attention_fn
}

try:
    model = tf.keras.models.load_model(MODEL_PATH, custom_objects=custom_objects)
    model.summary()
    print(f"Model successfully loaded from {MODEL_PATH}")
except FileNotFoundError:
    # NOTE(review): depending on the Keras version a missing file may surface as
    # a different exception type, in which case the generic branch below
    # handles it instead - confirm.
    print(f"Error: Model file not found at '{MODEL_PATH}'")
    print("Pastikan file .keras ada dan path INPUT_DIR sudah benar.")
    raise
except Exception as e:
    # Report any other loading failure, then re-raise so the cell stops.
    print(f"An error occurred while loading the model: {e}")
    raise

# --- Fungsi Pascapemrosesan ---
def remove_consecutive_duplicates(words):
    """Collapse every run of identical adjacent words into a single word.

    Returns a new list; an empty (or falsy) input yields an empty list.
    """
    collapsed = []
    for current in words or ():
        # Keep a word only when it starts a new run.
        if not collapsed or current != collapsed[-1]:
            collapsed.append(current)
    return collapsed

def remove_nonconsecutive_duplicates(words):
    """Keep only the first occurrence of each word, preserving order.

    Falsy entries (such as empty strings) are dropped entirely.
    """
    already_kept = set()
    unique_words = []
    for candidate in words:
        if not candidate or candidate in already_kept:
            continue
        already_kept.add(candidate)
        unique_words.append(candidate)
    return unique_words

def cut_at_first_sentence_end(text):
    """Truncate `text` right after its first '.', '!' or '?'.

    The text is returned unchanged when it contains none of these characters.
    """
    terminator = re.search(r'[.!?]', text)
    if terminator is None:
        return text
    return text[:terminator.end()]

def post_process_sequence(token_ids):
    """
    Turn a list of decoder token ids into a clean text string.

    Steps:
    - drop the special ids (<start>, <end>, and 0 for padding),
    - map the remaining ids to words (unknown ids become empty strings),
    - remove adjacent duplicates, then any later repeats of a word,
    - join with spaces, cut after the first sentence-ending punctuation mark,
      and collapse runs of whitespace.
    """
    special_ids = (start_tok, end_tok, 0)
    words = []
    for tok in token_ids:
        if tok in special_ids:
            continue
        words.append(rev_dec_index.get(tok, ""))

    deduplicated = remove_nonconsecutive_duplicates(
        remove_consecutive_duplicates(words)
    )
    sentence = cut_at_first_sentence_end(" ".join(deduplicated).strip())
    return re.sub(r'\s+', ' ', sentence).strip()

# --- Fungsi Pendukung Beam Search ---
def get_required_token_ids(input_text):
    """Return the decoder-vocabulary ids of the words that appear in `input_text`.

    The text is lower-cased and stripped of punctuation first; words that are
    not in the decoder vocabulary are ignored. The result is a set of ids that
    the beam search expects to see in its output.
    """
    normalized = re.sub(r'[^\w\s]', '', input_text.lower())
    vocabulary = dec_tok.word_index
    distinct_words = set(normalized.split())
    return {vocabulary[word] for word in distinct_words if word in vocabulary}

def length_penalty(length, alpha=LENGTH_PENALTY_ALPHA):
    """Return the length-normalization factor ((5 + length) / 6) ** alpha.

    Beam scores are divided by this factor, so the normalization grows with
    the hypothesis length and with `alpha`.
    """
    scaled_length = (5.0 + length) / 6.0
    return scaled_length ** alpha

# --- Implementasi Beam Search Decoder ---
def beam_search_decoder(input_text, beam_width=BEAM_WIDTH, max_out_len=None, top_k=TOP_K):
    """
    Decode `input_text` with beam search and return the post-processed sentence.

    Every hypothesis is scored by its summed token log-probabilities, adjusted by
    a repetition penalty and a hard ban on repeated n-grams. Hypotheses are ranked
    by that score divided by `length_penalty`. The final choice additionally
    penalizes hypotheses that miss tokens taken from the input.

    Args:
        input_text (str): Input sentence (a sequence of keywords).
        beam_width (int): Number of hypotheses kept after each step.
        max_out_len (int): Maximum number of decoding steps. Defaults to a value
            derived from the input length. It is always capped at `max_dec_len`,
            because the decoder input is padded to exactly that length.
        top_k (int): Number of best next tokens expanded per hypothesis.

    Returns:
        str: The decoded sentence after post-processing.
    """
    if max_out_len is None:
        max_out_len = int(len(input_text.split()) * 2.5 + 5)
    # The prediction for the next token is read at index len(tokens) - 1 of a
    # decoder input padded to max_dec_len. Expanding a longer hypothesis would
    # make pad_sequences drop its leading tokens and push that index out of
    # range, so the step count is capped here for explicit values as well.
    max_out_len = min(max_out_len, max_dec_len)

    seq_enc = pad_sequences(enc_tok.texts_to_sequences([input_text]), maxlen=max_enc_len, padding="post")
    required_ids = get_required_token_ids(input_text)
    ngram_size = NO_REPEAT_NGRAM_SIZE

    # Each beam is (token ids, accumulated score, required ids seen so far).
    beams = [([start_tok], 0.0, set())]

    for _ in range(max_out_len):
        all_candidates = []
        for tokens, score, found_required in beams:
            # A finished hypothesis is carried over unchanged.
            if tokens[-1] == end_tok:
                all_candidates.append((tokens, score, found_required))
                continue

            dec_in = pad_sequences([tokens], maxlen=max_dec_len, padding="post")
            preds = model.predict([seq_enc, dec_in], verbose=0)
            # Distribution for the token that follows the last generated one.
            log_probs = np.log(preds[0, len(tokens)-1, :] + 1e-9)

            # Repetition penalty: the more often a token was already produced,
            # the more its log-probability is reduced.
            for token_id, cnt in Counter(tokens).items():
                if token_id in (start_tok, end_tok, 0):
                    continue
                log_probs[token_id] -= REPETITION_PENALTY * cnt

            # N-gram blocking: forbid any token that would complete an n-gram
            # already present in this hypothesis. Only continuations that occur
            # in the hypothesis can match, so scanning its own n-grams is
            # enough; there is no need to test every vocabulary entry.
            if ngram_size > 0 and len(tokens) >= ngram_size:
                prefix = tuple(tokens[len(tokens) - (ngram_size - 1):])
                for start in range(len(tokens) - ngram_size + 1):
                    ngram = tuple(tokens[start:start + ngram_size])
                    if ngram[:-1] == prefix:
                        log_probs[ngram[-1]] = -np.inf

            # Expand the hypothesis with its top-k next tokens.
            for idx in np.argsort(log_probs)[-top_k:]:
                next_id = int(idx)  # keep token lists as plain Python ints
                new_found_required = found_required.copy()
                if next_id in required_ids:
                    new_found_required.add(next_id)
                all_candidates.append(
                    (tokens + [next_id], score + log_probs[idx], new_found_required)
                )

        if not all_candidates:
            break

        # Keep only the best hypotheses, ranked by length-normalized score.
        ordered = sorted(all_candidates, key=lambda x: x[1] / length_penalty(len(x[0])), reverse=True)
        beams = ordered[:beam_width]

        # Stop early once every surviving hypothesis has ended.
        if all(b[0][-1] == end_tok for b in beams):
            break

    # Final ranking: length-normalized score plus a penalty for every required
    # token that never appeared in the hypothesis.
    def final_score(beam):
        tokens, score, found = beam
        penalty = len(required_ids - found) * REQUIRED_TOKEN_PENALTY
        return (score / length_penalty(len(tokens))) + penalty

    best_beam = max(beams, key=final_score)
    return post_process_sequence(best_beam[0])

print("\n--- Setup selesai. Model siap digunakan untuk inferensi. ---")

2025-11-03 02:33:02.162874: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762137182.378569      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762137182.436669      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading tokenizers...
Tokenizers loaded successfully.
Encoder Vocab: 305 | Decoder Vocab: 526
Max Seq Lens (Enc/Dec): 7 / 12
Loading pre-trained model...


I0000 00:00:1762137195.035351      37 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1762137195.035998      37 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Model successfully loaded from /kaggle/input/seq2seq-kaggle/tensorflow2/default/1/seq2seq_model.keras

--- Setup selesai. Model siap digunakan untuk inferensi. ---


In [2]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip
!chmod +x ngrok
!./ngrok authtoken 34AycPhBQWeAvdotE3zBxd1TT1G_7oLroAXm1hnup3koR2p7f

--2025-11-03 02:33:18--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 99.83.220.108, 35.71.179.82, 13.248.244.96, ...
Connecting to bin.equinox.io (bin.equinox.io)|99.83.220.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13921656 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2025-11-03 02:33:19 (99.3 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13921656/13921656]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   
Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [3]:
!ngrok authtoken 34AycPhBQWeAvdotE3zBxd1TT1G_7oLroAXm1hnup3koR2p7f
!pip install fastapi uvicorn pyngrok nest_asyncio

/bin/bash: line 1: ngrok: command not found
Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.4.1


In [None]:
# --- Setup Lingkungan dan Impor Library ---
import nest_asyncio 
from fastapi import FastAPI  
from pydantic import BaseModel 
from pyngrok import ngrok

# --- Initialize the FastAPI application ---
app = FastAPI()

class InputText(BaseModel):
    # Request body expected from API clients:
    # a JSON object with a "text" key that holds a string.
    text: str

# --- Prediction API endpoint ---
@app.post("/predict")  # Handles HTTP POST requests sent to /predict
def predict_text(data: InputText):
    # `data` has already been validated against InputText.
    input_text = data.text.strip()

    # Reject input that is empty after trimming whitespace.
    if input_text == "":
        return {"error": "Input kosong"}

    # Run the trained Seq2Seq model; any failure is reported to the client as
    # an "error" field rather than raised.
    try:
        output = beam_search_decoder(input_text)
    except Exception as e:
        return {"error": str(e)}

    # Return the prediction to the client as JSON.
    return {"input": input_text, "output": output}

# --- Server and tunnel configuration ---
port = 8000

# Use pyngrok to open a tunnel to the local port and print its public URL.
ngrok_tunnel = ngrok.connect(port)
print("Public URL:", ngrok_tunnel.public_url)

# Start the Uvicorn server
nest_asyncio.apply()  # For compatibility with the notebook environment
import uvicorn
# Serve the FastAPI app with Uvicorn on all interfaces; this call keeps the
# cell busy until the server is shut down.
uvicorn.run(app, host="0.0.0.0", port=port)

Public URL: https://noncapricious-goutily-jurnee.ngrok-free.dev


INFO:     Started server process [37]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
I0000 00:00:1762137295.717183     101 service.cc:148] XLA service 0x7a527c0457d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1762137295.717949     101 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1762137295.717970     101 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1762137296.181950     101 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1762137297.342763     101 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


INFO:     182.253.48.126:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     182.253.48.126:0 - "POST /predict HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [37]
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 211, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever
    self._run_once()
  File "/usr/lib/python3.11/asyncio/base_events.py", line 1921, in _run_once
    handle = self._ready.p