# Notebook: Uji Cepat Leksara
Notebook ini menguji pustaka `leksara` secara end-to-end: pengecekan lingkungan, instalasi, impor, pengujian fungsi dan kualitas output, pipeline kustom, I/O, benchmark, hingga pembersihan.

In [1]:
import pandas as pd
# NOTE(review): ReviewChain is imported but not used in the cells visible
# here; it is kept in case a later cell relies on it.
from leksara import leksara, ReviewChain
from leksara.functions.cleaner.basic import case_normal, remove_punctuation
from leksara.functions.patterns.pii import replace_phone, replace_email

# Two sample chat messages that embed phone numbers and e-mail addresses,
# used to exercise the PII-replacement steps below.
df = pd.DataFrame({
    "chat_id": [1, 2],
    "chat_message": [
        "Halo! Nomor saya 0812-3456-7890. Email: x@y.com",
        "Hubungi +6281234567890 ya — EMAIL saya: test@mail.co.id!",
    ],
})

# --- Functional style (custom pipeline) ---
# The pipeline is a plain dict: PII replacers go under "patterns" and text
# cleaners under "functions".
# NOTE(review): presumably the "patterns" steps run before the "functions"
# steps -- confirm against the leksara documentation.
custom_pipeline = {
    "patterns": [
        replace_phone,
        replace_email,
    ],
    "functions": [
        case_normal,
        remove_punctuation,
    ],
}

# Run the pipeline over the raw message column and store the result next to it.
df["safe_message"] = leksara(df["chat_message"], pipeline=custom_pipeline)
print(df[["chat_id", "safe_message"]])

   chat_id                               safe_message
0        1   halo nomor saya phone_number email email
1        2  hubungi phone_number ya  email saya email


In [2]:
from leksara.functions.review.advanced import word_normalization

# Two short sample sentences to run through the normalization step.
data = [
    "Produk Bagus sekali",
    "Saya membeli peralatan rumah tangga",
]

# A pipeline step may be given as a (function, options) tuple.
# NOTE(review): presumably the options dict is forwarded to the function as
# keyword arguments -- confirm against the leksara documentation.
custom_pipeline = dict(
    patterns=[],
    functions=[
        (word_normalization, dict(word_list=["Bagus"], mode="keep")),
    ],
)

print(leksara(data, pipeline=custom_pipeline))


['produk Bagus sekali', 'saya beli alat rumah tangga']


In [None]:
from leksara.functions.cleaner.basic import (
    case_normal,
    remove_stopwords,
    remove_whitespace,
)

# --- sample data ---
# Relies on `pd` having been imported by an earlier cell.
df = pd.DataFrame(
    {
        "chat_id": [1, 2, 3],
        "chat_message": [
            "Saya sangat suka produk ini, dan saya akan beli lagi!",
            "Produk ini bagus sekali untuk dipakai di rumah.",
            "Namun, harga-nya agak mahal ya...",
        ],
    }
)

print("=== Data Asli ===")
print(df)

# --- simple pipeline ---
# Each cleaner is applied element-wise to the message column, one step at a
# time, and the final result is stored in a new "cleaned" column.
lowered = df["chat_message"].apply(case_normal)          # convert to lowercase
without_stopwords = lowered.apply(remove_stopwords)      # drop stopwords (combined NLTK + local list, per the original note)
df["cleaned"] = without_stopwords.apply(remove_whitespace)  # normalize whitespace

print("\n=== Setelah Cleaning ===")
print(df[["chat_id", "cleaned"]])

=== Data Asli ===
   chat_id                                       chat_message
0        1  Saya sangat suka produk ini, dan saya akan bel...
1        2    Produk ini bagus sekali untuk dipakai di rumah.
2        3                  Namun, harga-nya agak mahal ya...

=== Setelah Cleaning ===
   chat_id                      cleaned
0        1         suka produk , beli !
1        2  produk bagus dipakai rumah.
2        3      , harga-nya mahal ya...
