Generated from: intent_training.ipynb
Converted at: 2026-01-23T05:56:13.369Z
Next step (optional): refactor into modules & generate tests with RunCell
Quick start: pip install runcell

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score, multilabel_confusion_matrix, confusion_matrix
import re
from sentence_transformers import SentenceTransformer

In [2]:
# Location of the intent dataset (an Excel workbook); the path is relative to
# the notebook's working directory.
file_path = "data/intent.xlsx"
# Load the workbook into a DataFrame; later cells use its "text" and "intent" columns.
data = pd.read_excel(file_path)

In [3]:
# Keep only the utterance and its label. The explicit .copy() makes `data` an
# independent frame, so adding a column to it later does not trigger pandas'
# SettingWithCopyWarning.
data = data[["text", "intent"]].copy()
data.head()

Unnamed: 0,text,intent
0,halo mlibbot,salam
1,selamat pagi perpustakaan,salam
2,hai selamat siang,salam
3,makasih ya mlibbot,salam
4,terima kasih atas bantuannya,salam


In [5]:
print(data.columns)
data["intent"].value_counts()

Index(['text', 'intent'], dtype='object')


intent
salam                        68
tanya_fungsi_mlibbot         65
cari_buku_judul              65
cari_buku_penulis            65
cari_buku_topik              65
cari_buku_isbn_callnumber    65
cek_ketersediaan_buku        65
lokasi_buku_rak              65
jam_buka                     65
lokasi_perpustakaan          65
panduan_peminjaman           65
panduan_pengembalian         65
panduan_perpanjangan         65
info_denda                   65
tata_tertib                  65
layanan_ruang_diskusi        65
layanan_ejournal_ebook       65
layanan_turnitin             65
donasi_buku                  65
akses_repository             65
cari_rekomendasi             65
lainnya                      65
Name: count, dtype: int64

In [6]:
def preprocess(text: str) -> str:
    """Normalize a raw utterance before feature extraction.

    The text is lowercased, URLs are removed, every character that is not an
    ASCII alphanumeric, a Latin-1 accented letter or whitespace is replaced by
    a space, and runs of whitespace are collapsed to a single space.

    Args:
        text: The raw utterance. Non-string values are converted with
            ``str()``. Missing values (``None`` or a float NaN) are treated as
            empty text instead of becoming the literal tokens "none" / "nan".

    Returns:
        The cleaned, lowercased string (possibly empty).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    # Drop links entirely; they carry no intent signal as bag-of-words tokens.
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    # The accented-letter range is split so that U+00D7 (multiplication sign)
    # and U+00F7 (division sign), which sit inside À-ÿ, are treated as
    # punctuation rather than letters.
    text = re.sub(r"[^0-9a-zA-ZÀ-ÖØ-öø-ÿ\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

In [7]:
# "hasil" (Indonesian for "result") holds the normalized form of each utterance.
data["hasil"] = data["text"].apply(preprocess)
# Show raw vs. cleaned text side by side as a quick sanity check.
data[["text", "hasil"]].head(20)

Unnamed: 0,text,hasil
0,halo mlibbot,halo mlibbot
1,selamat pagi perpustakaan,selamat pagi perpustakaan
2,hai selamat siang,hai selamat siang
3,makasih ya mlibbot,makasih ya mlibbot
4,terima kasih atas bantuannya,terima kasih atas bantuannya
5,halo,halo
6,hai,hai
7,hey,hey
8,kamu itu bisa bantu apa aja sih,kamu itu bisa bantu apa aja sih
9,mlibbot fungsinya apa,mlibbot fungsinya apa


In [8]:
data.isnull().sum()

text      0
intent    0
hasil     0
dtype: int64

In [9]:
# Drop every row that has a missing value in any column.
data = data.dropna()

In [10]:
data.isnull().sum()

text      0
intent    0
hasil     0
dtype: int64

In [11]:
data

Unnamed: 0,text,intent,hasil
0,halo mlibbot,salam,halo mlibbot
1,selamat pagi perpustakaan,salam,selamat pagi perpustakaan
2,hai selamat siang,salam,hai selamat siang
3,makasih ya mlibbot,salam,makasih ya mlibbot
4,terima kasih atas bantuannya,salam,terima kasih atas bantuannya
...,...,...,...
1428,"buat nemenin praktikum basis data, enak klo ad...",cari_rekomendasi,buat nemenin praktikum basis data enak klo ada...
1429,sy suka bku yg bahas teori trus lanjut studi k...,cari_rekomendasi,sy suka bku yg bahas teori trus lanjut studi k...
1430,"gw lg bosen baca modul doang, pengen ganti sua...",cari_rekomendasi,gw lg bosen baca modul doang pengen ganti suas...
1431,"sy ngerasa perlu satu bacaan utama soal UI UX,...",cari_rekomendasi,sy ngerasa perlu satu bacaan utama soal ui ux ...


In [12]:
# From here on only the cleaned text ("hasil") and its label are kept.
data = data[["hasil", "intent"]]

In [13]:
data

Unnamed: 0,hasil,intent
0,halo mlibbot,salam
1,selamat pagi perpustakaan,salam
2,hai selamat siang,salam
3,makasih ya mlibbot,salam
4,terima kasih atas bantuannya,salam
...,...,...
1428,buat nemenin praktikum basis data enak klo ada...,cari_rekomendasi
1429,sy suka bku yg bahas teori trus lanjut studi k...,cari_rekomendasi
1430,gw lg bosen baca modul doang pengen ganti suas...,cari_rekomendasi
1431,sy ngerasa perlu satu bacaan utama soal ui ux ...,cari_rekomendasi


In [14]:
# Plain Python lists of strings: X holds the cleaned utterances, y the intent labels.
X = data["hasil"].astype(str).tolist()
y = data["intent"].astype(str).tolist()

In [15]:
# Hold out 20% of the samples for testing. stratify=y keeps the per-intent
# proportions the same in both splits; the fixed random_state makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [16]:
len(X_train), len(X_test)

(1146, 287)

In [17]:
INDOBERT_MODEL_NAME = "LazarusNLP/all-indobert-base-v4"


class IndoBertEncoder(BaseEstimator, TransformerMixin):
    """scikit-learn transformer that turns texts into sentence embeddings.

    Wraps a ``SentenceTransformer`` so it can be used as a ``Pipeline`` step.
    The transformer learns nothing from the data; ``fit`` only makes sure the
    model is loaded.

    Args:
        model_name: Name passed to ``SentenceTransformer`` to load the model.
        batch_size: Batch size forwarded to ``SentenceTransformer.encode``.
    """

    def __init__(self, model_name=INDOBERT_MODEL_NAME, batch_size=32):
        self.model_name = model_name
        self.batch_size = batch_size
        # Not a constructor parameter: the model is loaded on first use.
        self.model = None

    def _ensure_model(self):
        """Load the sentence-transformer model on first use and return it."""
        if self.model is None:
            self.model = SentenceTransformer(self.model_name)
        return self.model

    def fit(self, X, y=None):
        """Load the model if needed; ``X`` and ``y`` are ignored.

        Returns:
            The encoder itself, as required by the scikit-learn API.
        """
        self._ensure_model()
        return self

    def transform(self, X):
        """Encode texts into L2-normalized float32 embeddings.

        Args:
            X: A single string or an iterable of strings.

        Returns:
            A float32 NumPy array with the embeddings produced by
            ``SentenceTransformer.encode``.
        """
        # Load lazily so calling transform() without a prior fit() does not
        # fail with an AttributeError on None.
        model = self._ensure_model()
        # Materialize non-string inputs as a list so encode() indexes them
        # positionally. NOTE(review): a pandas Series with a non-default index
        # is presumably indexed by label inside encode() — confirm.
        sentences = X if isinstance(X, str) else list(X)
        embeddings = model.encode(
            sentences,
            batch_size=self.batch_size,
            convert_to_numpy=True,
            show_progress_bar=False,
            normalize_embeddings=True,
        ).astype(np.float32)
        return embeddings

In [18]:
# Baseline model: TF-IDF features fed into logistic regression.
pipe_logreg = Pipeline([
    ("tfidf", TfidfVectorizer(
        preprocessor=None,    # None is the default: no custom preprocessing callable
        lowercase=False        # the input was already lowercased by preprocess()
    )),
    ("clf", LogisticRegression(
        max_iter=500,
        # NOTE(review): the recorded output mentions scikit-learn 1.8
        # deprecations; n_jobs on LogisticRegression may have no effect there,
        # and this pipeline is also run inside GridSearchCV(n_jobs=-1) —
        # confirm whether it can be dropped.
        n_jobs=-1
    ))
])

In [19]:
# Hyper-parameter grid: 2 n-gram ranges x 2 min_df values x 3 C values = 12 candidates.
# Keys use the "<step>__<param>" form to address parameters of the pipeline steps.
param_grid_logreg = {
    "tfidf__ngram_range": [(1, 1), (1, 2)],
    "tfidf__min_df": [1, 2],
    "clf__C": [0.1, 1.0, 5.0]
}

In [20]:
# 5-fold cross-validated grid search over the TF-IDF + LogReg pipeline.
# No `scoring` is passed, so the estimator's default evaluation criterion is used.
grid_logreg = GridSearchCV(
    pipe_logreg,
    param_grid_logreg,
    cv=5,
    n_jobs=-1,
    verbose=2
)

In [21]:
# Run the search on the training split only; the test split is used later for evaluation.
grid_logreg.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits




0,1,2
,"estimator  estimator: estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed.",Pipeline(step... n_jobs=-1))])
,"param_grid  param_grid: dict or list of dictionaries Dictionary with parameters names (`str`) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings.","{'clf__C': [0.1, 1.0, ...], 'tfidf__min_df': [1, 2], 'tfidf__ngram_range': [(1, ...), (1, ...)]}"
,"scoring  scoring: str, callable, list, tuple or dict, default=None Strategy to evaluate the performance of the cross-validated model on the test set. If `scoring` represents a single score, one can use: - a single string (see :ref:`scoring_string_names`); - a callable (see :ref:`scoring_callable`) that returns a single value; - `None`, the `estimator`'s  :ref:`default evaluation criterion ` is used. If `scoring` represents multiple scores, one can use: - a list or tuple of unique strings; - a callable returning a dictionary where the keys are the metric  names and the values are the metric scores; - a dictionary with metric names as keys and callables as values. See :ref:`multimetric_grid_search` for an example.",
,"n_jobs  n_jobs: int, default=None Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20  `n_jobs` default changed from 1 to None",-1
,"refit  refit: bool, str, or callable, default=True Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given ``cv_results_``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``GridSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` to see how to design a custom selection strategy using a callable via `refit`. See :ref:`this example ` for an example of how to use ``refit=callable`` to balance model complexity and cross-validated score. .. versionchanged:: 0.20  Support for callable added.",True
,"cv  cv: int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22  ``cv`` default value if None changed from 3-fold to 5-fold.",5
,"verbose  verbose: int Controls the verbosity: the higher, the more messages. - >1 : the computation time for each fold and parameter candidate is  displayed; - >2 : the score is also displayed; - >3 : the fold and candidate parameter indexes are also displayed  together with the starting time of the computation.",2
,"pre_dispatch  pre_dispatch: int, or str, default='2*n_jobs' Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use  this for lightweight and fast-running jobs, to avoid delays due to on-demand  spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'",'2*n_jobs'
,"error_score  error_score: 'raise' or numeric, default=np.nan Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error.",
,"return_train_score  return_train_score: bool, default=False If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. versionchanged:: 0.21  Default value was changed from ``True`` to ``False``",False

0,1,2
,"input  input: {'filename', 'file', 'content'}, default='content' - If `'filename'`, the sequence passed as an argument to fit is  expected to be a list of filenames that need reading to fetch  the raw content to analyze. - If `'file'`, the sequence items must have a 'read' method (file-like  object) that is called to fetch the bytes in memory. - If `'content'`, the input is expected to be a sequence of items that  can be of type string or byte.",'content'
,"encoding  encoding: str, default='utf-8' If bytes or files are given to analyze, this encoding is used to decode.",'utf-8'
,"decode_error  decode_error: {'strict', 'ignore', 'replace'}, default='strict' Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'.",'strict'
,"strip_accents  strip_accents: {'ascii', 'unicode'} or callable, default=None Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have a direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) means no character normalization is performed. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`.",
,"lowercase  lowercase: bool, default=True Convert all characters to lowercase before tokenizing.",False
,"preprocessor  preprocessor: callable, default=None Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer`` is not callable.",
,"tokenizer  tokenizer: callable, default=None Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``.",
,"analyzer  analyzer: {'word', 'char', 'char_wb'} or callable, default='word' Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21  Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data  is first read from the file and then passed to the given callable  analyzer.",'word'
,"stop_words  stop_words: {'english'}, list, default=None If a string, it is passed to _check_stop_list and the appropriate stop list is returned. 'english' is currently the only supported string value. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. In this case, setting `max_df` to a higher value, such as in the range (0.7, 1.0), can automatically detect and filter stop words based on intra corpus document frequency of terms.",
,"token_pattern  token_pattern: str, default=r""(?u)\\b\\w\\w+\\b"" Regular expression denoting what constitutes a ""token"", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted.",'(?u)\\b\\w\\w+\\b'

0,1,2
,"penalty  penalty: {'l1', 'l2', 'elasticnet', None}, default='l2' Specify the norm of the penalty: - `None`: no penalty is added; - `'l2'`: add a L2 penalty term and it is the default choice; - `'l1'`: add a L1 penalty term; - `'elasticnet'`: both L1 and L2 penalty terms are added. .. warning::  Some penalties may not work with some solvers. See the parameter  `solver` below, to know the compatibility between the penalty and  solver. .. versionadded:: 0.19  l1 penalty with SAGA solver (allowing 'multinomial' + L1) .. deprecated:: 1.8  `penalty` was deprecated in version 1.8 and will be removed in 1.10.  Use `l1_ratio` instead. `l1_ratio=0` for `penalty='l2'`, `l1_ratio=1` for  `penalty='l1'` and `l1_ratio` set to any float between 0 and 1 for  `'penalty='elasticnet'`.",'deprecated'
,"C  C: float, default=1.0 Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization. `C=np.inf` results in unpenalized logistic regression. For a visual example on the effect of tuning the `C` parameter with an L1 penalty, see: :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py`.",5.0
,"l1_ratio  l1_ratio: float, default=0.0 The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`. Setting `l1_ratio=1` gives a pure L1-penalty, setting `l1_ratio=0` a pure L2-penalty. Any value between 0 and 1 gives an Elastic-Net penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2`. .. warning::  Certain values of `l1_ratio`, i.e. some penalties, may not work with some  solvers. See the parameter `solver` below, to know the compatibility between  the penalty and solver. .. versionchanged:: 1.8  Default value changed from None to 0.0. .. deprecated:: 1.8  `None` is deprecated and will be removed in version 1.10. Always use  `l1_ratio` to specify the penalty type.",0.0
,"dual  dual: bool, default=False Dual (constrained) or primal (regularized, see also :ref:`this equation `) formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer `dual=False` when n_samples > n_features.",False
,"tol  tol: float, default=1e-4 Tolerance for stopping criteria.",0.0001
,"fit_intercept  fit_intercept: bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.",True
,"intercept_scaling  intercept_scaling: float, default=1 Useful only when the solver `liblinear` is used and `self.fit_intercept` is set to `True`. In this case, `x` becomes `[x, self.intercept_scaling]`, i.e. a ""synthetic"" feature with constant value equal to `intercept_scaling` is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. .. note::  The synthetic feature weight is subject to L1 or L2  regularization as all other features.  To lessen the effect of regularization on synthetic feature weight  (and therefore on the intercept) `intercept_scaling` has to be increased.",1
,"class_weight  class_weight: dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17  *class_weight='balanced'*",
,"random_state  random_state: int, RandomState instance, default=None Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details.",
,"solver  solver: {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, default='lbfgs' Algorithm to use in the optimization problem. Default is 'lbfgs'. To choose a solver, you might want to consider the following aspects: - 'lbfgs' is a good default solver because it works reasonably well for a wide  class of problems. - For :term:`multiclass` problems (`n_classes >= 3`), all solvers except  'liblinear' minimize the full multinomial loss, 'liblinear' will raise an  error. - 'newton-cholesky' is a good choice for  `n_samples` >> `n_features * n_classes`, especially with one-hot encoded  categorical features with rare categories. Be aware that the memory usage  of this solver has a quadratic dependency on `n_features * n_classes`  because it explicitly computes the full Hessian matrix. - For small datasets, 'liblinear' is a good choice, whereas 'sag'  and 'saga' are faster for large ones; - 'liblinear' can only handle binary classification by default. To apply a  one-versus-rest scheme for the multiclass setting one can wrap it with the  :class:`~sklearn.multiclass.OneVsRestClassifier`. .. warning::  The choice of the algorithm depends on the penalty chosen (`l1_ratio=0`  for L2-penalty, `l1_ratio=1` for L1-penalty and `0 < l1_ratio < 1` for  Elastic-Net) and on (multinomial) multiclass support:  ================= ======================== ======================  solver l1_ratio multinomial multiclass  ================= ======================== ======================  'lbfgs' l1_ratio=0 yes  'liblinear' l1_ratio=1 or l1_ratio=0 no  'newton-cg' l1_ratio=0 yes  'newton-cholesky' l1_ratio=0 yes  'sag' l1_ratio=0 yes  'saga' 0<=l1_ratio<=1 yes  ================= ======================== ====================== .. note::  'sag' and 'saga' fast convergence is only guaranteed on features  with approximately the same scale. You can preprocess the data with  a scaler from :mod:`sklearn.preprocessing`. .. 
seealso::  Refer to the :ref:`User Guide ` for more  information regarding :class:`LogisticRegression` and more specifically the  :ref:`Table `  summarizing solver/penalty supports. .. versionadded:: 0.17  Stochastic Average Gradient (SAG) descent solver. Multinomial support in  version 0.18. .. versionadded:: 0.19  SAGA solver. .. versionchanged:: 0.22  The default solver changed from 'liblinear' to 'lbfgs' in 0.22. .. versionadded:: 1.2  newton-cholesky solver. Multinomial support in version 1.6.",'lbfgs'


In [22]:
# Report the selected parameter combination and its cross-validation score.
print("Best params (LogReg):", grid_logreg.best_params_)
print("Best CV score (LogReg):", grid_logreg.best_score_)

Best params (LogReg): {'clf__C': 5.0, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 2)}
Best CV score (LogReg): 0.7949610784127588


In [23]:
# Pipeline refit with the best parameters (GridSearchCV's refit defaults to True).
best_logreg = grid_logreg.best_estimator_

In [24]:
# Score the tuned pipeline on the held-out test split.
y_pred_logreg = best_logreg.predict(X_test)
acc_logreg = accuracy_score(y_test, y_pred_logreg)
print(f"Test Accuracy (LogReg TF-IDF): {acc_logreg:.3f}\n")

Test Accuracy (LogReg TF-IDF): 0.826



In [25]:
# Per-intent precision / recall / F1 on the test split.
print("Classification Report (LogReg TF-IDF):")
print(classification_report(y_test, y_pred_logreg))

Classification Report (LogReg TF-IDF):
                           precision    recall  f1-score   support

         akses_repository       0.79      0.85      0.81        13
cari_buku_isbn_callnumber       0.91      0.77      0.83        13
          cari_buku_judul       0.75      0.92      0.83        13
        cari_buku_penulis       0.64      0.54      0.58        13
          cari_buku_topik       0.91      0.77      0.83        13
         cari_rekomendasi       0.87      1.00      0.93        13
    cek_ketersediaan_buku       0.90      0.69      0.78        13
              donasi_buku       0.64      0.69      0.67        13
               info_denda       0.89      0.62      0.73        13
                 jam_buka       0.92      0.85      0.88        13
                  lainnya       0.72      1.00      0.84        13
   layanan_ejournal_ebook       0.80      0.92      0.86        13
    layanan_ruang_diskusi       0.93      1.00      0.96        13
         layanan_turni

In [26]:
# Per-intent TP / FP / FN / TN analysis (first used for the LogReg TF-IDF model)
def analyze_intent_difficulty(y_true, y_pred, model):
    """Build a per-intent table of confusion counts and derived metrics.

    Args:
        y_true: Ground-truth intent labels.
        y_pred: Predicted intent labels, aligned with ``y_true``.
        model: Fitted estimator whose ``classes_`` attribute lists the intents.

    Returns:
        A DataFrame with one row per intent and the columns Intent, TP, FP,
        FN, TN, Precision, Recall and F1-Score (metrics rounded to 4
        decimals), sorted by ascending F1-Score so the weakest intents come
        first.
    """
    class_names = model.classes_
    per_class_cm = multilabel_confusion_matrix(y_true, y_pred, labels=class_names)

    def _safe_ratio(numerator, denominator):
        # A metric whose denominator is zero is reported as 0.
        return numerator / denominator if denominator > 0 else 0

    rows = []
    for intent, matrix in zip(class_names, per_class_cm):
        # Each 2x2 matrix flattens to (tn, fp, fn, tp).
        tn, fp, fn, tp = matrix.ravel()
        precision = _safe_ratio(tp, tp + fp)
        recall = _safe_ratio(tp, tp + fn)
        f1 = _safe_ratio(2 * precision * recall, precision + recall)
        rows.append({
            "Intent": intent,
            "TP": tp, "FP": fp, "FN": fn, "TN": tn,
            "Precision": round(precision, 4),
            "Recall": round(recall, 4),
            "F1-Score": round(f1, 4)
        })

    table = pd.DataFrame(rows)
    return table.sort_values("F1-Score", ascending=True)

In [27]:
# The returned frame is sorted by ascending F1-Score, so head(5) lists the five
# intents the TF-IDF model handles worst on the test split.
print("\nAnalisis Intent Paling Sulit (LogReg TF-IDF):")
df_logreg = analyze_intent_difficulty(y_test, y_pred_logreg, best_logreg)
print(df_logreg[["Intent", "F1-Score", "Precision", "Recall", "TP", "FP", "FN"]].head(5).to_string(index=False))


Analisis Intent Paling Sulit (LogReg TF-IDF):
              Intent  F1-Score  Precision  Recall  TP  FP  FN
   cari_buku_penulis    0.5833     0.6364  0.5385   7   4   6
         donasi_buku    0.6667     0.6429  0.6923   9   5   4
     lokasi_buku_rak    0.6667     0.5500  0.8462  11   9   2
          info_denda    0.7273     0.8889  0.6154   8   1   5
panduan_pengembalian    0.7273     0.8889  0.6154   8   1   5


In [28]:
# Second model: sentence embeddings from IndoBertEncoder fed into logistic regression.
pipe_logreg_indobert = Pipeline([
    ("indobert", IndoBertEncoder(
        model_name=INDOBERT_MODEL_NAME,
        batch_size=32
    )),
    ("clf", LogisticRegression(
        max_iter=1000,  # higher iteration cap than the TF-IDF pipeline (500)
        n_jobs=-1
    ))
])

In [29]:
# Only the classifier is tuned: 3 C values x 2 class_weight settings = 6 candidates.
param_grid_logreg_indobert = {
    "clf__C": [0.1, 1.0, 5.0],
    "clf__class_weight": [None, "balanced"],
}

In [30]:
# 5-fold cross-validated grid search over the IndoBERT + LogReg pipeline.
# NOTE(review): the encoder is a pipeline step with no tuned parameters, so
# every CV fit appears to load the model and re-encode its fold; with
# n_jobs=-1 several copies may be resident at once. Precomputing the
# embeddings once may be much cheaper — confirm.
grid_logreg_indobert = GridSearchCV(
    pipe_logreg_indobert,
    param_grid_logreg_indobert,
    cv=5,
    n_jobs=-1,
    verbose=2
)

In [31]:
# Run the search on the training split only, as for the TF-IDF pipeline.
grid_logreg_indobert.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits




0,1,2
,"estimator  estimator: estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed.",Pipeline(step... n_jobs=-1))])
,"param_grid  param_grid: dict or list of dictionaries Dictionary with parameters names (`str`) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings.","{'clf__C': [0.1, 1.0, ...], 'clf__class_weight': [None, 'balanced']}"
,"scoring  scoring: str, callable, list, tuple or dict, default=None Strategy to evaluate the performance of the cross-validated model on the test set. If `scoring` represents a single score, one can use: - a single string (see :ref:`scoring_string_names`); - a callable (see :ref:`scoring_callable`) that returns a single value; - `None`, the `estimator`'s  :ref:`default evaluation criterion ` is used. If `scoring` represents multiple scores, one can use: - a list or tuple of unique strings; - a callable returning a dictionary where the keys are the metric  names and the values are the metric scores; - a dictionary with metric names as keys and callables as values. See :ref:`multimetric_grid_search` for an example.",
,"n_jobs  n_jobs: int, default=None Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20  `n_jobs` default changed from 1 to None",-1
,"refit  refit: bool, str, or callable, default=True Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given ``cv_results_``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``GridSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` to see how to design a custom selection strategy using a callable via `refit`. See :ref:`this example ` for an example of how to use ``refit=callable`` to balance model complexity and cross-validated score. .. versionchanged:: 0.20  Support for callable added.",True
,"cv  cv: int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22  ``cv`` default value if None changed from 3-fold to 5-fold.",5
,"verbose  verbose: int Controls the verbosity: the higher, the more messages. - >1 : the computation time for each fold and parameter candidate is  displayed; - >2 : the score is also displayed; - >3 : the fold and candidate parameter indexes are also displayed  together with the starting time of the computation.",2
,"pre_dispatch  pre_dispatch: int, or str, default='2*n_jobs' Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use  this for lightweight and fast-running jobs, to avoid delays due to on-demand  spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'",'2*n_jobs'
,"error_score  error_score: 'raise' or numeric, default=np.nan Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error.",
,"return_train_score  return_train_score: bool, default=False If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. versionchanged:: 0.21  Default value was changed from ``True`` to ``False``",False

0,1,2
,model_name,'LazarusNLP/all-indobert-base-v4'
,batch_size,32

0,1,2
,"penalty  penalty: {'l1', 'l2', 'elasticnet', None}, default='l2' Specify the norm of the penalty: - `None`: no penalty is added; - `'l2'`: add a L2 penalty term and it is the default choice; - `'l1'`: add a L1 penalty term; - `'elasticnet'`: both L1 and L2 penalty terms are added. .. warning::  Some penalties may not work with some solvers. See the parameter  `solver` below, to know the compatibility between the penalty and  solver. .. versionadded:: 0.19  l1 penalty with SAGA solver (allowing 'multinomial' + L1) .. deprecated:: 1.8  `penalty` was deprecated in version 1.8 and will be removed in 1.10.  Use `l1_ratio` instead. `l1_ratio=0` for `penalty='l2'`, `l1_ratio=1` for  `penalty='l1'` and `l1_ratio` set to any float between 0 and 1 for  `'penalty='elasticnet'`.",'deprecated'
,"C  C: float, default=1.0 Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization. `C=np.inf` results in unpenalized logistic regression. For a visual example on the effect of tuning the `C` parameter with an L1 penalty, see: :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py`.",5.0
,"l1_ratio  l1_ratio: float, default=0.0 The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`. Setting `l1_ratio=1` gives a pure L1-penalty, setting `l1_ratio=0` a pure L2-penalty. Any value between 0 and 1 gives an Elastic-Net penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2`. .. warning::  Certain values of `l1_ratio`, i.e. some penalties, may not work with some  solvers. See the parameter `solver` below, to know the compatibility between  the penalty and solver. .. versionchanged:: 1.8  Default value changed from None to 0.0. .. deprecated:: 1.8  `None` is deprecated and will be removed in version 1.10. Always use  `l1_ratio` to specify the penalty type.",0.0
,"dual  dual: bool, default=False Dual (constrained) or primal (regularized, see also :ref:`this equation `) formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer `dual=False` when n_samples > n_features.",False
,"tol  tol: float, default=1e-4 Tolerance for stopping criteria.",0.0001
,"fit_intercept  fit_intercept: bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.",True
,"intercept_scaling  intercept_scaling: float, default=1 Useful only when the solver `liblinear` is used and `self.fit_intercept` is set to `True`. In this case, `x` becomes `[x, self.intercept_scaling]`, i.e. a ""synthetic"" feature with constant value equal to `intercept_scaling` is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. .. note::  The synthetic feature weight is subject to L1 or L2  regularization as all other features.  To lessen the effect of regularization on synthetic feature weight  (and therefore on the intercept) `intercept_scaling` has to be increased.",1
,"class_weight  class_weight: dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17  *class_weight='balanced'*",'balanced'
,"random_state  random_state: int, RandomState instance, default=None Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details.",
,"solver  solver: {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, default='lbfgs' Algorithm to use in the optimization problem. Default is 'lbfgs'. To choose a solver, you might want to consider the following aspects: - 'lbfgs' is a good default solver because it works reasonably well for a wide  class of problems. - For :term:`multiclass` problems (`n_classes >= 3`), all solvers except  'liblinear' minimize the full multinomial loss, 'liblinear' will raise an  error. - 'newton-cholesky' is a good choice for  `n_samples` >> `n_features * n_classes`, especially with one-hot encoded  categorical features with rare categories. Be aware that the memory usage  of this solver has a quadratic dependency on `n_features * n_classes`  because it explicitly computes the full Hessian matrix. - For small datasets, 'liblinear' is a good choice, whereas 'sag'  and 'saga' are faster for large ones; - 'liblinear' can only handle binary classification by default. To apply a  one-versus-rest scheme for the multiclass setting one can wrap it with the  :class:`~sklearn.multiclass.OneVsRestClassifier`. .. warning::  The choice of the algorithm depends on the penalty chosen (`l1_ratio=0`  for L2-penalty, `l1_ratio=1` for L1-penalty and `0 < l1_ratio < 1` for  Elastic-Net) and on (multinomial) multiclass support:  ================= ======================== ======================  solver l1_ratio multinomial multiclass  ================= ======================== ======================  'lbfgs' l1_ratio=0 yes  'liblinear' l1_ratio=1 or l1_ratio=0 no  'newton-cg' l1_ratio=0 yes  'newton-cholesky' l1_ratio=0 yes  'sag' l1_ratio=0 yes  'saga' 0<=l1_ratio<=1 yes  ================= ======================== ====================== .. note::  'sag' and 'saga' fast convergence is only guaranteed on features  with approximately the same scale. You can preprocess the data with  a scaler from :mod:`sklearn.preprocessing`. .. 
seealso::  Refer to the :ref:`User Guide ` for more  information regarding :class:`LogisticRegression` and more specifically the  :ref:`Table `  summarizing solver/penalty supports. .. versionadded:: 0.17  Stochastic Average Gradient (SAG) descent solver. Multinomial support in  version 0.18. .. versionadded:: 0.19  SAGA solver. .. versionchanged:: 0.22  The default solver changed from 'liblinear' to 'lbfgs' in 0.22. .. versionadded:: 1.2  newton-cholesky solver. Multinomial support in version 1.6.",'lbfgs'


In [32]:
# Report the winning hyper-parameter combination and its cross-validation
# score from the IndoBERT + Logistic Regression grid search.
print("Best params (LogReg):", grid_logreg_indobert.best_params_)
print("Best CV score (LogReg):", grid_logreg_indobert.best_score_)

Best params (LogReg): {'clf__C': 5.0, 'clf__class_weight': 'balanced'}
Best CV score (LogReg): 0.7617846971710651


In [33]:
# Keep the best estimator found by the grid search; it is evaluated, saved and
# used for ad-hoc predictions in the cells below.
best_logreg_indobert = grid_logreg_indobert.best_estimator_

In [34]:
# Predict on the test inputs with the tuned IndoBERT + LogReg model and report
# plain accuracy (3 decimals, followed by a blank line).
y_pred_logreg_indobert = best_logreg_indobert.predict(X_test)
acc_logreg_indobert = accuracy_score(
    y_true=y_test,
    y_pred=y_pred_logreg_indobert,
)
print(f"Test Accuracy (LogReg IndoBERT): {acc_logreg_indobert:.3f}\n")

Test Accuracy (LogReg IndoBERT): 0.767



In [35]:
# Per-intent precision / recall / F1 for the IndoBERT + LogReg predictions.
print("Classification Report (LogReg IndoBERT):")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_logreg_indobert,
    )
)

Classification Report (LogReg IndoBERT):
                           precision    recall  f1-score   support

         akses_repository       0.77      0.77      0.77        13
cari_buku_isbn_callnumber       0.92      0.85      0.88        13
          cari_buku_judul       0.56      0.77      0.65        13
        cari_buku_penulis       0.67      0.77      0.71        13
          cari_buku_topik       0.75      0.46      0.57        13
         cari_rekomendasi       0.77      0.77      0.77        13
    cek_ketersediaan_buku       0.70      0.54      0.61        13
              donasi_buku       0.80      0.92      0.86        13
               info_denda       1.00      0.85      0.92        13
                 jam_buka       0.77      0.77      0.77        13
                  lainnya       0.62      0.77      0.69        13
   layanan_ejournal_ebook       0.62      0.62      0.62        13
    layanan_ruang_diskusi       0.86      0.92      0.89        13
         layanan_tur

In [36]:
# Show the first five rows of the per-intent difficulty table for the
# IndoBERT + LogReg model (in the captured output these are the lowest-F1 intents).
print("\nAnalisis Intent Paling Sulit (LogReg IndoBERT):")
df_indobert = analyze_intent_difficulty(
    y_test,
    y_pred_logreg_indobert,
    best_logreg_indobert,
)
print(
    df_indobert[["Intent", "F1-Score", "Precision", "Recall", "TP", "FP", "FN"]]
    .head(5)
    .to_string(index=False)
)


Analisis Intent Paling Sulit (LogReg IndoBERT):
                Intent  F1-Score  Precision  Recall  TP  FP  FN
       cari_buku_topik    0.5714     0.7500  0.4615   6   2   7
 cek_ketersediaan_buku    0.6087     0.7000  0.5385   7   3   6
layanan_ejournal_ebook    0.6154     0.6154  0.6154   8   5   5
    panduan_peminjaman    0.6154     0.6154  0.6154   8   5   5
       cari_buku_judul    0.6452     0.5556  0.7692  10   8   3


In [37]:
# Naive Bayes baseline: TF-IDF features followed by a Multinomial NB classifier.
# The vectorizer's own lowercasing is switched off and no custom preprocessor is
# plugged in -- presumably because the text is already cleaned with `preprocess`
# before it reaches the pipeline (TODO confirm against the train/test split cell).
pipe_nb = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(lowercase=False, preprocessor=None)),
        ("clf", MultinomialNB()),
    ]
)

In [38]:
# Search space for the Naive Bayes pipeline: 2 n-gram ranges x 2 min_df values
# x 3 smoothing strengths = 12 candidates. Keys use the `<step>__<param>` form
# so GridSearchCV can route them to the right pipeline step.
param_grid_nb = dict(
    tfidf__ngram_range=[(1, 1), (1, 2)],
    tfidf__min_df=[1, 2],
    clf__alpha=[0.1, 0.5, 1.0],
)

In [39]:
# Exhaustive 5-fold grid search over `param_grid_nb`, using every available
# core (n_jobs=-1) and printing per-fit progress (verbose=2). No `scoring` is
# given, so the estimator's default score is used.
grid_nb = GridSearchCV(
    estimator=pipe_nb,
    param_grid=param_grid_nb,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

In [40]:
# Run the grid search on the training data. The call is left as a bare
# expression on purpose: the fitted search object is the cell's displayed output.
grid_nb.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


0,1,2
,"estimator  estimator: estimator object This is assumed to implement the scikit-learn estimator interface. Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed.",Pipeline(step...inomialNB())])
,"param_grid  param_grid: dict or list of dictionaries Dictionary with parameters names (`str`) as keys and lists of parameter settings to try as values, or a list of such dictionaries, in which case the grids spanned by each dictionary in the list are explored. This enables searching over any sequence of parameter settings.","{'clf__alpha': [0.1, 0.5, ...], 'tfidf__min_df': [1, 2], 'tfidf__ngram_range': [(1, ...), (1, ...)]}"
,"scoring  scoring: str, callable, list, tuple or dict, default=None Strategy to evaluate the performance of the cross-validated model on the test set. If `scoring` represents a single score, one can use: - a single string (see :ref:`scoring_string_names`); - a callable (see :ref:`scoring_callable`) that returns a single value; - `None`, the `estimator`'s  :ref:`default evaluation criterion ` is used. If `scoring` represents multiple scores, one can use: - a list or tuple of unique strings; - a callable returning a dictionary where the keys are the metric  names and the values are the metric scores; - a dictionary with metric names as keys and callables as values. See :ref:`multimetric_grid_search` for an example.",
,"n_jobs  n_jobs: int, default=None Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20  `n_jobs` default changed from 1 to None",-1
,"refit  refit: bool, str, or callable, default=True Refit an estimator using the best found parameters on the whole dataset. For multiple metric evaluation, this needs to be a `str` denoting the scorer that would be used to find the best parameters for refitting the estimator at the end. Where there are considerations other than maximum score in choosing a best estimator, ``refit`` can be set to a function which returns the selected ``best_index_`` given ``cv_results_``. In that case, the ``best_estimator_`` and ``best_params_`` will be set according to the returned ``best_index_`` while the ``best_score_`` attribute will not be available. The refitted estimator is made available at the ``best_estimator_`` attribute and permits using ``predict`` directly on this ``GridSearchCV`` instance. Also for multiple metric evaluation, the attributes ``best_index_``, ``best_score_`` and ``best_params_`` will only be available if ``refit`` is set and all of them will be determined w.r.t this specific scorer. See ``scoring`` parameter to know more about multiple metric evaluation. See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` to see how to design a custom selection strategy using a callable via `refit`. See :ref:`this example ` for an example of how to use ``refit=callable`` to balance model complexity and cross-validated score. .. versionchanged:: 0.20  Support for callable added.",True
,"cv  cv: int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22  ``cv`` default value if None changed from 3-fold to 5-fold.",5
,"verbose  verbose: int Controls the verbosity: the higher, the more messages. - >1 : the computation time for each fold and parameter candidate is  displayed; - >2 : the score is also displayed; - >3 : the fold and candidate parameter indexes are also displayed  together with the starting time of the computation.",2
,"pre_dispatch  pre_dispatch: int, or str, default='2*n_jobs' Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use  this for lightweight and fast-running jobs, to avoid delays due to on-demand  spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs'",'2*n_jobs'
,"error_score  error_score: 'raise' or numeric, default=np.nan Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error.",
,"return_train_score  return_train_score: bool, default=False If ``False``, the ``cv_results_`` attribute will not include training scores. Computing training scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. .. versionadded:: 0.19 .. versionchanged:: 0.21  Default value was changed from ``True`` to ``False``",False

0,1,2
,"input  input: {'filename', 'file', 'content'}, default='content' - If `'filename'`, the sequence passed as an argument to fit is  expected to be a list of filenames that need reading to fetch  the raw content to analyze. - If `'file'`, the sequence items must have a 'read' method (file-like  object) that is called to fetch the bytes in memory. - If `'content'`, the input is expected to be a sequence of items that  can be of type string or byte.",'content'
,"encoding  encoding: str, default='utf-8' If bytes or files are given to analyze, this encoding is used to decode.",'utf-8'
,"decode_error  decode_error: {'strict', 'ignore', 'replace'}, default='strict' Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. By default, it is 'strict', meaning that a UnicodeDecodeError will be raised. Other values are 'ignore' and 'replace'.",'strict'
,"strip_accents  strip_accents: {'ascii', 'unicode'} or callable, default=None Remove accents and perform other character normalization during the preprocessing step. 'ascii' is a fast method that only works on characters that have a direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. None (default) means no character normalization is performed. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`.",
,"lowercase  lowercase: bool, default=True Convert all characters to lowercase before tokenizing.",False
,"preprocessor  preprocessor: callable, default=None Override the preprocessing (string transformation) stage while preserving the tokenizing and n-grams generation steps. Only applies if ``analyzer`` is not callable.",
,"tokenizer  tokenizer: callable, default=None Override the string tokenization step while preserving the preprocessing and n-grams generation steps. Only applies if ``analyzer == 'word'``.",
,"analyzer  analyzer: {'word', 'char', 'char_wb'} or callable, default='word' Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. .. versionchanged:: 0.21  Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data  is first read from the file and then passed to the given callable  analyzer.",'word'
,"stop_words  stop_words: {'english'}, list, default=None If a string, it is passed to _check_stop_list and the appropriate stop list is returned. 'english' is currently the only supported string value. There are several known issues with 'english' and you should consider an alternative (see :ref:`stop_words`). If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. If None, no stop words will be used. In this case, setting `max_df` to a higher value, such as in the range (0.7, 1.0), can automatically detect and filter stop words based on intra corpus document frequency of terms.",
,"token_pattern  token_pattern: str, default=r""(?u)\\b\\w\\w+\\b"" Regular expression denoting what constitutes a ""token"", only used if ``analyzer == 'word'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). If there is a capturing group in token_pattern then the captured group content, not the entire match, becomes the token. At most one capturing group is permitted.",'(?u)\\b\\w\\w+\\b'

0,1,2
,"alpha  alpha: float or array-like of shape (n_features,), default=1.0 Additive (Laplace/Lidstone) smoothing parameter (set alpha=0 and force_alpha=True, for no smoothing).",0.1
,"force_alpha  force_alpha: bool, default=True If False and alpha is less than 1e-10, it will set alpha to 1e-10. If True, alpha will remain unchanged. This may cause numerical errors if alpha is too close to 0. .. versionadded:: 1.2 .. versionchanged:: 1.4  The default value of `force_alpha` changed to `True`.",True
,"fit_prior  fit_prior: bool, default=True Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",True
,"class_prior  class_prior: array-like of shape (n_classes,), default=None Prior probabilities of the classes. If specified, the priors are not adjusted according to the data.",


In [41]:
# Report the winning hyper-parameter combination and its cross-validation
# score from the Naive Bayes grid search.
print("Best params (NB):", grid_nb.best_params_)
print("Best CV score (NB):", grid_nb.best_score_)

Best params (NB): {'clf__alpha': 0.1, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 2)}
Best CV score (NB): 0.7731573951015759


In [42]:
# Keep the best Naive Bayes pipeline found by the grid search; it is evaluated,
# compared against the other candidates and saved in the cells below.
best_nb = grid_nb.best_estimator_

In [43]:
# Predict on the test inputs with the tuned Naive Bayes pipeline and report
# plain accuracy (3 decimals, followed by a blank line).
y_pred_nb = best_nb.predict(X_test)
acc_nb = accuracy_score(
    y_true=y_test,
    y_pred=y_pred_nb,
)
print(f"Test Accuracy (Naive Bayes): {acc_nb:.3f}\n")

Test Accuracy (Naive Bayes): 0.787



In [44]:
# Per-intent precision / recall / F1 for the Naive Bayes predictions.
print("Classification Report (Naive Bayes):")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_nb,
    )
)

Classification Report (Naive Bayes):
                           precision    recall  f1-score   support

         akses_repository       0.73      0.85      0.79        13
cari_buku_isbn_callnumber       0.86      0.92      0.89        13
          cari_buku_judul       0.86      0.92      0.89        13
        cari_buku_penulis       0.73      0.62      0.67        13
          cari_buku_topik       0.71      0.77      0.74        13
         cari_rekomendasi       0.85      0.85      0.85        13
    cek_ketersediaan_buku       0.90      0.69      0.78        13
              donasi_buku       0.57      0.62      0.59        13
               info_denda       0.78      0.54      0.64        13
                 jam_buka       0.92      0.92      0.92        13
                  lainnya       0.75      0.92      0.83        13
   layanan_ejournal_ebook       0.77      0.77      0.77        13
    layanan_ruang_diskusi       0.92      0.92      0.92        13
         layanan_turniti

In [45]:
# Show the first five rows of the per-intent difficulty table for the
# Naive Bayes model (in the captured output these are the lowest-F1 intents).
print("\nAnalisis Intent Paling Sulit (Naive Bayes):")
df_nb = analyze_intent_difficulty(
    y_test,
    y_pred_nb,
    best_nb,
)
print(
    df_nb[["Intent", "F1-Score", "Precision", "Recall", "TP", "FP", "FN"]]
    .head(5)
    .to_string(index=False)
)


Analisis Intent Paling Sulit (Naive Bayes):
              Intent  F1-Score  Precision  Recall  TP  FP  FN
panduan_pengembalian    0.5600     0.5833  0.5385   7   5   6
         donasi_buku    0.5926     0.5714  0.6154   8   6   5
          info_denda    0.6364     0.7778  0.5385   7   2   6
   cari_buku_penulis    0.6667     0.7273  0.6154   8   3   5
         tata_tertib    0.6957     0.8000  0.6154   8   2   5


Pada percobaan ini, model Naive Bayes tetap menggunakan fitur TF-IDF dan tidak digabung dengan embedding IndoBERT. Alasannya karena secara prinsip, Multinomial Naive Bayes dirancang untuk bekerja dengan fitur berupa frekuensi kata atau bobot yang mirip frekuensi (seperti count dan TF-IDF) yang bernilai non-negatif.

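Sementara itu, embedding IndoBERT berbentuk vektor padat (dense) dengan nilai kontinu yang bisa positif maupun negatif, dan tidak lagi merepresentasikan "jumlah kemunculan kata", tetapi makna kalimat di ruang vektor. Tipe fitur seperti ini tidak sesuai dengan asumsi probabilistik Multinomial Naive Bayes; implementasi `MultinomialNB` di scikit-learn bahkan menolak fitur bernilai negatif (melempar `ValueError`), sehingga embedding tidak bisa dipakai langsung, dan jika dipaksakan lewat transformasi tambahan (misalnya penskalaan ke rentang non-negatif) performanya justru bisa tidak stabil atau menurun.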

In [46]:
# Side-by-side test accuracy of the three tuned models. Labels are pre-padded
# so the colons line up; a single print emits the same three lines.
print(
    "\n".join(
        f"{label}: {score:.3f}"
        for label, score in (
            ("LogReg (TF-IDF) Test Accuracy      ", acc_logreg),
            ("Naive Bayes (TF-IDF) Test Accuracy ", acc_nb),
            ("LogReg (IndoBERT) Test Accuracy    ", acc_logreg_indobert),
        )
    )
)

LogReg (TF-IDF) Test Accuracy      : 0.826
Naive Bayes (TF-IDF) Test Accuracy : 0.787
LogReg (IndoBERT) Test Accuracy    : 0.767


In [47]:
# Registry of the tuned models: name -> (test accuracy, fitted estimator).
# Insertion order matters for tie-breaking in the selection cell that follows.
candidates = dict(
    logreg_tfidf=(acc_logreg, best_logreg),
    naive_bayes_tfidf=(acc_nb, best_nb),
    logreg_indobert=(acc_logreg_indobert, best_logreg_indobert),
)

In [48]:
# Choose the candidate with the highest stored accuracy (first entry of each
# tuple); on a tie the earliest-inserted candidate wins.
# NOTE(review): the stored accuracies are computed on the test split, so the
# accuracy reported for the chosen model is optimistically biased -- consider
# selecting on the cross-validation scores instead.
best_model_name = max(candidates, key=lambda name: candidates[name][0])
best_acc, final_model = candidates[best_model_name]

In [49]:
# Announce which candidate was selected and the accuracy it was selected on.
print(f"Chosen model: {best_model_name} (accuracy = {best_acc:.3f})")

Chosen model: logreg_tfidf (accuracy = 0.826)


In [50]:
# Save all models
print("\nMenyimpan semua model...")


Menyimpan semua model...


In [51]:
import os  # local import: only needed to guarantee the output folder exists

# Persist the estimator labelled "LogReg TF-IDF" so it can be loaded without retraining.
logreg_tfidf_path = "model/intent_model_logreg_tfidf.pkl"
# joblib.dump does not create missing parent directories; without this a fresh
# checkout (no `model/` folder yet) fails here with FileNotFoundError.
os.makedirs(os.path.dirname(logreg_tfidf_path) or ".", exist_ok=True)
joblib.dump(best_logreg, logreg_tfidf_path)
print(f"Saved LogReg TF-IDF to: {logreg_tfidf_path}")

Saved LogReg TF-IDF to: model/intent_model_logreg_tfidf.pkl


In [52]:
import os  # local import: only needed to guarantee the output folder exists

# Persist the tuned TF-IDF + Naive Bayes pipeline so it can be loaded without retraining.
nb_tfidf_path = "model/intent_model_naive_bayes_tfidf.pkl"
# joblib.dump does not create missing parent directories; without this a fresh
# checkout (no `model/` folder yet) fails here with FileNotFoundError.
os.makedirs(os.path.dirname(nb_tfidf_path) or ".", exist_ok=True)
joblib.dump(best_nb, nb_tfidf_path)
print(f"Saved Naive Bayes TF-IDF to: {nb_tfidf_path}")

Saved Naive Bayes TF-IDF to: model/intent_model_naive_bayes_tfidf.pkl


In [53]:
import os  # local import: only needed to guarantee the output folder exists

# Persist the tuned IndoBERT + LogReg estimator so it can be loaded without retraining.
# NOTE(review): if this pipeline contains a transformer class defined inside the
# notebook, the pickle can only be loaded where that class is importable under
# the same module path -- confirm before deploying this file elsewhere.
indobert_path = "model/intent_model_logreg_indobert.pkl"
# joblib.dump does not create missing parent directories; without this a fresh
# checkout (no `model/` folder yet) fails here with FileNotFoundError.
os.makedirs(os.path.dirname(indobert_path) or ".", exist_ok=True)
joblib.dump(best_logreg_indobert, indobert_path)
print(f"Saved LogReg IndoBERT to: {indobert_path}")

Saved LogReg IndoBERT to: model/intent_model_logreg_indobert.pkl


In [54]:
def predict_intent_sentence(s, model=None):
    """Predict the intent label for a single sentence.

    Args:
        s: Raw user sentence. It is cleaned with ``preprocess`` before being
            handed to the model.
        model: Optional fitted estimator exposing ``predict``. When omitted,
            the notebook-level ``final_model`` (the selected best model) is
            used, which is the original behaviour. Passing a model explicitly
            lets the same helper be reused for any candidate instead of
            redefining the function.

    Returns:
        The first (and only) element of ``model.predict([cleaned_sentence])``.
    """
    if model is None:
        # Resolved at call time, so a later re-selection of final_model is honoured.
        model = final_model
    s_clean = preprocess(s)
    return model.predict([s_clean])[0]

In [55]:
# Hand-written smoke-test sentences (informal Indonesian), grouped by topic.
# Order is significant: the next cell prints one prediction per entry, in order.
tests = [
    # Opening hours
    "jam buka perpustakaan hari sabtu",
    "perpus maranatha buka sampe jam berapa ya?",
    "besok minggu perpus buka gak?",
    "jam operasional perpustakaan pas libur nasional gimana?",
    "hari ini perpus udah buka belum?",

    # Book search / availability
    "ada buku basis data fathansyah gak",
    "ada buku tentang machine learning terbaru gak?",
    "cek dong buku pemrograman python masih tersedia ga",
    "di perpus ada novel laskar pelangi gak sih?",
    "kalo mau cari skripsi tentang data mining ada ga?",

    # Discussion / study room booking
    "cara booking ruang diskusi gimana",
    "book ruang belajar kelompok bisa lewat mana?",
    "ruang diskusi bisa dipake berapa jam maksimal?",
    "bisa reservasi ruang belajar lewat online gak?",

    # Late returns, fines and lost books
    "kalau telat balikin buku dendanya berapa",
    "telat ngembaliin buku 2 hari berapa ya?",
    "Kalau saya telat mengembalikan, konsekuensinya apa?",
    "kalau hilangin buku perpus dendanya gimana ya?",
    "batas maksimal telat pengembalian sebelum kena blokir berapa hari?",

    # E-journal / e-resource access
    "cara akses e journal dari luar kampus",
    "akses database journal lewat wifi kos bisa gak?",
    "punya akses ke ieee atau sciencedirect gak ya?",
    "login e-resources pake akun apa ya?",
    "kalo lupa password e journal harus gimana?",

    # Library location and contact
    "perpus maranatha ada dmn sih",
    "alamat lengkap perpustakaan maranatha di mana ya?",
    "nomor telepon perpustakaan ada?",
    "perpus ada di gedung mana ya di kampus?",

    # Borrowing and renewal
    "cara pinjam buku di perpus gimana",
    "bisa perpanjang peminjaman buku lewat online gak?",
    "kalo mau pinjem buku harus bawa ktm gak?",
    "maksimal bisa pinjam berapa buku sekaligus?",
    "lama peminjaman buku berapa hari ya?",

    # Greetings and very short inputs
    "halo mlibbot",
    "hi bot, bisa bantu cari buku?",
    "p",
    "halo, ini perpus maranatha ya?",
]

In [56]:
# Print "<sentence repr> -> <predicted intent>" for every smoke-test sentence.
for sentence in tests:
    predicted_intent = predict_intent_sentence(sentence)
    print(f"{sentence!r} -> {predicted_intent}")

'jam buka perpustakaan hari sabtu' -> jam_buka
'perpus maranatha buka sampe jam berapa ya?' -> jam_buka
'besok minggu perpus buka gak?' -> jam_buka
'jam operasional perpustakaan pas libur nasional gimana?' -> jam_buka
'hari ini perpus udah buka belum?' -> jam_buka
'ada buku basis data fathansyah gak' -> cari_buku_judul
'ada buku tentang machine learning terbaru gak?' -> cari_rekomendasi
'cek dong buku pemrograman python masih tersedia ga' -> cek_ketersediaan_buku
'di perpus ada novel laskar pelangi gak sih?' -> cari_buku_judul
'kalo mau cari skripsi tentang data mining ada ga?' -> cari_buku_topik
'cara booking ruang diskusi gimana' -> layanan_ruang_diskusi
'book ruang belajar kelompok bisa lewat mana?' -> layanan_ruang_diskusi
'ruang diskusi bisa dipake berapa jam maksimal?' -> layanan_ruang_diskusi
'bisa reservasi ruang belajar lewat online gak?' -> panduan_perpanjangan
'kalau telat balikin buku dendanya berapa' -> info_denda
'telat ngembaliin buku 2 hari berapa ya?' -> info_denda
'Ka

Pengujian dengan model IndoBERT

In [57]:
def predict_intent_sentence(s, model=None):
    """Predict the intent label for a single sentence with the IndoBERT model.

    This definition deliberately replaces the earlier ``predict_intent_sentence``
    in this notebook so that the cells below exercise the IndoBERT + LogReg
    model instead of ``final_model``.

    Args:
        s: Raw user sentence. It is cleaned with ``preprocess`` before being
            handed to the model.
        model: Optional fitted estimator exposing ``predict``. When omitted,
            the notebook-level ``best_logreg_indobert`` is used, which is the
            original behaviour.

    Returns:
        The first (and only) element of ``model.predict([cleaned_sentence])``.
    """
    if model is None:
        # Resolved at call time, mirroring the original global lookup.
        model = best_logreg_indobert
    s_clean = preprocess(s)
    return model.predict([s_clean])[0]

In [58]:
# Same smoke-test sentences as used for the selected model above, repeated here
# so the IndoBERT predictions can be compared entry by entry.
# Order is significant: the next cell prints one prediction per entry, in order.
tests = [
    # Opening hours
    "jam buka perpustakaan hari sabtu",
    "perpus maranatha buka sampe jam berapa ya?",
    "besok minggu perpus buka gak?",
    "jam operasional perpustakaan pas libur nasional gimana?",
    "hari ini perpus udah buka belum?",

    # Book search / availability
    "ada buku basis data fathansyah gak",
    "ada buku tentang machine learning terbaru gak?",
    "cek dong buku pemrograman python masih tersedia ga",
    "di perpus ada novel laskar pelangi gak sih?",
    "kalo mau cari skripsi tentang data mining ada ga?",

    # Discussion / study room booking
    "cara booking ruang diskusi gimana",
    "book ruang belajar kelompok bisa lewat mana?",
    "ruang diskusi bisa dipake berapa jam maksimal?",
    "bisa reservasi ruang belajar lewat online gak?",

    # Late returns, fines and lost books
    "kalau telat balikin buku dendanya berapa",
    "telat ngembaliin buku 2 hari berapa ya?",
    "Kalau saya telat mengembalikan, konsekuensinya apa?",
    "kalau hilangin buku perpus dendanya gimana ya?",
    "batas maksimal telat pengembalian sebelum kena blokir berapa hari?",

    # E-journal / e-resource access
    "cara akses e journal dari luar kampus",
    "akses database journal lewat wifi kos bisa gak?",
    "punya akses ke ieee atau sciencedirect gak ya?",
    "login e-resources pake akun apa ya?",
    "kalo lupa password e journal harus gimana?",

    # Library location and contact
    "perpus maranatha ada dmn sih",
    "alamat lengkap perpustakaan maranatha di mana ya?",
    "nomor telepon perpustakaan ada?",
    "perpus ada di gedung mana ya di kampus?",

    # Borrowing and renewal
    "cara pinjam buku di perpus gimana",
    "bisa perpanjang peminjaman buku lewat online gak?",
    "kalo mau pinjem buku harus bawa ktm gak?",
    "maksimal bisa pinjam berapa buku sekaligus?",
    "lama peminjaman buku berapa hari ya?",

    # Greetings and very short inputs
    "halo mlibbot",
    "hi bot, bisa bantu cari buku?",
    "p",
    "halo, ini perpus maranatha ya?",
]

In [59]:
# Print "<sentence repr> -> <predicted intent>" for every smoke-test sentence,
# using whichever predict_intent_sentence is currently defined.
for sentence in tests:
    predicted_intent = predict_intent_sentence(sentence)
    print(f"{sentence!r} -> {predicted_intent}")

'jam buka perpustakaan hari sabtu' -> jam_buka
'perpus maranatha buka sampe jam berapa ya?' -> jam_buka
'besok minggu perpus buka gak?' -> salam
'jam operasional perpustakaan pas libur nasional gimana?' -> jam_buka
'hari ini perpus udah buka belum?' -> salam
'ada buku basis data fathansyah gak' -> cari_buku_penulis
'ada buku tentang machine learning terbaru gak?' -> cari_buku_judul
'cek dong buku pemrograman python masih tersedia ga' -> cari_buku_judul
'di perpus ada novel laskar pelangi gak sih?' -> cari_buku_penulis
'kalo mau cari skripsi tentang data mining ada ga?' -> cari_buku_judul
'cara booking ruang diskusi gimana' -> layanan_ruang_diskusi
'book ruang belajar kelompok bisa lewat mana?' -> layanan_ruang_diskusi
'ruang diskusi bisa dipake berapa jam maksimal?' -> layanan_ruang_diskusi
'bisa reservasi ruang belajar lewat online gak?' -> layanan_ejournal_ebook
'kalau telat balikin buku dendanya berapa' -> info_denda
'telat ngembaliin buku 2 hari berapa ya?' -> panduan_perpanjangan
