# Setup

## Install & Import

In [1]:
# Mount Google Drive so the resource files stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive/')
# File paths only (nothing is loaded here): the abbreviation dictionary CSV that is
# read into df_kamus_baku later, and the stopword list CSV that is read into stops.
kamus_baku = '/content/drive/MyDrive/TUGAS AKHIR/kamus_baku.csv'
stopwords = '/content/drive/MyDrive/TUGAS AKHIR/stopwords.csv'

Mounted at /content/drive/


In [2]:
%%capture
# Install the third-party packages imported by the next cells; %%capture hides the
# installer output. Only POT is pinned to an exact version.
!pip install python-terrier -q
!pip install --upgrade git+https://github.com/terrierteam/pyterrier_t5.git -q
!pip install -U sentence-transformers -q
!pip install --upgrade gensim
!pip install PySastrawi
!pip install rake-nltk
!pip install POT==0.4.0

In [None]:
# Download the document collection (parsed as XML below) and the relevance
# judgments (read as CSV below) into the working directory.
!wget -O full_dataset.xml https://lumbung.cs.ui.ac.id/f/ed0627b7fa8e4984a672/?dl=1
!wget -O qrels_all.csv https://lumbung.cs.ui.ac.id/f/d2379cc795e84ed39605/?dl=1

In [4]:
from IPython.display import display
from xml.dom.minidom import parse, parseString
from pyterrier.measures import *

from pyterrier_t5 import MonoT5ReRanker, DuoT5ReRanker
from sentence_transformers import CrossEncoder, SentenceTransformer
from sentence_transformers.util import cos_sim

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

from gensim.models import Word2Vec, FastText

from rake_nltk import Rake

from ot import emd2

from scipy import stats
from scipy.spatial import distance
from scipy.spatial.distance import cosine

from sklearn import preprocessing
from sklearn.model_selection import KFold

import xml.etree.ElementTree as et
import pickle
import random
import pyterrier as pt
import pandas as pd
import numpy as np
import xgboost as xgb
import torch
import json
import os
import re
import math
import nltk
# Fetch the NLTK 'punkt' resource (presumably what nltk.word_tokenize, used in
# normalisasi_singkatan, relies on — confirm for the installed NLTK version).
nltk.download('punkt')

# Initialise PyTerrier only if it has not been started in this kernel yet.
if not pt.started():
  pt.init(version='snapshot')

## Set Random Seed

In [5]:
# Seed Python's, NumPy's and PyTorch's random number generators with the same fixed value.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7cb6a26dfb30>

## Script of Stopwords

In [6]:
# Load the first column of the header-less stopword CSV as a plain Python list.
stops = pd.read_csv(stopwords, header=None)[0].values.tolist()

In [7]:
def remove_stopwords(text):
    """Lower-case ``text`` and drop every whitespace-separated token that is a stopword.

    Stopwords are taken from the module-level ``stops`` collection.

    Args:
        text: Input string.

    Returns:
        The surviving tokens joined by single spaces (an empty string when no
        token survives).
    """
    # Build a set once per call: membership tests are then O(1) instead of a
    # linear scan over the ``stops`` list for every single token.
    stop_set = set(stops)
    return " ".join(token for token in text.lower().split() if token not in stop_set)

## Script of Special Chars

In [8]:
# Non-alphanumeric character issue in PyTerrier: https://github.com/terrier-org/pyterrier/issues/62
def remove_specials(text):
  """Replace every run of non-word characters and/or underscores with one space.

  Args:
    text: Input string.

  Returns:
    The string with each maximal run of characters matching ``[\\W_]``
    collapsed into a single space. Leading/trailing runs also become a space.
  """
  # Raw string literal: '\W' inside a normal string is an invalid escape
  # sequence and triggers a warning on recent Python versions.
  pattern = re.compile(r'[\W_]+')
  return pattern.sub(' ', text)

## Script of Normalization

In [9]:
# Normalization script from Nurhayati (2019)

# Abbreviation dictionary; the first CSV column is used as the index. The functions
# below read its 'singkatan' and 'asli' columns.
df_kamus_baku = pd.read_csv(kamus_baku, index_col=0)

def normalisasi_singkatan(text):
    """Expand abbreviations using the ``df_kamus_baku`` dictionary.

    The text is lower-cased and tokenized with ``nltk.word_tokenize``. Each token
    that occurs in the ``singkatan`` column is replaced by the ``asli`` value of
    the first matching row; all other tokens are kept unchanged.

    Args:
        text: Input string.

    Returns:
        The processed tokens joined by single spaces.
    """
    abbreviations = df_kamus_baku['singkatan']
    expanded = []
    for word in nltk.word_tokenize(text.lower()):
        if word in abbreviations.values:
            # Several rows may share an abbreviation; the first one wins.
            word = df_kamus_baku.loc[abbreviations == word, 'asli'].values[0]
        expanded.append(word)
    return ' '.join(expanded)

def normalisasi_kata_ulang(text):
    """Rewrite shorthand reduplicated words into their hyphenated form.

    The text is lower-cased first, then two rewrites are applied in order:

    * ``kira2`` / ``kira2x``  ->  ``kira-kira``
    * ``apa apa``             ->  ``apa-apa``

    Args:
        text: Input string.

    Returns:
        The lower-cased text with both rewrites applied.
    """
    text = text.lower()

    # Example: kira2 -> kira-kira.
    # The word is captured and reused directly, so the "2x" suffix is removed
    # as a whole (stripping only the "2" used to turn "kira2x" into
    # "kirax-kirax"). Substituting at the match position also avoids rewriting
    # the same characters where they occur inside an unrelated token.
    # [a-z] is sufficient after lower-casing; [A-z] additionally matched
    # punctuation such as "[", "\", "]", "^", "_" and "`".
    text = re.sub(r'([a-z]+)2x?\b', r'\1-\1', text)

    # Example: apa apa -> apa-apa.
    # The trailing \b keeps "apa apabila" intact, and \s+ handles tabs or
    # repeated spaces between the two words.
    text = re.sub(r'\b([a-z]+)\s+\1\b', r'\1-\1', text)
    return text

def normalisasi(question):
    """Normalize a text: expand abbreviations, then rewrite reduplicated words.

    Args:
        question: Input string.

    Returns:
        The result of ``normalisasi_kata_ulang`` applied to the output of
        ``normalisasi_singkatan``.
    """
    return normalisasi_kata_ulang(normalisasi_singkatan(question))

# Preprocess Data

In [10]:
%%time

xtree = et.parse("full_dataset.xml")
xroot = xtree.getroot()
rows = []

for node in xroot:
  id = node.find("ID").text.strip() if node.find("ID").text is not None else None
  url = node.find("URL").text.strip() if node.find("URL").text is not None else None
  tag = node.find("TAG").find("ITEM").text.strip() if node.find("TAG").text is not None else None
  keluhan = node.find("KELUHAN")
  keluhan_judul = keluhan.find("JUDUL").text.strip() if keluhan.find("JUDUL").text is not None else None
  keluhan_isi = keluhan.find("ISI").text.strip() if keluhan.find("ISI").text is not None else None
  keluhan_waktu = keluhan.find("WAKTU").text.strip() if keluhan.find("WAKTU").text is not None else None
  jawaban = node.find("JAWABAN")
  jawaban_isi = jawaban.find("ISI").text.strip() if jawaban.find("ISI").text is not None else None
  jawaban_waktu = jawaban.find("WAKTU").text.strip() if jawaban.find("WAKTU").text is not None else None

  rows.append({"docno": id, "url": url, "tag": tag, \
                "keluhan_judul": keluhan_judul, "keluhan_isi": keluhan_isi, "keluhan_waktu": keluhan_waktu, \
                "jawaban_isi": jawaban_isi, "jawaban_waktu": jawaban_waktu})

data = pd.DataFrame(rows, columns = ['docno', 'url', 'tag', 'keluhan_judul', 'keluhan_isi', 'keluhan_waktu', 'jawaban_isi', 'jawaban_waktu'])

CPU times: user 4.05 s, sys: 365 ms, total: 4.41 s
Wall time: 4.44 s


In [11]:
# Rows whose keluhan_judul value is missing; the cell displays how many there are.
keluhan_judul_kosong = data[data["keluhan_judul"].isna()]
len(keluhan_judul_kosong)

102

In [12]:
# Column dtypes and non-null counts of the parsed collection.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86731 entries, 0 to 86730
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   docno          86731 non-null  object
 1   url            86731 non-null  object
 2   tag            86731 non-null  object
 3   keluhan_judul  86629 non-null  object
 4   keluhan_isi    86106 non-null  object
 5   keluhan_waktu  24405 non-null  object
 6   jawaban_isi    85289 non-null  object
 7   jawaban_waktu  24405 non-null  object
dtypes: object(8)
memory usage: 5.3+ MB


## Format

Pilih (comment/uncomment) format data sesuai skenario yang ingin dijalankan

### Judul

In [13]:
# data = data[["docno", "keluhan_judul"]].drop_duplicates()
# data.fillna(' ', inplace=True)
# data["keluhan"] = data["keluhan_judul"]
# data = data.rename(columns = {"keluhan_judul": "keluhan_raw"})
# data = data[["docno", "keluhan_raw", "keluhan"]]

### Judul + isi

In [14]:
# "Title + body" scenario: keep docno, fill missing title/body with a single
# space, and build two text columns: `keluhan` (title and body joined by a space,
# preprocessed further below) and `keluhan_raw` (title and body joined by ". ").
data = (
    data[["docno", "keluhan_judul", "keluhan_isi"]]
    .drop_duplicates()
    .fillna(' ')
    .assign(
        keluhan=lambda frame: frame["keluhan_judul"] + " " + frame["keluhan_isi"],
        keluhan_raw=lambda frame: frame["keluhan_judul"] + ". " + frame["keluhan_isi"],
    )
    [["docno", "keluhan_raw", "keluhan"]]
)

## Remove Special Chars

In [15]:
%%time

keluhan = []
for index, line in data.iterrows():
  keluhan.append(remove_specials(str(line['keluhan'])) if line['keluhan'] is not None else None)
data['keluhan'] = keluhan

CPU times: user 9.87 s, sys: 59.4 ms, total: 9.93 s
Wall time: 10.1 s


## Stem

In [16]:
# Build the Sastrawi stemmer once; it is reused by the preprocessing cells below.
factory = StemmerFactory()
stemmer = factory.create_stemmer()

In [17]:
# %%time
# # Comment/uncomment sesuai skenario pemrosesan data yang ingin dicoba
# keluhan = []
# for index, line in data.iterrows():
#   keluhan.append(stemmer.stem(line['keluhan']) if line['keluhan'] is not None else None)
# data['keluhan'] = keluhan

## Normalize

In [18]:
# %%time
# # Comment/uncomment sesuai skenario pemrosesan data yang ingin dicoba
# keluhan = []
# for index, line in data.iterrows():
#   keluhan.append(normalisasi(line['keluhan']) if line['keluhan'] is not None else None)
# data['keluhan'] = keluhan

## Remove Stopwords

In [19]:
%%time
# Comment/uncomment sesuai skenario pemrosesan data yang ingin dicoba
keluhan = []
for index, line in data.iterrows():
  keluhan.append(remove_stopwords(line['keluhan']) if line['keluhan'] is not None else None)
data['keluhan'] = keluhan

CPU times: user 1min, sys: 161 ms, total: 1min
Wall time: 1min 3s


## Drop Duplicates

In [20]:
# Drop rows that are exact duplicates across all columns, modifying `data` in place.
data.drop_duplicates(inplace=True)

In [21]:
# Row count and non-null counts after duplicate removal.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86731 entries, 0 to 86730
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   docno        86731 non-null  object
 1   keluhan_raw  86731 non-null  object
 2   keluhan      86731 non-null  object
dtypes: object(3)
memory usage: 2.0+ MB


## Drop Missing Values

In [22]:
# Discard rows without a preprocessed text: first missing values, then rows
# whose text is exactly '', ' ' or '.'.
data.dropna(subset=['keluhan'], inplace=True)

indexEmptyData = data.loc[data['keluhan'].isin(['', ' ', '.'])].index
data.drop(indexEmptyData, inplace=True)

data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 86723 entries, 0 to 86730
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   docno        86723 non-null  object
 1   keluhan_raw  86723 non-null  object
 2   keluhan      86723 non-null  object
dtypes: object(3)
memory usage: 2.6+ MB


# Preprocess Qrels

In [23]:
# Read the relevance judgments and discard the first CSV column.
qrels_csv = pd.read_csv('qrels_all.csv')
qrels = qrels_csv.drop(columns=[qrels_csv.columns[0]])
qrels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6110 entries, 0 to 6109
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   qid     6110 non-null   object
 1   docno   6110 non-null   object
 2   qtitle  6110 non-null   object
 3   qbody   5967 non-null   object
 4   dtitle  6110 non-null   object
 5   dbody   6110 non-null   object
 6   label   6110 non-null   int64 
dtypes: int64(1), object(6)
memory usage: 334.3+ KB


## Label

*   Nonrelevant --> 0
*   Relevant or partial --> 1 (the original label 2 is collapsed to 1)



In [24]:
# Collapse the graded labels: 2 becomes 1, every other value is kept as it is.
label = [1 if grade == 2 else grade for grade in qrels['label']]
qrels["label"] = label

## Remove Special Chars

In [25]:
def _clean_qrels_text(value):
  """Strip one cell and remove its special characters; missing values become ''."""
  # str() of a missing value is the literal text 'nan', which would otherwise be
  # appended to the query/document text as if it were a real word.
  if pd.isna(value):
    return ''
  return remove_specials(str(value).strip())

# Cleaned copies of the four text columns, stored next to the originals.
qtitles = [_clean_qrels_text(value) for value in qrels['qtitle']]
qbodies = [_clean_qrels_text(value) for value in qrels['qbody']]
dtitles = [_clean_qrels_text(value) for value in qrels['dtitle']]
dbodies = [_clean_qrels_text(value) for value in qrels['dbody']]

qrels["qtitle_nospecials"] = qtitles
qrels["qbody_nospecials"] = qbodies
qrels["dtitle_nospecials"] = dtitles
qrels["dbody_nospecials"] = dbodies

## Drop Duplicates

In [26]:
# Drop rows that are exact duplicates across all columns, modifying `qrels` in place.
qrels.drop_duplicates(inplace=True)

In [27]:
# Row count and non-null counts after adding the *_nospecials columns.
qrels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6110 entries, 0 to 6109
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   qid                6110 non-null   object
 1   docno              6110 non-null   object
 2   qtitle             6110 non-null   object
 3   qbody              5967 non-null   object
 4   dtitle             6110 non-null   object
 5   dbody              6110 non-null   object
 6   label              6110 non-null   int64 
 7   qtitle_nospecials  6110 non-null   object
 8   qbody_nospecials   6110 non-null   object
 9   dtitle_nospecials  6110 non-null   object
 10  dbody_nospecials   6110 non-null   object
dtypes: int64(1), object(10)
memory usage: 525.2+ KB


## Format

Pilih (comment/uncomment) format data sesuai skenario yang ingin dijalankan

### Judul

In [28]:
# qrels.drop_duplicates(inplace=True)
# qrels.fillna(' ', inplace=True)
# qrels["query_nospecials"] = qrels["qtitle_nospecials"]
# qrels["doc_nospecials"] = qrels["dtitle_nospecials"]
# qrels = qrels.rename(columns = {"qtitle": "query_raw", "dtitle": "doc_raw"})
# qrels = qrels[["qid", "docno", "query_raw", "query_nospecials", "doc_raw", "doc_nospecials", "label"]]

### Judul + isi

In [29]:
# "Title + body" scenario for the judgments: fill missing values with a single
# space, then build raw texts (title and body joined by ". ") and cleaned texts
# (cleaned title and body joined by a space) for both the query and the document.
qrels = qrels.drop_duplicates().fillna(' ')

qrels = qrels.assign(
    query_nospecials=lambda frame: frame["qtitle_nospecials"] + " " + frame["qbody_nospecials"],
    query_raw=lambda frame: frame["qtitle"] + ". " + frame["qbody"],
    doc_nospecials=lambda frame: frame["dtitle_nospecials"] + " " + frame["dbody_nospecials"],
    doc_raw=lambda frame: frame["dtitle"] + ". " + frame["dbody"],
)[["qid", "docno", "query_raw", "query_nospecials", "doc_raw", "doc_nospecials", "label"]]

qrels.head()

Unnamed: 0,qid,docno,query_raw,query_nospecials,doc_raw,doc_nospecials,label
0,Q1,KD-14969,"Cara mengatasi pusing, lemas dan hidung berair...",Cara mengatasi pusing lemas dan hidung berair ...,mimisan dan polip. malam dok-dok hidung saya m...,mimisan dan polip malam dok dok hidung saya mi...,0
1,Q1,KD-10780,"Cara mengatasi pusing, lemas dan hidung berair...",Cara mengatasi pusing lemas dan hidung berair ...,nyeri kepala dan lendir tidak sedap dari hidun...,nyeri kepala dan lendir tidak sedap dari hidun...,1
2,Q1,AD-11170,"Cara mengatasi pusing, lemas dan hidung berair...",Cara mengatasi pusing lemas dan hidung berair ...,demam pusing dan bercak-bercak merah pada ...,demam pusing dan bercak bercak merah pada kuli...,1
3,Q1,KD-34954,"Cara mengatasi pusing, lemas dan hidung berair...",Cara mengatasi pusing lemas dan hidung berair ...,common cold. dok . bermula pada hari jumat kem...,common cold dok bermula pada hari jumat kemari...,1
4,Q1,KD-42528,"Cara mengatasi pusing, lemas dan hidung berair...",Cara mengatasi pusing lemas dan hidung berair ...,ingus dari hidung. selamat malam dok akhir-a...,ingus dari hidung selamat malam dok akhir akhi...,1


# Preprocess Queries

In [30]:
# One row per distinct (qid, query_raw, query_nospecials) combination found in qrels.
queries = qrels[["qid", "query_raw", "query_nospecials"]].drop_duplicates()
queries.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 0 to 5941
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   qid               45 non-null     object
 1   query_raw         45 non-null     object
 2   query_nospecials  45 non-null     object
dtypes: object(3)
memory usage: 1.4+ KB


## Preprocessing Variation

In [31]:
%%time
query, query_stemmed, query_nostopwords, query_normalized, \
query_stemmed_nostopwords, query_stemmed_normalized, query_normalized_nostopwords, \
query_stemmed_normalized_nostopwords = [], [], [], [], [], [], [], []

for index, line in queries.iterrows():
  query_stemmed.append(stemmer.stem(line['query_nospecials']))
  query_nostopwords.append(remove_stopwords(line['query_nospecials']))
  query_normalized.append(normalisasi(line['query_nospecials']))
queries['query_stemmed'] = query_stemmed
queries['query_nostopwords'] = query_nostopwords
queries['query_normalized'] = query_normalized

for index, line in queries.iterrows():
  query_stemmed_nostopwords.append(remove_stopwords(line['query_stemmed']))
  query_stemmed_normalized.append(normalisasi(line['query_stemmed']))
  query_normalized_nostopwords.append(remove_stopwords(line['query_normalized']))
queries['query_stemmed_nostopwords'] = query_stemmed_nostopwords
queries['query_stemmed_normalized'] = query_stemmed_normalized
queries['query_normalized_nostopwords'] = query_normalized_nostopwords

for index, line in queries.iterrows():
  query_stemmed_normalized_nostopwords.append(remove_stopwords(line['query_stemmed_normalized']))
queries['query_stemmed_normalized_nostopwords'] = query_stemmed_normalized_nostopwords

CPU times: user 788 ms, sys: 32.2 ms, total: 820 ms
Wall time: 808 ms


In [34]:
# Change the renamed query column according to the data-processing scenario you want to try.
# The chosen variant becomes the "query" column; all other variants are dropped.
queries = queries.rename(columns = {"query_nostopwords": "query"})
queries = queries[['qid', 'query_raw', 'query']]

In [35]:
# Preview the raw query next to the selected preprocessed variant.
queries.head()

Unnamed: 0,qid,query_raw,query
0,Q1,"Cara mengatasi pusing, lemas dan hidung berair...",mengatasi pusing lemas hidung berair malam dok...
174,Q2,Telinga berdengung sebelah kiri. Selamat pagi ...,telinga berdengung sebelah kiri selamat pagi d...
308,Q3,Benjolan di sekitar kelamin. Sore dok..Maaf do...,benjolan kelamin sore dok maaf dok 4 mengalami...
472,Q4,Makanan untuk penderita penyakit lambung dan l...,makanan penderita penyakit lambung liver dok m...
624,Q5,Kebiasaan melamun atau berkhayal secara berleb...,kebiasaan melamun berkhayal dok pengidap malad...


## Drop Duplicates

In [36]:
# Drop rows that are exact duplicates across all columns, modifying `queries` in place.
queries.drop_duplicates(inplace=True)

## Drop Missing Values

In [37]:
# Discard queries without a preprocessed text: first missing values, then rows
# whose text is exactly '', ' ' or '.'.
queries.dropna(subset=['query'], inplace=True)

indexEmptyData = queries.loc[queries['query'].isin(['', ' ', '.'])].index
queries.drop(indexEmptyData, inplace=True)

queries.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 0 to 5941
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   qid        45 non-null     object
 1   query_raw  45 non-null     object
 2   query      45 non-null     object
dtypes: object(3)
memory usage: 1.4+ KB


# Index

## Indexing

In [38]:
%%time
# input for PyTerrier indexer must be a dataframe containing "docno" and "text"

# The raw text is exposed as "text"; the "keluhan" column keeps the preprocessed text.
collection = data.rename(columns = {"keluhan_raw": "text"})

# Delete any index directory left by a previous run before rebuilding it.
!rm -rf ./medical_ir_index
# Stemming and stopword removal are disabled on the indexer side (both None).
# NOTE(review): IndexingType(1) is presumably the classic indexing mode — confirm
# against the PyTerrier IndexingType enum.
pd_indexer = pt.DFIndexer("./medical_ir_index", \
                          type = pt.index.IndexingType(1), \
                          tokeniser = "UTFTokeniser", \
                          stemmer = None, \
                          stopwords = None, \
                          blocks = True)
# The preprocessed "keluhan" column is the text handed to the indexer; the whole
# frame is passed alongside it (presumably as document metadata — confirm).
index_ref = pd_indexer.index(collection["keluhan"], collection)

collection.info()

<class 'pandas.core.frame.DataFrame'>
Index: 86723 entries, 0 to 86730
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   docno    86723 non-null  object
 1   text     86723 non-null  object
 2   keluhan  86723 non-null  object
dtypes: object(3)
memory usage: 2.6+ MB
CPU times: user 1min 31s, sys: 1.23 s, total: 1min 32s
Wall time: 1min 33s


## Index Overview

In [39]:
# Preview the indexed frame: raw text ("text") next to the preprocessed text ("keluhan").
collection.head()

Unnamed: 0,docno,text,keluhan
0,DS-1,Mengapa Keringat Badan Sangat Berlebihan?. sel...,keringat badan selamat malam dokter fathurrosi...
1,DS-2,Mengapa Lengan Atas Sakit Walaupun Tidak Melak...,lengan sakit aktivitas siang dokter 4 lengan s...
2,DS-3,Berapa Batas Usia Kandungan Untuk Melakukan US...,batas usia kandungan usg batas usia kandungan ...
3,DS-4,Apakah Katarak Pada Bayi Harus Disembuhkan Den...,katarak bayi disembuhkan operasi selamat pagi ...
4,DS-5,Mengapa Badan Panas dan Ngilu Selama Berhari-h...,badan panas ngilu berhari nama ferini intan lu...


In [40]:
# Open the index that was just built and print its collection statistics.
index_fact = pt.IndexFactory.of(index_ref)
print(index_fact.getCollectionStatistics().toString())

Number of documents: 86723
Number of terms: 74200
Number of postings: 1924462
Number of fields: 0
Number of tokens: 2575525
Field names: []
Positions:   true



# Evaluate Baseline

In [41]:
%%time

# Baseline: BM25 retrieval over the index, evaluated with the `queries` frame as
# topics and the `qrels` frame as relevance judgments.
bm25 = pt.BatchRetrieve(index_ref, wmodel = "BM25")
pt.Experiment(
    [bm25],
    topics=queries,
    qrels=qrels,
    eval_metrics = [R@10, R@5, P@10, P@5, "map", "recip_rank", nDCG@5],
    names=['BM25']
)

CPU times: user 24 s, sys: 371 ms, total: 24.4 s
Wall time: 24.7 s


Unnamed: 0,name,R@10,R@5,P@10,P@5,map,recip_rank,nDCG@5
0,BM25,0.17427,0.113283,0.655556,0.737778,0.455594,0.857778,0.744891


In [42]:
def get_serp_metadata(serp):
    """Attach relevance labels from the global ``qrels`` to a result frame.

    Args:
        serp: DataFrame with at least ``qid`` and ``docno`` columns.

    Returns:
        ``serp`` left-joined with the ``label`` column of ``qrels`` on
        (``qid``, ``docno``); rows without a judgment get a missing label.
    """
    judgments = qrels[['qid', 'docno', 'label']]
    return serp.merge(judgments, how='left', on=['qid', 'docno'])

def get_serp(observed_qid, model, get_qrels_label=True, k=15):
    """Run ``model`` for one query and return its top-ranked results.

    Args:
        observed_qid: Value looked up in the ``qid`` column of the global ``queries``.
        model: Object whose ``transform`` method is applied to the matching query rows.
        get_qrels_label: When true, labels are attached via ``get_serp_metadata``.
        k: Only rows whose ``rank`` is smaller than ``k`` are kept.

    Returns:
        The kept rows sorted by ``rank``.
    """
    topic = queries[queries['qid'] == observed_qid]
    results = model.transform(topic)
    if get_qrels_label:
        results = get_serp_metadata(results)
    return results[results['rank'] < k].sort_values(by=['rank'])

def compare_rank_score(metric_scores, val_topics):
    """Display the queries on which reranking scored lower or higher than the baseline.

    Args:
        metric_scores: Data accepted by ``pd.DataFrame``; the resulting frame must be
            indexed by qid and contain the columns 'BM25 >> LambdaMART XGBoost'
            and 'BM25 Only'.
        val_topics: Not used by this function.

    NOTE(review): reads ``query_raw`` from the global ``queries``; a later cell in
    this notebook renames that column, so call order matters — confirm.
    """
    df_scores = pd.DataFrame(metric_scores)

    # Look up the raw query text of every qid in the score index.
    df_scores['query_raw'] = [
        queries[queries['qid'] == qid]['query_raw'].values[0]
        for qid in df_scores.index
    ]

    print("Rerank < Baseline (Baseline > Rerank)")
    reranked, baseline = df_scores['BM25 >> LambdaMART XGBoost'], df_scores['BM25 Only']
    display(df_scores[reranked < baseline])
    print()
    print("Rerank > Baseline (Baseline < Rerank)")
    display(df_scores[reranked > baseline])

def get_observed_doc(observed_docno):
    """Return the ``keluhan`` value of the first row of ``data`` with the given docno.

    NOTE(review): a later cell in this notebook renames the columns of ``data`` so
    that ``keluhan`` then refers to the raw text; the result depends on whether
    that cell has been run — confirm the intended column.
    """
    matches = data.loc[data['docno'] == observed_docno, 'keluhan']
    return matches.values[0]

def get_observed_query(observed_qid):
    """Return the ``query`` value of the first row of ``queries`` with the given qid.

    NOTE(review): a later cell in this notebook renames the columns of ``queries``
    so that ``query`` then refers to the raw text; the result depends on whether
    that cell has been run — confirm the intended column.
    """
    matches = queries.loc[queries['qid'] == observed_qid, 'query']
    return matches.values[0]

# Export

In [44]:
# Rename for export: the preprocessed text becomes "keluhan_preprocessed" and the
# raw text takes over the "keluhan" name.
# NOTE(review): get_observed_doc reads data['keluhan'], so its result changes from
# preprocessed to raw text once this cell has run.
data = data.rename(columns = {"keluhan": "keluhan_preprocessed", "keluhan_raw": "keluhan"})
data.head()

Unnamed: 0,docno,keluhan,keluhan_preprocessed
0,DS-1,Mengapa Keringat Badan Sangat Berlebihan?. sel...,keringat badan selamat malam dokter fathurrosi...
1,DS-2,Mengapa Lengan Atas Sakit Walaupun Tidak Melak...,lengan sakit aktivitas siang dokter 4 lengan s...
2,DS-3,Berapa Batas Usia Kandungan Untuk Melakukan US...,batas usia kandungan usg batas usia kandungan ...
3,DS-4,Apakah Katarak Pada Bayi Harus Disembuhkan Den...,katarak bayi disembuhkan operasi selamat pagi ...
4,DS-5,Mengapa Badan Panas dan Ngilu Selama Berhari-h...,badan panas ngilu berhari nama ferini intan lu...


In [45]:
# Rename for export: the preprocessed query becomes "query_preprocessed" and the
# raw query takes over the "query" name.
# NOTE(review): compare_rank_score reads queries['query_raw'] and get_observed_query
# reads queries['query']; both are affected once this cell has run.
queries = queries.rename(columns = {"query": "query_preprocessed", "query_raw": "query"})
queries.head()

Unnamed: 0,qid,query,query_preprocessed
0,Q1,"Cara mengatasi pusing, lemas dan hidung berair...",mengatasi pusing lemas hidung berair malam dok...
174,Q2,Telinga berdengung sebelah kiri. Selamat pagi ...,telinga berdengung sebelah kiri selamat pagi d...
308,Q3,Benjolan di sekitar kelamin. Sore dok..Maaf do...,benjolan kelamin sore dok maaf dok 4 mengalami...
472,Q4,Makanan untuk penderita penyakit lambung dan l...,makanan penderita penyakit lambung liver dok m...
624,Q5,Kebiasaan melamun atau berkhayal secara berleb...,kebiasaan melamun berkhayal dok pengidap malad...


In [46]:
# Serialize the three frames to pickle files in the working directory.
for filename, frame in (
    ('nostops_data_23-03-24.pickle', data),
    ('nostops_qrels_23-03-24.pickle', qrels),
    ('nostops_queries_23-03-24.pickle', queries),
):
    with open(filename, 'wb') as handle:
        pickle.dump(frame, handle, protocol=pickle.HIGHEST_PROTOCOL)