<a href="https://colab.research.google.com/github/PuputHanifah/Skripsi/blob/main/predict_bidang.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LLM Merak

In [None]:
import warnings; warnings.simplefilter('ignore')

try:
    import google.colab; IN_COLAB = True
    print("Installing the required modules")
    !pip install protobuf==4.24.4 --q
    !pip install bitsandbytes==0.41.1 --q
    !pip install transformers==4.34.1 --q
    !pip install peft==0.5.0 --q
    !pip install accelerate==0.26.0 --q
    !pip install einops==0.6.1 scipy sentencepiece datasets --q
    !pip install transformers peft datasets --q
    print("preparing directories and assets")
    !mkdir data images output models
    #!wget https://raw.githubusercontent.com/taudata...
except:
    IN_COLAB = False
    print("Running the code locally, please make sure all the python module versions agree with colab environment and all data/assets downloaded")

In [None]:
import sys
import time

import torch, numpy as np
import pandas as pd
import pymysql
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer
from peft import PeftModel, PeftConfig

# Seed every random number generator this notebook relies on, so a
# Restart & Run All gives repeatable results. NumPy alone is not enough:
# the model runs on torch, which keeps its own generator.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)
"Done"

In [None]:
# Hub identifier shared by the config, the model weights and the tokenizer.
model_id = "Ichsan2895/Merak-7B-v4"
config = AutoConfig.from_pretrained(model_id)

# Quantisation settings: 4-bit NF4 weights with double quantisation,
# computing in bfloat16.
BNB_CONFIG = BitsAndBytesConfig(
    **{
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_compute_dtype": torch.bfloat16,
    }
)

# device_map="auto" leaves the placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=BNB_CONFIG,
)

tokenizer = LlamaTokenizer.from_pretrained(model_id)
"Done"

In [None]:
def generate_response(question: str) -> str:
    """Ask the loaded chat model one question and return only its answer.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        question: The user message to send to the model.

    Returns:
        The decoded text of the newly generated tokens, stripped of
        surrounding whitespace (the prompt is not included).
    """
    chat = [
      {"role": "system", "content": "Anda adalah Merak, sebuah model kecerdasan buatan yang dilatih oleh Muhammad Ichsan. Mohon jawab pertanyaan berikut dengan benar, faktual, dan ramah."},
      {"role": "user", "content": question},
    ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True)

    # Put BOTH tensors on the device the model lives on. Previously the ids
    # went to a hard-coded "cuda" while the attention mask stayed behind.
    device = model.device
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids,
                                 attention_mask=attention_mask,
                                 eos_token_id=tokenizer.eos_token_id,
                                 pad_token_id=tokenizer.eos_token_id,
                                 max_new_tokens=256)

    # generate() returns the prompt followed by the continuation, so dropping
    # the first `prompt_len` tokens isolates the answer. This replaces a text
    # search for an "assistant" marker, which produced a garbage slice
    # whenever the marker was not found (str.find() returning -1).
    prompt_len = input_ids.shape[-1]
    answer_tokens = outputs[:, prompt_len:].detach().cpu().numpy()
    response = tokenizer.batch_decode(answer_tokens, skip_special_tokens=True)[0]
    return response.strip()

"Done"

# Connect to Database

In [None]:
def conMysql(dbPar, maxTry=7, verbose=False):
    """Open a MySQL connection with pymysql, retrying on failure.

    Args:
        dbPar: Mapping holding the connection settings under the keys
            'host', 'user', 'pass' and 'db_', e.g.
            {'db_': 'rpi', 'user': 'root', 'pass': '', 'host': 'localhost'}.
        maxTry: Maximum number of connection attempts.
        verbose: When True, query and print the server version after
            connecting.

    Returns:
        The open connection, or None when every attempt failed (callers
        must check for None).
    """
    for attempt in range(1, maxTry + 1):
        con = None
        try:
            con = pymysql.connect(host=dbPar['host'],user=dbPar['user'],passwd=dbPar['pass'],db=dbPar['db_'])
            if verbose:
                with con.cursor() as cur:
                    cur.execute('SELECT VERSION()')
                    version = cur.fetchone()
                    print(f'Connected! Current Database version: {version[0]}')
            return con
        except pymysql.Error as e:
            # The connect succeeded but the version query failed: do not leak
            # the half-used connection before retrying.
            if con is not None:
                try:
                    con.close()
                except pymysql.Error:
                    pass
            # Not every pymysql error carries (code, message); formatting
            # e.args[0] with %d used to raise inside this handler.
            if len(e.args) >= 2:
                detail = "%s: %s" % (e.args[0], e.args[1])
            else:
                detail = str(e)
            if attempt < maxTry:
                print ("Error Connecting to MySQL %s \n Retrying after 3 seconds ... " % detail)
                time.sleep(3)
            else:
                # No point in sleeping when there is no further attempt.
                print ("Error Connecting to MySQL %s \n Giving up after %d attempts." % (detail, maxTry))
    return None

# Predict Bidang

In [None]:
def predictBidang(status):
    """Classify a text into one of the fixed categories using the LLM.

    Builds a prompt that lists the categories, sends it through
    generate_response() and looks for a category name in the answer.

    Args:
        status: The text to classify.

    Returns:
        The first category (in list order) whose name occurs in the model's
        answer, or False when none of them occurs.
    """
    # "keamanan" was previously misspelled "keamana", so the misspelling was
    # both shown to the model and returned to the caller.
    # NOTE(review): rows already stored with the old value 'keamana' may need
    # a one-off migration.
    kategori = ["ekonomi", "politik", "sosial", "budaya", "pendidikan", "keamanan", "lingkungan", "teknologi", "kesehatan"]
    prompt_ = 'apa kategori yang cocok dari kalimat berikut "{}". apakah {} ?'.format(status, ', '.join(kategori))
    # The model answers in free text and may capitalise the category, so the
    # comparison is done in lower case.
    jawaban = generate_response(prompt_).lower()
    for cat in kategori:
        if cat in jawaban:
            return cat
    # This return used to be dedented out of the function (a SyntaxError).
    return False

In [None]:
if __name__ == '__main__':
    import os
    import sys

    # Run configuration; these names are read by the polling loop further down.
    table = "religion"
    # SECURITY NOTE(review): the credentials below are hard-coded in the
    # notebook. They can now be overridden with the TAUDB_USER / TAUDB_PASS /
    # TAUDB_HOST environment variables; the literals remain only as
    # backward-compatible defaults and should be removed (and the password
    # rotated) once the environment variables are in place.
    dbTau = {'db_': 'pi', 'tblKey': '', 'tblLang':'',
             'user': os.environ.get('TAUDB_USER', 'fstuinjkt'),
             'pass': os.environ.get('TAUDB_PASS', 'TauData#123'),
             'host': os.environ.get('TAUDB_HOST', 'localhost')} # DB local
    maxTry = 7   # connection attempts passed to conMysql
    nBatch = 10  # batch size used by the polling loop's SELECT
    nSleep = 60  # seconds the polling loop waits when there is nothing to do

    print("Testing Database Connection ... ", end='', flush=True)
    try:
        con = conMysql(dbTau, maxTry=maxTry, verbose=True)
        # conMysql signals failure by returning None rather than raising;
        # report that clearly instead of failing on con.close().
        if con is None:
            raise ConnectionError("no connection could be established after {} attempts".format(maxTry))
        con.close(); del con
        print(" Connected to database '{}' ".format(dbTau['host']), end='', flush=True)
    except Exception as err_:
        print(err_)
        sys.exit("Program terminated because unable to connect to the database.")


In [None]:
# Polling loop: fetch a batch of unlabeled rows, label each one with
# predictBidang(), write the label back, and wait when there is nothing to do.
# Runs until the kernel is interrupted.
while True:
    db = None
    df = None
    failed = False
    try:
        # ========================= 1. Fetch data =========================
        print("Ambil data dari database.", flush=True)
        db = conMysql(dbTau, maxTry=maxTry, verbose=False)
        if db is None:
            raise ConnectionError("Tidak dapat terhubung ke database.")
        # The LIMIT keyword was missing, leaving a bare number at the end of
        # the statement.
        qry1 = f"""
        SELECT id_, title, snippet
        FROM {table}
        WHERE bidang IS NULL AND entity NOT IN ('hajiUmroh', 'hajiUmroh2')
        LIMIT {int(nBatch)}
        """
        df = pd.read_sql(qry1, db)

        # ========================= 2. Predict the category =========================
        if not df.empty:
            for i, d in df.iterrows():
                status = f"{d.title} {d.snippet}"
                bidang_ = predictBidang(status)
                # When no category matched, predictBidang returns False and
                # the text 'False' is stored. That still marks the row as
                # processed, so it is not selected again.
                qry2 = f"""
                UPDATE {table}
                SET bidang = '{bidang_}'
                WHERE id_ = {d.id_}
                """
                with db.cursor() as cur:
                    cur.execute(qry2)
                    # Commit per row so finished work survives a later failure.
                    db.commit()

            print("Data selesai diprediksi. Melanjutkan ...")
    except Exception as err_:
        # Report the problem and fall through to the waiting branch instead
        # of stopping the loop or retrying immediately.
        failed = True
        print(f"Terjadi kesalahan: {err_}", flush=True)
    finally:
        # Close the connection on every path; it used to be closed only after
        # a non-empty batch.
        if db is not None:
            try:
                db.close()
            except Exception:
                pass  # best effort: the connection may already be unusable

    # Nothing to process (or an error occurred): wait before polling again.
    # The original condition used `and`, which raised on df being None.
    if failed or df is None or df.empty:
        if not failed:
            print("Data habis. Menunggu data baru ...")
        for _ in range(nSleep):
            time.sleep(1)
            print("Zz .. ", end="", flush=True)
        print()