# Chairil Anwar Bot

## Inisialisasi

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, Dense, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import string
import numpy as np
import os
import random

def load_sentences(filepath):
    """Read one poem file and return its normalized text as a one-element list.

    Normalization steps, in order:
      1. Strip leading/trailing whitespace and append two line breaks.
      2. Drop every character that is not an ASCII letter, digit, space,
         hyphen or line break.
      3. Turn hyphens into spaces.
      4. Spell out digits as Indonesian words and replace each line break
         with the placeholder word " newline ".
      5. Collapse every run of spaces into a single space.

    The whole poem is returned as ONE string (not split per stanza).

    Args:
        filepath: Path to a UTF-8 encoded text file.

    Returns:
        ``[normalized_text]``, or ``[]`` (after printing a warning) when the
        file does not exist.
    """
    def special_chars_to_words(sentence):
        """Replace digits and line breaks with their word equivalents."""
        special_char_dict = {
            "0": "nol",
            "1": "satu",
            "2": "dua",
            "3": "tiga",
            "4": "empat",
            "5": "lima",
            "6": "enam",
            "7": "tujuh",
            "8": "delapan",
            "9": "sembilan",
            "\n": " newline "
        }
        # NOTE: digit words carry no surrounding spaces, so adjacent digits
        # are concatenated (e.g. "12" -> "satudua").
        return "".join(special_char_dict.get(c, c) for c in sentence)

    VALID_CHARACTERS = string.ascii_letters + " -\n" + string.digits
    SPACEABLE_CHARACTER = "-"

    # Only the read itself can raise FileNotFoundError, so keep the try narrow.
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read().strip() + "\n\n"
    except FileNotFoundError:
        print("Warning: File not found")
        return []

    # Remove invalid characters
    modified_content = "".join(c for c in content if c in VALID_CHARACTERS)

    # Change spaceable character to space
    modified_content = "".join(" " if c in SPACEABLE_CHARACTER else c for c in modified_content)

    # Change special characters to words
    modified_content = special_chars_to_words(modified_content)

    # Collapse runs of spaces. A single replace() pass would leave "  " behind
    # for any run of three or more spaces, so repeat until none remain.
    while "  " in modified_content:
        modified_content = modified_content.replace("  ", " ")

    return [modified_content]
    
# Load every .txt poem in the corpus directory; each file contributes one
# normalized text to my_sentences.
my_sentences = []
PATH = "data/chairil-anwar/"
# os.listdir() returns entries in arbitrary order; sort them so the corpus is
# assembled in the same order on every machine and every run.
for file in sorted(os.listdir(PATH)):
    if file.endswith(".txt"):
        print("Loaded {}".format(file))
        my_sentences += load_sentences(PATH + file)
        
# Fit the vocabulary on the loaded poems.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(my_sentences)
# +1 so the count also covers the empty word (token 0).
total_words = 1 + len(tokenizer.word_index)

# Every prefix of length >= 2 of each encoded poem becomes one sequence:
# its last token is later used as the label, the tokens before it as input.
tokens = tokenizer.texts_to_sequences(my_sentences)
input_sequences = [
    encoded[:end]
    for encoded in tokens
    for end in range(2, len(encoded) + 1)
]

max_sequence_len = max(map(len, input_sequences))
print("Max seqeunce length:", max_sequence_len)

# Next-word predictor: embedding -> bidirectional LSTM -> softmax over the
# whole vocabulary. The input is one token shorter than the longest sequence
# because the last token of each sequence serves as the label.
model = Sequential([
    Embedding(total_words, 128, input_length = max_sequence_len - 1),
    Bidirectional(LSTM(128)),
    Dense(total_words, activation = "softmax")
])
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
adam = Adam(learning_rate = 0.01)
model.compile(loss="categorical_crossentropy", optimizer=adam, metrics=["accuracy"])

# Restore previously trained weights. This checkpoint prefix is the one the
# training cell at the end of the notebook writes with model.save_weights().
model.load_weights("./data/checkpoints/chairil-anwar-bot")

Loaded aku.txt
Loaded cerita-buat-dien-tamaela.txt
Loaded cinta-dan-benci.txt
Loaded cintaku-jauh-di-pulau.txt
Loaded derai-derai-cemara.txt
Loaded di-mesjid.txt
Loaded diponegoro.txt
Loaded doa.txt
Loaded hampa.txt
Loaded kawanku-dan-aku.txt
Loaded kepada-kawan.txt
Loaded kepada-peminta-minta.txt
Loaded krawang-bekasi.txt
Loaded lagu-siul.txt
Loaded nisan.txt
Loaded persetujuan-dengan-bung-karno.txt
Loaded prajurit-jaga-malam.txt
Loaded puisi-kehidupan.txt
Loaded rumahku.txt
Loaded sajak-putih.txt
Loaded sebuah-kamar.txt
Loaded selamat-tinggal.txt
Loaded senja-di-pelabuhan-kecil.txt
Loaded sia-sia.txt
Loaded tak-sepadan.txt
Loaded tuti-artic.txt
Loaded yang-terampas-dan-yang-terputus.txt
Max seqeunce length: 220
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializ

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2215b2f9470>

## Pembuatan Puisi

In [8]:
# Generate a poem word by word: start from a two-word title, repeatedly ask
# the model for the most likely next word, and stop at the end of a stanza
# once the poem is long enough (or when the hard word cap is reached).
word_index = tokenizer.word_index
# Reverse lookup built once, instead of scanning the vocabulary on every step.
index_to_word = {index: word for word, index in word_index.items()}

MAX_WORDS = 500             # hard cap on the poem length
MIN_WORDS_TO_FINISH = 175   # a stanza break at or past this count ends the poem

# "newline" is the line-break placeholder, not a real word: keep it out of the title.
title_candidates = [word for word in word_index if word != "newline"]
judul = " ".join(random.sample(title_candidates, 2))
# To use a custom title instead: judul = "api unggun"
seed_text = judul
bait = 0                    # number of completed stanzas
puisi_selesai = False       # True once the poem ended on a stanza break
word_count = 2              # the title already contributes two words
already_newline = False     # previous generated word was a line break
already_bait_baru = False   # current run of line breaks was already counted as a stanza

print("Judul:", judul)
print("Karya Chairil Anwar Bot\n\n")
print(" " + seed_text, end=" ")
while (not puisi_selesai) and word_count < MAX_WORDS:
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen = max_sequence_len - 1, padding = "pre")
    # Sequential.predict_classes() was removed from newer TensorFlow releases;
    # taking the argmax over the predicted probabilities is the equivalent.
    probabilities = model.predict(token_list, verbose = 0)
    predicted = int(np.argmax(probabilities, axis = -1)[0])

    # Index 0 has no word attached; it maps to the empty string and prints nothing.
    output_word = index_to_word.get(predicted, "")
    if output_word:
        print(output_word if output_word != "newline" else "\n", end = " ")

    if output_word == "newline":
        # Two line breaks in a row mark the end of a stanza; count it only once.
        if already_newline and (not already_bait_baru):
            bait += 1
            already_bait_baru = True

            if word_count >= MIN_WORDS_TO_FINISH:
                puisi_selesai = True # Stop here!

        already_newline = True
    else:
        already_bait_baru = False
        already_newline = False

    seed_text += " " + output_word
    word_count += 1

# Check the flag rather than the counter: a poem that finishes naturally on
# the very last allowed word must not be reported as cut off.
if not puisi_selesai:
    print("\n(Dihentikan karena terlalu banyak kata)")

print("Selesai.")

Judul: menarik hujan
Karya Chairil Anwar Bot


 menarik hujan mengucur badan 
 berkakuan kapal kapal di pelabuhan 
 
 darahku mengental pekat aku tumpat pedat 
 
 siapa berkata kata 
 kawanku hanya rangka saja 
 karena dera mengelucak tenaga 
 
 dia bertanya jam berapa 
 
 sudah larut sekali 
 hilang tenggelam segala makna 
 dan gerak tak punya arti 
 
 maju 
 
 bagimu negeri 
 menyediakan api 
 
 punah di atas menghamba 
 binasa di atas ditindas 
 
 sesungguhnya jalan ajal baru tercapai 
 jika hidup harus merasai 
 
 maju 
 serbu 
 serang 
 terjang 
 
 dan aku akan lebih tidak peduli 
 
 aku mau hidup seribu tahun lagi 
 
 aku suka pada mereka yang masuk menemu malam 
 malam yang berwangi mimpi terlucut debu 
 waktu jalan aku tidak tahu apa nasib waktu 
 
 sudah itu kita tidak lagi mendengar deru kami 
 terbayang kami maju dan berdegap hati 
 
 kami bicara padamu dalam hening di malam sepi 
 jika dada rasa hampa dan jam dinding yang berdetak 
 kami mati muda yang tinggal tulang dilipu

Jika mau buat judul sendiri, silakan. Direkomendasikan menggunakan kata-kata yang dikenali oleh Chairil Anwar Bot di bawah ini:

In [29]:
# List every word the bot knows, alphabetically, one per line.
print("\n".join(sorted(word_index)))

ada
adalah
adik
adikku
agustus
ah
ahasveros
ahasvros
air
airmataku
ajal
ajati
akan
akanan
akankah
akhirat
akhirnya
aku
allah
amboi
amoi
ampun
anak
angin
angkasa
angkat
antara
apa
apakah
api
apimu
arti
artic
asing
astagfirullah
atas
atau
awas
ayo
badan
bagiku
bagimu
bahagia
bahan
bahaya
bahwa
baik
balikkan
bangka
banyak
banyaknya
bapakku
bara
barisan
baru
batas
batu
beberapa
begini
beginilah
bekasi
beku
belakang
belum
benar
benci
bentuk
berada
beranak
berani
berapa
berarti
berasal
berbahagia
berbenah
berbini
bercerita
bercium
berdarah
berdegap
berdetak
berdetik
berenang
bergelut
bergenderang
bergerak
berhutang
beria
beribu
berikan
berjaga
berjagalah
berjalan
berkakuan
berkata
berkibar
berlabuh
berlalu
berlari
berlaut
berlayar
berleleran
berlupa
bermata
bermuka
bernyala
bernyawa
berpaling
berpalu
berpaut
berpeluk
berperang
bersama
bersandar
berselempang
berselisih
bersepeda
berserakan
bersimbah
bersuara
bertahta
bertahun
bertakhta
bertambah
bertanya
bertempik
bertemu
bertuba
bertudung
be

# For training

In [74]:
# Left-pad every n-gram sequence to the common length so they stack into one 2-D array.
padded = pad_sequences(input_sequences, padding = "pre", maxlen = max_sequence_len)
input_sequences = np.array(padded)

# Inputs are all tokens but the last; the last token of each row is the word to predict.
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the labels to match the categorical cross-entropy loss.
ys = tf.keras.utils.to_categorical(labels, num_classes = total_words)

In [None]:
# Train for three epochs on the prepared inputs/labels and keep the returned history.
history = model.fit(x=xs, y=ys, epochs=3, verbose=1)

# Write the weights to the checkpoint prefix that the first cell loads from.
print("Saving ...")
model.save_weights(filepath="./data/checkpoints/chairil-anwar-bot")
print("Save successful!")

Epoch 1/3