In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM, Embedding, GlobalAveragePooling1D, GlobalAveragePooling2D, Dense
import matplotlib.pyplot as plt
import numpy as np
import nltk
from nltk.corpus import stopwords
import json

In [None]:
# Fetch the NLTK stopword corpus that the preprocessing cell reads via `stopwords.words`.
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Load the tab-separated review file. Column names are supplied explicitly,
# so pandas reads the first line of the file as data rather than as a header.
df = pd.read_csv(
    '/content/yelp_labelled.txt',
    sep='\t',
    names=['sentence', 'label'],
)

In [None]:
# Lowercase every sentence so that token matching is case-insensitive.
df['sentence'] = df['sentence'].str.lower()

# Remove English stopwords. A set is used for fast membership checks.
# Tokens are produced by whitespace splitting only, so a stopword with
# punctuation attached (e.g. "the.") is not removed here.
stop_word = set(stopwords.words('english'))

df['sentence'] = df['sentence'].apply(
    lambda text: ' '.join(word for word in text.split() if word not in stop_word)
)

# Split the dataset into a training set and a hold-out set.
sentence = df['sentence'].values
label = df['label'].values

# The split is shuffled with a fixed seed and stratified on the label.
# Taking the last 20% of the file in order (shuffle=False) produced a hold-out
# set whose class balance differed sharply from the training set: validation
# accuracy was stuck at 0.24 while the model was still predicting one class.
SPLIT_SEED = 42
sentence_train, sentence_test, label_train, label_test = train_test_split(
    sentence,
    label,
    test_size=0.2,
    shuffle=True,
    random_state=SPLIT_SEED,
    stratify=label,
)

# Tokenisation settings, named once so the tokenizer and both padding calls agree.
VOCAB_SIZE = 2000  # value passed to Tokenizer(num_words=...)
MAX_LEN = 20       # every sequence is padded / truncated to this many tokens

# Symbols to strip from the text before tokenising.
# Written as a raw string so the backslash is a literal character; in a normal
# string literal "\]" is an invalid escape sequence and triggers a warning on
# newer Python versions. The resulting characters are unchanged.
filt = r'!"#$%&()*+.,-/:;=?@[\]^_`{|}~ '

tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>", filters=filt)

# Build the vocabulary from the training sentences only.
tokenizer.fit_on_texts(sentence_train)

# Save the word -> index mapping to a JSON file.
word_index = tokenizer.word_index

with open('word_index.json', 'w', encoding='utf-8') as fp:
    json.dump(word_index, fp)

# Convert sentences to integer sequences, then pad / truncate them at the end
# so that every row has exactly MAX_LEN entries.
train_sekuens = tokenizer.texts_to_sequences(sentence_train)
test_sekuens = tokenizer.texts_to_sequences(sentence_test)

pad_kwargs = dict(maxlen=MAX_LEN, padding='post', truncating='post')
train_padded = pad_sequences(train_sekuens, **pad_kwargs)
test_padded = pad_sequences(test_sekuens, **pad_kwargs)
# Next step: build and train the model on the dataset prepared above.

# Model dimensions.
EMBEDDING_INPUT_DIM = 5000  # rows in the embedding table; larger than the tokenizer's
                            # num_words (2000) and kept as-is so saved weight shapes do not change
EMBEDDING_DIM = 20          # size of each word vector
seq_len = train_padded.shape[1]  # sequence length, taken from the padded training data

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable from one run to the next.
tf.keras.utils.set_random_seed(42)

# Build the model.
# The input shape is declared with an explicit Input instead of the Embedding
# layer's `input_length` argument, which is deprecated in Keras 3.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(seq_len,)),
    Embedding(EMBEDDING_INPUT_DIM, EMBEDDING_DIM),
    GlobalAveragePooling1D(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),  # single sigmoid unit for the binary label
])

# Compile the model.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model; the hold-out split is used as validation data.
num_epochs = 30
history = model.fit(
    train_padded,
    label_train,
    epochs=num_epochs,
    validation_data=(test_padded, label_test),
    verbose=1,
)



Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.5372 - loss: 0.6916 - val_accuracy: 0.2400 - val_loss: 0.7366
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5460 - loss: 0.6857 - val_accuracy: 0.2400 - val_loss: 0.7918
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5621 - loss: 0.6806 - val_accuracy: 0.2400 - val_loss: 0.7631
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5515 - loss: 0.6763 - val_accuracy: 0.2400 - val_loss: 0.7577
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6288 - loss: 0.6418 - val_accuracy: 0.4350 - val_loss: 0.6924
Epoch 6/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.8515 - loss: 0.5581 - val_accuracy: 0.6300 - val_loss: 0.6282
Epoch 7/30
[1m25/25[0m [32m━━━━━━━

In [None]:
# Write the trained model to model.h5 in the working directory;
# the conversion cell below reads this file.
model.save(filepath="model.h5")



In [None]:
# Install tensorflowjs
!pip install tensorflowjs

# Convert model.h5 to model
!tensorflowjs_converter --input_format=keras model.h5 tfjs_model

Collecting tensorflowjs
  Downloading tensorflowjs-4.20.0-py3-none-any.whl.metadata (3.2 kB)
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.0 kB)
Collecting packaging~=23.1 (from tensorflowjs)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting tensorflow<3,>=2.13.0 (from tensorflowjs)
  Downloading tensorflow-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting wurlitzer (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading wurlitzer-3.1.1-py3-none-any.whl.metadata (2.5 kB)
Collecting ydf (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading ydf-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting ml-dtypes>=0.2.0 (from jax>=0.4.13->tensorflowjs)
  Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux_2_17_x86_64.many

2024-08-07 09:06:27.054253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-07 09:06:27.104163: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-07 09:06:27.104270: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
failed to lookup keras version from the file,
    this is likely a weight only file
