# Import Libraries

In [1]:
# Core libraries: data handling, TensorFlow, and standard-library utilities
import pandas as pd
import numpy as np
import tensorflow as tf
import zipfile
import os
import nltk
import re

# NLTK pre-processing tools and Keras model utilities
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from tensorflow.keras.models import Sequential

# Fetch the NLTK resources used further down: the stopword corpus and the
# Punkt tokenizer data behind word_tokenize.
nltk.download('stopwords')
# Newer NLTK releases load Punkt from 'punkt_tab' instead of the pickled
# 'punkt' package; download both so word_tokenize works on either version.
nltk.download('punkt_tab')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

# Load Model

In [3]:
# Location of the uploaded ZIP archive (adjust to the uploaded file's name)
zip_file_path = 'model.zip'

# Unpack every member of the archive into the current working directory
with zipfile.ZipFile(file=zip_file_path, mode='r') as zip_ref:
    zip_ref.extractall(path=None)

In [4]:
# Directory that contains saved_model.pb (adjust to the extracted directory)
model_dir = 'model'

# Restore the saved model from disk
loaded_model = load_model(model_dir)

# Sanity check: print the layer-by-layer summary of the restored model
loaded_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, 47)                0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, 47, 128)           4130304   
                                                                 
 lstm (LSTM)                 (None, 47, 64)            49408     
                                                                 
 dropout (Dropout)           (None, 47, 64)            0         
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                        

In [5]:
# Additional stopwords appended after NLTK's English list.
# NOTE(review): text_preprocessing lowercases the text and strips '#', digits
# and other non-letters before filtering, so entries such as 'I', 'The',
# '#coronavirus', 'COVID-19', '&', '19' and '#COVID19' can never match there.
custom_stopwords = [
    'the', 'to', 'and', 'of', 'a', 'in', 'for', '#coronavirus', 'is', 'are',
    'on', 'I', 'you', 'at', 'prices', 'with', 'have', 'this', 'that', 'be',
    'grocery', 'store', 'as', 'food', 'supermarket', 'from', 'people', 'your',
    'will', 'it', 'all', 'The', 'COVID-19', 'we', 'not', 'has', '&', 'by',
    'our', 'or', '19', 'can', 'out', 'my', 'up', '#COVID19', 'their', 'more',
    'they', 'during',
]

# Stopword list: de-duplicated NLTK English stopwords followed by the custom entries
stpwds_id = [*set(stopwords.words('english')), *custom_stopwords]

# Porter stemmer used by the preprocessing step
stemmer = PorterStemmer()

In [6]:
# Create A Function for Text Preprocessing
def text_preprocessing(text):
    """Normalize a raw tweet into a space-joined string of stemmed tokens.

    Steps, in order: lowercase, strip URLs, drop '#' characters, drop line
    breaks, drop digits, trim, strip URLs again, replace every character that
    is not an ASCII letter, whitespace or apostrophe with a space, tokenize,
    drop stopwords, stem, and re-join with single spaces.

    Relies on the module-level ``stpwds_id`` (stopword collection) and
    ``stemmer`` objects.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned tweet; an empty string if no token survives filtering.
    """
    # Case folding
    text = text.lower()

    # URL removal (first pass). '@mentions' are not removed as a unit anywhere
    # in this function: the '@' is blanked by the non-letter step below, and
    # the letters of the username stay in the text.
    text = re.sub(r'https?://(?:www\.[^\s\n\r]+|[^\s\n\r]+)', '', text)

    # Hashtag marker removal: only the '#' is dropped, the tag word is kept
    text = re.sub(r'#', '', text)

    # Line-break removal. The replacement is '' (not a space), so two words
    # separated only by a line break end up joined together.
    text = re.sub(r'[\n\r]', '', text)

    # Replaces the numbers with an empty string
    text = re.sub(r'\d+', '', text)

    # Leading/trailing whitespace removal
    text = text.strip()

    # URL removal (second pass)
    # NOTE(review): the '.' after 'www' is unescaped and matches any character.
    # Left as is so the output text stays identical; presumably the saved model
    # was trained on text produced by these exact patterns -- confirm before
    # tightening it.
    text = re.sub(r"http\S+", " ", text)
    text = re.sub(r"www.\S+", " ", text)

    # Non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc.).
    # Raw string: '\s' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    text = re.sub(r"[^A-Za-z\s']", " ", text)

    # Tokenization
    tokens = word_tokenize(text)

    # Stopwords removal
    tokens = [word for word in tokens if word not in stpwds_id]

    # Stemming
    tokens = [stemmer.stem(word) for word in tokens]

    # Combining Tokens
    text = ' '.join(tokens)

    return text

# Predict

In [7]:
# Build a one-row DataFrame holding the new tweet to classify
df_inf = pd.DataFrame({
    'OriginalTweet': ['''
🌍 A reminder to stay vigilant against COVID-19. Keep safe by wearing masks, washing hands, and maintaining social distance. Together, we can overcome this pandemic. Stay healthy and look out for each other! #COVID19 #StaySafe #NewNormal
    '''],
})
df_inf

Unnamed: 0,OriginalTweet
0,\n🌍 A reminder to stay vigilant against COVID-...


In [8]:
# Run each raw tweet through text_preprocessing and store the cleaned text
# in a new 'tweet_processed' column
df_inf['tweet_processed'] = df_inf['OriginalTweet'].apply(text_preprocessing)
df_inf

Unnamed: 0,OriginalTweet,tweet_processed
0,\n🌍 A reminder to stay vigilant against COVID-...,remind stay vigil covid keep safe wear mask wa...


In [9]:
# Raw model output for the preprocessed tweet(s): one row of class scores per input
processed_tweets = df_inf['tweet_processed']
loaded_model.predict(processed_tweets)



array([[0.00881296, 0.971491  , 0.01969606]], dtype=float32)

In [12]:
# Predict the sentiment label of every preprocessed tweet in df_inf

# Class index -> label, in the order used by the original if/elif chain
# (0 = negative, 1 = neutral, 2 = positive)
sentiment_labels = ('negative', 'neutral', 'positive')

y_pred_proba_inf = loaded_model.predict(df_inf['tweet_processed'])

# axis=1 selects the highest-scoring class per row. Without an axis,
# np.argmax flattens the whole array, which is only correct when exactly
# one tweet is predicted.
y_pred_classes_inf = np.argmax(y_pred_proba_inf, axis=1)

for class_index in y_pred_classes_inf:
    print(f'That is {sentiment_labels[class_index]} Tweet')

# Scalar class index of the first tweet: the same value this name held
# before for the single-tweet case.
y_pred_inf = y_pred_classes_inf[0]

That is neutral Tweet
