In [1]:
# Library Load Model
import pandas as pd
import numpy as np
import tensorflow as tf
import zipfile
import os
import nltk
import re

# Library Pre-Processing
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from tensorflow.keras.models import Sequential

# Fetch the NLTK data this notebook relies on: the stopword corpus read by
# stopwords.words(...) and the 'punkt' tokenizer data used by word_tokenize.
# The last call's return value is what the notebook shows as this cell's output.
nltk.download('stopwords')
nltk.download('punkt')




[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\septi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\septi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
# Where the zipped model lives and which folder it gets unpacked into
zip_file_path = 'model_logreg.zip'
extract_dir = 'unzipped_model'

# Unpack every entry of the archive into the extraction folder
with zipfile.ZipFile(zip_file_path, 'r') as archive:
    archive.extractall(extract_dir)

# Load the saved model from the 'model_logreg' entry inside the unpacked folder
unzipped_model_path = os.path.join(extract_dir, 'model_logreg')
model = load_model(unzipped_model_path)





In [3]:
# Stemmer shared by the preprocessing function
stemmer = PorterStemmer()

# Extra stopwords appended on top of NLTK's English list
custom_stopwords = [
    'the', 'to', 'and', 'of', 'a', 'in', 'for', '#coronavirus', 'is', 'are',
    'on', 'I', 'you', 'at', 'prices', 'with', 'have', 'this', 'that', 'be',
    'grocery', 'store', 'as', 'food', 'supermarket', 'from', 'people', 'your',
    'will', 'it', 'all', 'The', 'COVID-19', 'we', 'not', 'has', '&', 'by',
    'our', 'or', '19', 'can', 'out', 'my', 'up', '#COVID19', 'their', 'more',
    'they', 'during',
]

# Final stopword list: de-duplicated NLTK English stopwords followed by the custom entries
stpwds_id = list(set(stopwords.words('english'))) + custom_stopwords

In [4]:
# Create A Function for Text Preprocessing
def text_preprocessing(text):
    """Clean a raw review string and return it as space-joined, stemmed tokens.

    Steps, in order: lowercase, remove URLs, drop '#' characters, replace line
    breaks with spaces, drop digits, replace every remaining character that is
    not an ASCII letter, whitespace or apostrophe with a space, tokenize with
    ``word_tokenize``, remove tokens found in the module-level ``stpwds_id``
    and stem the rest with the module-level ``stemmer``.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The surviving stemmed tokens joined by single spaces.

    Notes
    -----
    NOTE(review): the loaded model presumably expects text cleaned the same way
    as at training time -- confirm the training pipeline uses identical steps.
    """
    # Case folding
    text = text.lower()

    # URL removal (links with an explicit http:// or https:// scheme)
    text = re.sub(r'https?://(?:www\.[^\s\n\r]+|[^\s\n\r]+)', '', text)

    # Hashtag marker removal: only the '#' is dropped, the tag word itself is kept
    text = re.sub(r'#', '', text)

    # Line-break removal: substitute a space so the last word of one line is
    # not glued to the first word of the next
    text = re.sub(r'[\n\r]', ' ', text)

    # Digit removal
    text = re.sub(r'\d+', '', text)

    # Leading/trailing whitespace removal
    text = text.strip()

    # Remaining URL fragments: tokens starting with "http" or with a literal
    # "www." (the dot is escaped so words that merely contain "www" survive)
    text = re.sub(r"http\S+", " ", text)
    text = re.sub(r"www\.\S+", " ", text)

    # Non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc.).
    # Raw string: "\s" is not a valid escape in a normal string literal.
    text = re.sub(r"[^A-Za-z\s']", " ", text)

    # Tokenization
    tokens = word_tokenize(text)

    # Stopwords removal (set lookup instead of scanning the list for every token)
    stop_words = set(stpwds_id)
    tokens = [word for word in tokens if word not in stop_words]

    # Stemming
    tokens = [stemmer.stem(word) for word in tokens]

    # Combining tokens
    return ' '.join(tokens)

In [5]:
# Build a one-row inference frame holding the review to classify
review_text = '''First, I got delayed and after I waited for almost an hour, the flight got cancelled last minute.'''
df_inf = pd.DataFrame({'Review': [review_text]})
df_inf

Unnamed: 0,Review
0,"First, I got delayed and after I waited for al..."


In [6]:
# Run each review through text_preprocessing and store the result in a new column
df_inf['Processed Review'] = df_inf['Review'].apply(text_preprocessing)
df_inf

Unnamed: 0,Review,Processed Review
0,"First, I got delayed and after I waited for al...",first got delay wait almost hour flight got ca...


In [7]:
# Score the processed review with the loaded model; as the cell's last
# expression, the raw prediction array is displayed by the notebook.
model.predict(df_inf['Processed Review'])



array([[0.9524613 , 0.04722199]], dtype=float32)

In [8]:
# Predict a label for every processed review in df_inf
# Index i of CLASS_LABELS is the text printed when class i has the highest score.
CLASS_LABELS = ('Not Recommended', 'Recommended')

# One row of class scores per review
y_pred_scores = model.predict(df_inf['Processed Review'])

# Take the argmax along the class axis: without `axis`, np.argmax flattens the
# whole array and returns a single flat index, which is only a valid class id
# when exactly one review is scored.
y_pred_inf = np.argmax(y_pred_scores, axis=1)

# Print one label per review (a single line for a one-row frame)
for class_idx in y_pred_inf:
    print(CLASS_LABELS[class_idx])

Not Recommended
