<a href="https://colab.research.google.com/github/Aadhimozhi/DL_Lab/blob/main/exno_04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import re
import shutil
import string
import tarfile
import urllib.request

import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# Download the IMDB review archive once; later runs reuse the local copy.
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
archive_path = "aclImdb_v1.tar.gz"
if not os.path.exists(archive_path):
    # Fetch under a temporary name first so an interrupted download is never
    # mistaken for a complete archive on the next run.
    partial_path = archive_path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, archive_path)

# Unpack into the current working directory.
with tarfile.open(archive_path, "r:gz") as tar:
    if hasattr(tarfile, "data_filter"):
        # Extraction filters are available: use the restrictive "data" filter
        # so archive members cannot write outside the destination directory.
        tar.extractall(filter="data")
    else:
        # Older Python without extraction filters: fall back to plain extract.
        tar.extractall()

In [None]:
# Locations of the train/test splits inside the extracted archive.
train_dir = os.path.join("aclImdb", "train")
test_dir = os.path.join("aclImdb", "test")

# Remove the train/unsup folder before the directory is loaded as a dataset.
# shutil.rmtree avoids spawning a shell (portable across platforms), and
# ignore_errors=True keeps the `rm -rf` behavior of succeeding silently when
# the folder is already gone.
shutil.rmtree(os.path.join(train_dir, "unsup"), ignore_errors=True)

0

In [None]:
batch_size = 32

# Options shared by the training and validation subsets: both calls must use
# the same split fraction and seed so the two subsets do not overlap.
split_options = dict(batch_size=batch_size, validation_split=0.2, seed=42)

# 80% of the train directory is used for fitting ...
raw_train = tf.keras.utils.text_dataset_from_directory(
    train_dir, subset='training', **split_options)

# ... and the remaining 20% is held out for validation.
raw_val = tf.keras.utils.text_dataset_from_directory(
    train_dir, subset='validation', **split_options)

# The test directory is loaded whole, with no split.
raw_test = tf.keras.utils.text_dataset_from_directory(
    test_dir, batch_size=batch_size)

Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.
Found 25000 files belonging to 2 classes.


In [None]:
# Registering the function lets Keras resolve it by name when a saved model
# whose TextVectorization layer uses it is loaded again.
@tf.keras.utils.register_keras_serializable()
def custom_standardization(text):
    """Normalize raw review text before tokenization.

    Lowercases the input, replaces the literal "<br />" markup with a space,
    and then removes every character in ``string.punctuation``.

    Args:
        text: String tensor of raw text.

    Returns:
        String tensor with the normalization applied.
    """
    text = tf.strings.lower(text)
    text = tf.strings.regex_replace(text, "<br />", " ")
    # Build a character class from the escaped punctuation set and strip it.
    return tf.strings.regex_replace(text, f"[{re.escape(string.punctuation)}]", "")

# Text -> integer-id layer: vocabulary capped at 10000 tokens, every review
# padded or truncated to 250 ids, using the custom standardization above.
vectorizer = layers.TextVectorization(
    max_tokens=10000,
    output_sequence_length=250,
    output_mode='int',
    standardize=custom_standardization,
)

# Build the vocabulary from the training text only (labels are dropped).
train_text = raw_train.map(lambda text, _label: text)
vectorizer.adapt(train_text)


In [None]:
def vectorize(text, label):
    """Apply the fitted ``vectorizer`` to ``text`` and pass ``label`` through.

    Args:
        text: Text input accepted by ``vectorizer``.
        label: Label paired with ``text``; returned unchanged.

    Returns:
        Tuple ``(vectorizer(text), label)``.
    """
    token_ids = vectorizer(text)
    return token_ids, label

AUTOTUNE = tf.data.AUTOTUNE

# Every split goes through the same pipeline: vectorize, cache the result,
# then prefetch so input preparation overlaps with model execution.
train_ds, val_ds, test_ds = (
    split.map(vectorize).cache().prefetch(AUTOTUNE)
    for split in (raw_train, raw_val, raw_test)
)

# Model: embedding -> 1D convolution -> average pooling -> small dense head.
model = tf.keras.Sequential()
model.add(layers.Embedding(10001, 16))
model.add(layers.Conv1D(8, 7, activation="relu"))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dropout(0.2))
model.add(layers.Dense(8, activation="relu"))
# Final layer has no activation: the model emits a single raw logit.
model.add(layers.Dense(1))

# The model outputs raw logits (the last Dense layer has no activation and the
# loss uses from_logits=True), so the accuracy metric must threshold at 0.0,
# which corresponds to a probability of 0.5. The metric keeps the name
# "accuracy" so the logged keys (accuracy / val_accuracy) are unchanged.
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy',
                                                       threshold=0.0)])

# Train for 10 epochs, reporting validation metrics after each one.
model.fit(train_ds, epochs=10, validation_data=val_ds)

# Evaluate on the test split; evaluate() returns the loss followed by the
# compiled metric.
test_results = model.evaluate(test_ds)
loss, acc = test_results
print("Test Accuracy:", acc)

# Export: wrap the trained classifier so it accepts raw strings and returns
# sigmoid-activated scores (vectorizer -> model -> sigmoid).
export_layers = [vectorizer, model, layers.Activation("sigmoid")]
export_model = tf.keras.Sequential(export_layers)
export_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
export_model.save("sentiment_model.keras")

# Inference: score a few hand-written reviews with the exported model.
reviews = tf.constant([
    "The movie is very boring",
    "A Good Movie",
    "very bad worst movie",
    "Worst movie, boring"
])
predictions = export_model(reviews)

# Print each review left-aligned in a 30-character column, followed by its
# score formatted to three decimals.
for review, score in zip(reviews, predictions):
    review_text = review.numpy().decode()
    score_value = score.numpy().squeeze()
    print(f"{review_text:<30} : {score_value:.3f}")


Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step - accuracy: 0.5690 - loss: 0.6385 - val_accuracy: 0.8250 - val_loss: 0.3566
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8535 - loss: 0.3438 - val_accuracy: 0.8596 - val_loss: 0.2942
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - accuracy: 0.8887 - loss: 0.2718 - val_accuracy: 0.8680 - val_loss: 0.2818
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9079 - loss: 0.2303 - val_accuracy: 0.8678 - val_loss: 0.2892
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9170 - loss: 0.1978 - val_accuracy: 0.8666 - val_loss: 0.2951
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9348 - loss: 0.1671 - val_accuracy: 0.8734 - val_loss: 0.3084
Epoch 7/10
[1m625

  return saving_lib.save_model(model, filepath)


The movie is very boring       : 0.148
A Good Movie                   : 0.307
very bad worst movie           : 0.087
Worst movie, boring            : 0.167
