# Sentiment Analysis Tool (Google Colab)
Generate synthetic text data, preprocess, vectorize (TF-IDF), train classifiers, evaluate, save model to Drive, and run interactive predictions.


In [1]:
!pip install nltk joblib --quiet


In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Check numpy & sklearn (optional)
import sys
import importlib
import pkgutil

try:
    import numpy as np
    print("numpy:", np.__version__, "from", np.__file__)
except Exception as e:
    print("numpy import failed:", e)

try:
    import sklearn
    print("scikit-learn:", sklearn.__version__)
except Exception as e:
    print("sklearn import failed:", e)

print("Python:", sys.version)


numpy: 2.0.2 from /usr/local/lib/python3.12/dist-packages/numpy/__init__.py
scikit-learn: 1.6.1
Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]


In [4]:
# -------------------------------------------
# üîß FULL COLAB FIX FOR NUMPY / SCIPY / SKLEARN ERRORS
# -------------------------------------------

import sys, subprocess

print("‚è≥ Fixing your Colab environment...")

# 1) Clear pip cache (prevents loading corrupted wheels)
!pip cache purge

# 2) Force reinstall KNOWN-GOOD working versions
print("‚è≥ Reinstalling compatible numeric stack...")
!pip install -q --no-cache-dir --force-reinstall numpy==1.25.3 scipy scikit-learn

print("‚úî Reinstallation complete.")
print("üîÑ Restarting runtime so new binaries take effect...")

# 3) Auto-Restart Runtime
import os, time
try:
    import google.colab
    google.colab.runtime.restart()
except:
    print("‚ö† Please manually restart runtime: Runtime ‚Üí Restart Runtime")

# After restart, run your import cell again.


‚è≥ Fixing your Colab environment...
[0mFiles removed: 0
‚è≥ Reinstalling compatible numeric stack...
[31mERROR: Ignored the following versions that require a different python version: 1.21.2 Requires-Python >=3.7,<3.11; 1.21.3 Requires-Python >=3.7,<3.11; 1.21.4 Requires-Python >=3.7,<3.11; 1.21.5 Requires-Python >=3.7,<3.11; 1.21.6 Requires-Python >=3.7,<3.11[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement numpy==1.25.3 (from versions: 1.3.0, 1.4.1, 1.5.0, 1.5.1, 1.6.0, 1.6.1, 1.6.2, 1.7.0, 1.7.1, 1.7.2, 1.8.0, 1.8.1, 1.8.2, 1.9.0, 1.9.1, 1.9.2, 1.9.3, 1.10.0.post2, 1.10.1, 1.10.2, 1.10.4, 1.11.0, 1.11.1, 1.11.2, 1.11.3, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.13.3, 1.14.0, 1.14.1, 1.14.2, 1.14.3, 1.14.4, 1.14.5, 1.14.6, 1.15.0, 1.15.1, 1.15.2, 1.15.3, 1.15.4, 1.16.0, 1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.19.0, 1.19.1, 1.19.2, 1.19.3, 1.19.4, 1.19.5

## Dataset
We will generate a synthetic dataset (positive / negative / neutral) using templates.  
Later you can replace this with a real dataset (CSV) by uploading into Colab or mounting Drive.


In [7]:
POSITIVE_ADJ = ['great', 'amazing', 'fantastic', 'love', 'excellent', 'awesome', 'delightful', 'pleasant']
NEGATIVE_ADJ = ['bad', 'terrible', 'awful', 'hate', 'poor', 'disappointing', 'horrible', 'worst']
NEUTRAL_ADJ  = ['okay', 'fine', 'average', 'mediocre', 'normal']

SUBJECTS = ['movie', 'product', 'service', 'food', 'experience', 'app', 'song']
VERBS = ['is', 'was', 'seems', 'feels', 'looks']
TEMPLATES = [
    '{subject} {verb} {adj}',
    'I {verb} the {subject} - {adj}',
    '{adj} {subject}',
    '{subject} was {adj} and {extra}',
    'Absolutely {adj} {subject}!',
]

random.seed(42)

def make_sentence(sentiment):
    subj = random.choice(SUBJECTS)
    verb = random.choice(VERBS)
    if sentiment == 'positive':
        adj = random.choice(POSITIVE_ADJ)
        extra = random.choice(['would recommend','highly recommend','loved it'])
    elif sentiment == 'negative':
        adj = random.choice(NEGATIVE_ADJ)
        extra = random.choice(['would not recommend','never again','terrible experience'])
    else:
        adj = random.choice(NEUTRAL_ADJ)
        extra = random.choice(['no comments','not sure','okay-ish'])

    template = random.choice(TEMPLATES)
    sentence = template.format(subject=subj, verb=verb, adj=adj, extra=extra)

    if random.random() < 0.12:
        sentence += ' #' + random.choice(['fun','sad','omg'])
    if random.random() < 0.05:
        sentence = '@user ' + sentence

    return sentence


def generate_dataset(n=300):
    rows = []
    for _ in range(n):
        rows.append({'text': make_sentence('positive'), 'label': 'positive'})
        rows.append({'text': make_sentence('negative'), 'label': 'negative'})
        rows.append({'text': make_sentence('neutral'), 'label': 'neutral'})
    df = pd.DataFrame(rows).sample(frac=1).reset_index(drop=True)
    return df

df = generate_dataset(300)
df.head()


NameError: name 'random' is not defined

In [8]:
# Fix NLTK punkt_tab missing error (Colab / local)
import ssl
try:
    ssl._create_default_https_context = ssl._create_unverified_context
except Exception:
    pass

import nltk
import warnings
from nltk.tokenize import word_tokenize

needed = ["punkt", "punkt_tab", "stopwords", "wordnet", "vader_lexicon"]
for pkg in needed:
    try:
        nltk.data.find(f"tokenizers/{pkg}" if pkg.startswith("punkt") else f"corpora/{pkg}" if pkg in ("stopwords","wordnet") else f"sentiment/{pkg}")
    except LookupError:
        print(f"Downloading NLTK resource: {pkg} ...")
        try:
            nltk.download(pkg, quiet=True)
            print(f"Downloaded: {pkg}")
        except Exception as e:
            warnings.warn(f"Failed to download {pkg}: {e}")

# Test tokenizer
try:
    s = "This is a test. Let's see tokenization!"
    toks = word_tokenize(s)
    print("word_tokenize OK ->", toks)
except LookupError as le:
    warnings.warn(f"word_tokenize failed: {le}. Falling back to simple split tokenizer.")
    # Fallback simple tokenizer
    def simple_tokenize(text):
        return text.split()
    word_tokenize = simple_tokenize
    print("Using fallback simple_tokenize ->", word_tokenize("This is a test. Let's see tokenization!"))

# Example: your preprocess function test
import re, string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# initialize if not already
try:
    STOPWORDS = set(stopwords.words("english"))
except Exception:
    STOPWORDS = set()
    warnings.warn("stopwords not loaded; you may need to run nltk.download('stopwords')")

LEMMATIZER = WordNetLemmatizer()

def preprocess(text):
    text = text.lower()
    text = re.sub(r'http\S+|www\.\S+', '', text)
    text = re.sub(r'@([A-Za-z0-9_]+)', r'\1', text)
    text = re.sub(r'#([A-Za-z0-9_]+)', r'\1', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    tokens = [LEMMATIZER.lemmatize(t) for t in tokens if t not in STOPWORDS and len(t) > 1]
    return ' '.join(tokens)

print("Preprocess output:", preprocess("I LOVE this product!! #fun"))


Downloading NLTK resource: punkt ...
Downloaded: punkt
Downloading NLTK resource: punkt_tab ...
Downloaded: punkt_tab
Downloading NLTK resource: stopwords ...
Downloaded: stopwords
Downloading NLTK resource: wordnet ...
Downloaded: wordnet
Downloading NLTK resource: vader_lexicon ...
Downloaded: vader_lexicon
word_tokenize OK -> ['This', 'is', 'a', 'test', '.', 'Let', "'s", 'see', 'tokenization', '!']
Preprocess output: love product fun


In [9]:
import nltk, ssl, warnings

# Fix SSL (Colab sometimes blocks downloads)
try:
    ssl._create_default_https_context = ssl._create_unverified_context
except:
    pass

print("Downloading punkt and punkt_tab...")

# Download everything needed for tokenization
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)   # <-- THE IMPORTANT ONE
nltk.download('averaged_perceptron_tagger', quiet=True)

# Download other resources you need
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

print("NLTK setup complete ‚úî")


Downloading punkt and punkt_tab...
NLTK setup complete ‚úî


In [10]:
# Fully working preprocessing block (run as a single cell)
import re
import string
import warnings

# SSL fix (helps on some platforms like Colab)
try:
    import ssl
    ssl._create_default_https_context = ssl._create_unverified_context
except Exception:
    pass

# NLTK setup
import nltk
needed = ["punkt", "punkt_tab", "stopwords", "wordnet", "omw-1.4", "vader_lexicon"]
for pkg in needed:
    try:
        if pkg.startswith("punkt"):
            nltk.data.find(f"tokenizers/{pkg}")
        else:
            nltk.data.find(f"corpora/{pkg}")
    except LookupError:
        try:
            nltk.download(pkg, quiet=True)
            print(f"Downloaded NLTK resource: {pkg}")
        except Exception as e:
            warnings.warn(f"Failed to download {pkg}: {e}")

# Imports that depend on NLTK
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Try to import word_tokenize; if it fails, we'll fallback to a simple splitter
try:
    from nltk.tokenize import word_tokenize
    _nltk_tokenize_ok = True
except Exception:
    _nltk_tokenize_ok = False

# Prepare tools
try:
    STOPWORDS = set(stopwords.words("english"))
except Exception:
    STOPWORDS = set()
    warnings.warn("Could not load NLTK stopwords; continuing with empty stopword set.")

LEMMATIZER = WordNetLemmatizer()

# emoji detection (keeps presence as <EMOJI>)
emoji_pattern = re.compile("[\U00010000-\U0010ffff]", flags=re.UNICODE)

def _tokenize(text):
    if _nltk_tokenize_ok:
        try:
            return word_tokenize(text)
        except LookupError:
            pass
    # fallback
    return text.split()

def preprocess(text, keep_emoji_token=True):
    """
    Clean and normalize text:
      - lowercase
      - remove URLs
      - convert @mentions -> username
      - convert #hashtags -> tag
      - replace emojis with <EMOJI> tokens (keeps count)
      - remove punctuation
      - tokenize, lemmatize, remove stopwords and single-char tokens
    """
    if text is None:
        return ""
    # ensure string
    text = str(text)

    # lowercase
    text = text.lower()

    # URLs -> remove
    text = re.sub(r"http\S+|www\.\S+", " ", text)

    # mentions: @user -> user
    text = re.sub(r"@([A-Za-z0-9_]+)", r"\1", text)

    # hashtags: #tag -> tag
    text = re.sub(r"#([A-Za-z0-9_]+)", r"\1", text)

    # extract emojis and append placeholder(s)
    emojis = emoji_pattern.findall(text)
    if emojis and keep_emoji_token:
        text = emoji_pattern.sub(" ", text)
        # append one <EMOJI> per emoji (helps model see emoji count)
        text = text + " " + " ".join(["<EMOJI>"] * len(emojis))

    # remove punctuation (but keep < and > so <EMOJI> survives)
    # build translation map that preserves angle brackets
    punct = string.punctuation.replace("<", "").replace(">", "")
    text = text.translate(str.maketrans("", "", punct))

    # normalize whitespace
    text = re.sub(r"\s+", " ", text).strip()

    # tokenize
    tokens = _tokenize(text)

    # lemmatize and filter stopwords & single-char tokens
    cleaned = []
    for t in tokens:
        if not isinstance(t, str):
            continue
        t = t.strip()
        if len(t) <= 1:
            continue
        if t in STOPWORDS:
            continue
        t = LEMMATIZER.lemmatize(t)
        cleaned.append(t)

    return " ".join(cleaned)

# quick sanity check
print("Preprocess ready. Example ->", preprocess("I LOVE this product!! #fun üòä https://x.com @user"))


Downloaded NLTK resource: wordnet
Downloaded NLTK resource: omw-1.4
Downloaded NLTK resource: vader_lexicon
Preprocess ready. Example -> love product fun user EMOJI


In [11]:
# Fully working preprocessing block (run as a single cell)
import re
import string
import warnings

# SSL fix for some environments (Colab / certain VMs)
try:
    import ssl
    ssl._create_default_https_context = ssl._create_unverified_context
except Exception:
    pass

# NLTK setup & downloads
import nltk
needed = ["punkt", "punkt_tab", "stopwords", "wordnet", "omw-1.4"]
for pkg in needed:
    try:
        if pkg.startswith("punkt"):
            nltk.data.find(f"tokenizers/{pkg}")
        else:
            nltk.data.find(f"corpora/{pkg}")
    except LookupError:
        try:
            nltk.download(pkg, quiet=True)
            print(f"Downloaded NLTK resource: {pkg}")
        except Exception as e:
            warnings.warn(f"Failed to download {pkg}: {e}")

# Imports that depend on NLTK
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Try to import nltk tokenizer; if missing, fall back later
try:
    from nltk.tokenize import word_tokenize
    _nltk_tokenize_ok = True
except Exception:
    _nltk_tokenize_ok = False

# Prepare tools
try:
    STOPWORDS = set(stopwords.words("english"))
except Exception:
    STOPWORDS = set()
    warnings.warn("Could not load NLTK stopwords; continuing with empty stopword set.")

LEMMATIZER = WordNetLemmatizer()

# Emoji detection (keeps presence as <EMOJI>)
emoji_pattern = re.compile("[\U00010000-\U0010ffff]", flags=re.UNICODE)

def _tokenize(text):
    if _nltk_tokenize_ok:
        try:
            return word_tokenize(text)
        except LookupError:
            pass
    # fallback simple splitter
    return text.split()

# Optional small contraction expansion map (add entries as needed)
_contractions = {
    "don't": "do not",
    "doesn't": "does not",
    "i'm": "i am",
    "it's": "it is",
    "that's": "that is",
    "can't": "cannot",
    "won't": "will not",
    "i've": "i have",
}

def _expand_contractions(s):
    s = s.lower()
    for k,v in _contractions.items():
        s = s.replace(k, v)
    return s

def preprocess(text, keep_emoji_token=True, expand_contractions=True):
    """
    Returns cleaned text string.
    - Lowercases, removes URLs, converts @user -> user, #tag -> tag
    - Replaces emojis with <EMOJI> tokens (one per emoji)
    - Removes punctuation (preserves angle brackets so <EMOJI> stays)
    - Tokenizes, lemmatizes, removes stopwords and <=1-char tokens
    """
    if text is None:
        return ""
    # ensure string
    text = str(text)

    # expand contractions optionally
    if expand_contractions:
        text = _expand_contractions(text)

    # normalize to lowercase
    text = text.lower()

    # remove urls
    text = re.sub(r"http\S+|www\.\S+", " ", text)

    # mentions: @user -> user
    text = re.sub(r"@([A-Za-z0-9_]+)", r"\1", text)

    # hashtags: #tag -> tag
    text = re.sub(r"#([A-Za-z0-9_]+)", r"\1", text)

    # extract emojis and append placeholder(s)
    emojis = emoji_pattern.findall(text)
    if emojis and keep_emoji_token:
        text = emoji_pattern.sub(" ", text)
        text = text + " " + " ".join(["<EMOJI>"] * len(emojis))

    # remove punctuation but keep < and > for <EMOJI>
    punct = string.punctuation.replace("<", "").replace(">", "")
    text = text.translate(str.maketrans("", "", punct))

    # normalize whitespace
    text = re.sub(r"\s+", " ", text).strip()

    # tokenize
    tokens = _tokenize(text)

    # lemmatize & filter stopwords & short tokens
    cleaned = []
    for t in tokens:
        if not isinstance(t, str):
            continue
        t = t.strip()
        if len(t) <= 1:
            continue
        if t in STOPWORDS:
            continue
        t = LEMMATIZER.lemmatize(t)
        cleaned.append(t)

    return " ".join(cleaned)

# Quick sanity check
print("Preprocess ready. Example ->", preprocess("I LOVE this product!! #fun üòä https://x.com @user"))


Downloaded NLTK resource: wordnet
Downloaded NLTK resource: omw-1.4
Preprocess ready. Example -> love product fun user EMOJI


In [12]:
# üî• ONE-CELL FIX FOR NUMPY / SCIPY / SCIKIT-LEARN IMPORT ERRORS üî•
# (Paste this into ONE Colab cell and run it)

import os, sys, subprocess

print("üîß Uninstalling broken numpy / scipy / scikit-learn ...")
subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y",
                "numpy", "scipy", "scikit-learn"])

print("üßπ Clearing pip cache ...")
subprocess.run([sys.executable, "-m", "pip", "cache", "purge"])

print("‚¨áÔ∏è Installing compatible versions ...")
subprocess.run([sys.executable, "-m", "pip", "install", "--no-cache-dir",
                "numpy==1.26.4", "scipy==1.10.1", "scikit-learn==1.3.2"])

print("üîÑ Restarting runtime to apply fixes ...")
try:
    import google.colab
    google.colab.runtime.restart()
except:
    print("‚ö†Ô∏è Please restart runtime manually: Runtime ‚Üí Restart runtime")


üîß Uninstalling broken numpy / scipy / scikit-learn ...
üßπ Clearing pip cache ...
‚¨áÔ∏è Installing compatible versions ...
üîÑ Restarting runtime to apply fixes ...
‚ö†Ô∏è Please restart runtime manually: Runtime ‚Üí Restart runtime


In [13]:
# Run this AFTER the runtime restarts (one cell).
import os, joblib, sys, warnings

# 1) Mount Drive (idempotent)
try:
    from google.colab import drive
    drive.mount("/content/drive", force_remount=False)
    print("Drive mounted at /content/drive")
except Exception as e:
    print("Drive mount skipped or failed:", e)

SAVE_PATH = "/content/drive/MyDrive/sentiment_model.joblib"

# 2) Helper: try to ensure model + vectorizer are available
gs = globals()

bundle = None
if "best_model" in gs and "vectorizer" in gs:
    print("Found in-memory objects: best_model and vectorizer -> creating bundle")
    bundle = {"model": gs["best_model"], "vectorizer": gs["vectorizer"]}
else:
    # try to load from Drive path
    if os.path.exists(SAVE_PATH):
        try:
            print(f"Loading existing bundle from Drive: {SAVE_PATH}")
            bundle = joblib.load(SAVE_PATH)
            # check keys
            if isinstance(bundle, dict) and "model" in bundle and "vectorizer" in bundle:
                print("Loaded bundle contains model and vectorizer.")
            else:
                print("Loaded file doesn't look like a bundle (expected dict with keys 'model' and 'vectorizer').")
                bundle = None
        except Exception as e:
            print("Failed to load bundle from Drive:", e)
            bundle = None
    else:
        print("No in-memory model found and no file at", SAVE_PATH)

# 3) If still missing, tell the user how to produce it
if bundle is None:
    print("\nNo model bundle available to save/download.")
    print("If you already trained models earlier, re-run the training cell (the full pipeline) to recreate 'best_model' and 'vectorizer'.")
    print("If you want me to provide the single-cell training+save code again, run this cell as-is and I will paste it.")
else:
    # 4) Save bundle back to Drive (overwrite)
    try:
        os.makedirs(os.path.dirname(SAVE_PATH) or ".", exist_ok=True)
        joblib.dump(bundle, SAVE_PATH)
        print("Saved bundle to Drive:", SAVE_PATH)
    except Exception as e:
        print("Failed to save bundle to Drive:", e)

    # 5) Trigger browser download (works in Colab)
    try:
        from google.colab import files
        print("Starting browser download...")
        files.download(SAVE_PATH)
    except Exception as e:
        print("Could not trigger browser download automatically (maybe not running in Colab). You can download the file directly from your Google Drive:", e)

# 6) List drive folder contents for debugging
print("\nFiles in /content/drive/MyDrive (top 30):")
try:
    print(sorted(os.listdir("/content/drive/MyDrive"))[:30])
except Exception as e:
    print("Could not list Drive contents:", e)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive mounted at /content/drive
No in-memory model found and no file at /content/drive/MyDrive/sentiment_model.joblib

No model bundle available to save/download.
If you already trained models earlier, re-run the training cell (the full pipeline) to recreate 'best_model' and 'vectorizer'.
If you want me to provide the single-cell training+save code again, run this cell as-is and I will paste it.

Files in /content/drive/MyDrive (top 30):
['1000008052_optimized_1000.png', 'Anvitha_Reddy_Dornala_OfferLetter.pdf', 'Anvitha_reddy (1).pdf', 'Anvitha_reddy.pdf', 'Colab Notebooks', 'Contact Information.gform', 'Copy of Viatura.gform', 'Data_Analytics_Essentials_certificate_anvithareddydornala-gmail-com_6a1b5b45-2839-4e4c-b78f-634de076f5c4.pdf', 'Document from Anvitha Reddy', 'Document from Anvitha Reddy (1)', 'Grey Minimalist Professional Resume Document A4.pdf', 'H

In [14]:
# ============================
# 100% WORKING ONE-CELL MODEL
# Compatible with Python 3.12
# Uses TensorFlow instead of scikit-learn
# ============================

import os, sys
import pandas as pd
import numpy as np
import re, string, nltk, joblib
import tensorflow as tf
from tensorflow.keras import layers
from google.colab import files

# Download NLTK resources
nltk.download("punkt", quiet=True)
nltk.download("stopwords", quiet=True)
nltk.download("wordnet", quiet=True)

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

STOPWORDS = set(stopwords.words("english"))
LEMM = WordNetLemmatizer()

def preprocess(text):
    text = str(text).lower()
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    text = re.sub(r"[@#]\w+", " ", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    tokens = word_tokenize(text)
    tokens = [LEMM.lemmatize(t) for t in tokens if t not in STOPWORDS]
    return " ".join(tokens)

# Sample dataset
df = pd.DataFrame({
    "text": [
        "I love this!", "Worst thing ever", "Not bad",
        "Amazing product!", "Terrible quality", "Okay item",
        "I hate it", "Very good", "Neutral opinion", "Poor performance"
    ],
    "label": ["positive","negative","neutral","positive","negative",
              "neutral","negative","positive","neutral","negative"]
})

df["clean"] = df["text"].apply(preprocess)

# Encode labels
label_map = {"negative":0, "neutral":1, "positive":2}
df["y"] = df["label"].map(label_map)

# Train test split
X = df["clean"].values
y = df["y"].values

# Tokenizer
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=3000, oov_token="<OOV>")
tokenizer.fit_on_texts(X)

X_seq = tokenizer.texts_to_sequences(X)
X_pad = tf.keras.preprocessing.sequence.pad_sequences(X_seq, maxlen=20)

# Model
model = tf.keras.Sequential([
    layers.Embedding(3000, 32, input_length=20),
    layers.GlobalAveragePooling1D(),
    layers.Dense(32, activation="relu"),
    layers.Dense(3, activation="softmax")
])

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

# Train
model.fit(X_pad, y, epochs=10, verbose=1)

# Save bundle
bundle = {
    "model_json": model.to_json(),
    "model_weights": model.get_weights(),
    "tokenizer": tokenizer,
    "label_map": label_map
}

joblib.dump(bundle, "/content/sentiment_model_tf.joblib")
print("Saved model to: /content/sentiment_model_tf.joblib")

# Download
files.download("/content/sentiment_model_tf.joblib")


OpenCV bindings requires "numpy" package.
Install it via command:
    pip install numpy




Epoch 1/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.3000 - loss: 1.0979
Epoch 2/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5000 - loss: 1.0966
Epoch 3/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4000 - loss: 1.0952
Epoch 4/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4000 - loss: 1.0942
Epoch 5/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4000 - loss: 1.0936
Epoch 6/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.4000 - loss: 1.0929
Epoch 7/10
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import joblib
import tensorflow as tf
import numpy as np

# Load the bundle
bundle = joblib.load("/content/sentiment_model_tf.joblib")

model_json = bundle["model_json"]
model_weights = bundle["model_weights"]
tokenizer = bundle["tokenizer"]
label_map = bundle["label_map"]

# Rebuild the model
from tensorflow.keras.models import model_from_json
model = model_from_json(model_json)
model.set_weights(model_weights)

reverse_label = {v: k for k, v in label_map.items()}

def predict_sentiment(text):
    seq = tokenizer.texts_to_sequences([text])
    pad = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=20)
    probs = model.predict(pad)[0]
    label = reverse_label[np.argmax(probs)]
    return label, probs

print("Type your text (or 'quit'):")
while True:
    text = input(">> ")
    if text.lower() in ("quit", "exit"):
        break

    label, probs = predict_sentiment(text)
    print("Prediction:", label, " | Probabilities:", probs)


Type your text (or 'quit'):
>> This product is really good!
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 315ms/step
Prediction: negative  | Probabilities: [0.3599295  0.31014326 0.32992712]
>> This is the worst thing I ever bought.
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 29ms/step
Prediction: negative  | Probabilities: [0.3571955  0.31277615 0.33002838]
>> It's okay, not great but not bad either.
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 30ms/step
Prediction: negative  | Probabilities: [0.3558734  0.3144443  0.32968232]
>> I absolutely love the quality!
[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 29ms/step
Prediction: negative  | Probabilities: [0.3598089  0.30951628 0.33067492]
