In [43]:
# --- IMPORTS ---
import pandas as pd
import numpy as np
import re
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import nltk
from nltk import pos_tag
from nltk.corpus import wordnet

# --- Ensure NLTK resources are downloaded ---
for resource in [
    "punkt",
    "wordnet",
    "stopwords",
    "averaged_perceptron_tagger",
]:
    try:
        nltk.data.find(f"corpora/{resource}")
    except LookupError:
        nltk.download(resource)

# --- 1. DATASET SETUP ---
data = {
    "text": [
        "I love this product, it's perfect and exceeded my expectations!",
        "This application is absolutely terrible and unusable, a complete waste of time.",
        "It works fine most of the time, no major issues, just average performance.",
        "I’m so disappointed with the lack of features and constant bugs.",
        "Absolutely fantastic experience, top-notch support and incredibly quick resolution!",
        "Horrible service! I waited over an hour for a response and got no help.",
        "Not bad at all, could be better but it serves its basic purpose well.",
        "Worst thing ever, I'm canceling my subscription right now, I'm furious.",
        "Great help from support, they were very prompt, efficient, and friendly.",
        "Okay I guess, nothing special about it, quite neutral actually.",
        "The service was bad and my issue wasn't fixed.",
        "I have no opinion on the matter, it just exists.",
        "The user interface is clean, easy to navigate, and highly intuitive.",
        "It crashes every time I open the settings menu—completely broken software.",
        "It performs the core function, but the load times are truly unacceptable.",
        "I'm cautiously optimistic about the new features; they seem promising.",
        "I'm giving this a neutral score because I haven't used it enough to form an opinion.",
        "The price is a bit high for what it offers, making it a marginal value.",
        "Honestly, it's the best software update I've seen all year. Flawless!",
        "It was merely adequate; I encountered several minor inconveniences but nothing major.",
        "The customer service representative was rude, arrogant, and unhelpful.",
        "I found a bug, but otherwise, the experience was quite positive and speedy.",
        "This is highly functional, completely reliable, and I recommend it to everyone.",
        "I am so angry; the data I spent hours collecting was completely wiped out by the crash.",
        "The setup process was slightly confusing, leading to some early frustration.",
        "After a few hours of tinkering, it turned out to be exactly what I needed. Solid purchase.",
        "It's loud, bulky, and poorly designed. I regret this purchase.",
        "I was pleasantly surprised by the quality, which was much better than I expected.",
        "The documentation is non-existent, making it impossible to debug any problems.",
        "It’s totally fine, not the best, but I can't complain for the low price.",
        "This is amazing.",
        "I hate this.",
        "It works perfectly.",
        "A total failure.",
        "I am so happy with the results.",
        "This product is trash.",
        "Excellent.",
        "Worst ever.",
        "Simply the best purchase.",
        "A complete disaster.",
        "I'm thrilled!",
        "Total waste of money.",
    ],
    "label": [
        1.0, -0.9, 0.3, -0.85, 1.0, -1.0, 0.4, -1.0, 0.85, 0.0, -0.8, 0.0,
        0.95, -0.95, -0.5, 0.55, 0.0, -0.25, 1.0, 0.1, -0.75, 0.6, 0.9, -0.99,
        -0.4, 0.7, -0.8, 0.8, -0.7, 0.25, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
        1.0, -1.0, 1.0, -1.0, 1.0, -1.0
    ]
}
df = pd.DataFrame(data)

# --- 2. PREPROCESSING & VECTORIZATION ---
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def get_wordnet_pos(tag):
    tag_map = {'J': wordnet.ADJ, 'V': wordnet.VERB, 'N': wordnet.NOUN, 'R': wordnet.ADV}
    return tag_map.get(tag[0], wordnet.NOUN)

def lemmatize_tokenizer(text):
    text = re.sub(r'[^\w\s]', '', text.lower())
    tokens = [w for w in text.split() if w not in stop_words and len(w) > 1]
    tagged = pos_tag(tokens)
    lemmatized = [lemmatizer.lemmatize(w, get_wordnet_pos(t)) for w, t in tagged]
    return lemmatized

vectorizer = TfidfVectorizer(
    max_features=1000,
    tokenizer=lemmatize_tokenizer,
    ngram_range=(1, 3),
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,
    preprocessor=None,
    norm=None
)

X = vectorizer.fit_transform(df["text"]).toarray()
y = df["label"].values.astype(np.float32).reshape(-1, 1)
feature_names = vectorizer.get_feature_names_out()

# --- Vectorization Diagnostics ---
print("\n=== TF-IDF Vectorization Summary ===")
print(f"Corpus size: {len(df)} documents")
print(f"Vocabulary size: {len(feature_names)}")
print(f"Input dimension (X): {X.shape}")
print(f"Mean feature variance across samples: {np.mean(np.std(X, axis=1)):.6f}")
print("-" * 60)

idf_values = vectorizer.idf_
idf_map = sorted(zip(feature_names, idf_values), key=lambda x: x[1])
print("Lowest-IDF (common) tokens:")
for tok, val in idf_map[:10]:
    print(f"  {tok:<30s} {val:.4f}")
print("\nHighest-IDF (rare) tokens:")
for tok, val in idf_map[-10:]:
    print(f"  {tok:<30s} {val:.4f}")
print("-" * 60)

print("\n--- Sample TF-IDF Vector Diagnostics ---")
sample_indices = [0, 1, 2]
for i in sample_indices:
    text = df["text"].iloc[i]
    vec = X[i]
    nonzero_idx = np.where(vec > 0)[0]
    top_tokens = sorted(
        [(feature_names[j], vec[j]) for j in nonzero_idx],
        key=lambda x: x[1],
        reverse=True
    )[:10]
    print(f"\nText {i+1}: '{text}'")
    print("Top weighted tokens:")
    for tok, val in top_tokens:
        print(f"   {tok:<30s} {val:.4f}")
print("-" * 60)

# --- Train-Test Split ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
INPUT_SIZE = X_train.shape[1]
print(f"Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}")
print(f"Input feature dimension for model: {INPUT_SIZE}")
print("-" * 60)

# --- 3. MODEL DEFINITION ---
class SentimentNet(nn.Module):
    def __init__(self, input_size):
        super(SentimentNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.tanh(self.fc3(x))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentimentNet(INPUT_SIZE).to(device)

# --- 4. TRAINING LOOP ---
EPOCHS = 500
LEARNING_RATE = 0.005
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.FloatTensor(y_train).to(device)

print(f"Starting training on {len(X_train)} samples for {EPOCHS} epochs (LR={LEARNING_RATE})...")
for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {loss.item():.4f}")
print("Training Complete.")
print("-" * 60)

# --- 5. INFERENCE ---
model.eval()
test_cases = [
    ("I was completely blown away by the speed and elegant design. A masterpiece!", 0.99),
    ("The update ruined the app's functionality; it's slow, buggy, and completely unusable now.", -0.95),
    ("The meeting notes were recorded, but they didn't offer any constructive feedback.", 0.05)
]

def predict_sentiment(text, vectorizer, model, device):
    new_vec = vectorizer.transform([text]).toarray()
    new_tensor = torch.FloatTensor(new_vec).to(device)
    with torch.no_grad():
        prediction = model(new_tensor).squeeze().item()
    return prediction

print("\n**Inference Predictions (Score range: -1.0 to 1.0):**")
for text, expected in test_cases:
    prediction = predict_sentiment(text, vectorizer, model, device)
    print(f"\nMessage: '{text}'")
    print(f"Expected: {expected:+.2f} | Predicted: {prediction:+.4f}")
print("-" * 60)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\swaro\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\swaro\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\swaro\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.



=== TF-IDF Vectorization Summary ===
Corpus size: 42 documents
Vocabulary size: 514
Input dimension (X): (42, 514)
Mean feature variance across samples: 0.596522
------------------------------------------------------------
Lowest-IDF (common) tokens:
  im                             2.9694
  time                           3.1518
  bad                            3.3749
  best                           3.3749
  hour                           3.3749
  purchase                       3.3749
  service                        3.3749
  absolutely                     3.6626
  bug                            3.6626
  complete                       3.6626

Highest-IDF (rare) tokens:
  well expect                    4.0681
  wipe                           4.0681
  wipe crash                     4.0681
  work fine                      4.0681
  work fine time                 4.0681
  work perfectly                 4.0681
  worst                          4.0681
  worst ever                     4.0681


In [54]:
test_cases = [
    ("I was completely blown away by the speed and elegant design. A masterpiece!", "Positive"),
    ("The app's functionality is slow, buggy, and completely unusable now. Complete waste of time", "Negative"),
    ("The meeting notes were recorded.", "Neutral"),
    ("I have no preference about the color of the application.", "Neutral"),
    ("The service was okay, and I don't have an opinion.", "Neutral"),
]

def predict_sentiment(text, vectorizer, model, device):
    new_vec = vectorizer.transform([text]).toarray()
    new_tensor = torch.FloatTensor(new_vec).to(device)
    with torch.no_grad():
        prediction = model(new_tensor).squeeze().item()
    return prediction

def classify_sentiment(score):
    """Convert numeric score to categorical label."""
    if score > 0.35:
        return "Positive"
    elif score < -0.35:
        return "Negative"
    else:
        return "Neutral"

print("\n**Inference Predictions (Score range: -1.0 to 1.0):**")
for text, expected in test_cases:
    score = predict_sentiment(text, vectorizer, model, device)
    sentiment_label = classify_sentiment(score)
    print(f"\nMessage: '{text}'")
    print(f"Expected: {expected} | Predicted Score: {score:+.4f} | Predicted Sentiment: {sentiment_label}")
print("-" * 60)



**Inference Predictions (Score range: -1.0 to 1.0):**

Message: 'I was completely blown away by the speed and elegant design. A masterpiece!'
Expected: Positive | Predicted Score: +0.6314 | Predicted Sentiment: Positive

Message: 'The app's functionality is slow, buggy, and completely unusable now. Complete waste of time'
Expected: Negative | Predicted Score: -0.9997 | Predicted Sentiment: Negative

Message: 'The meeting notes were recorded.'
Expected: Neutral | Predicted Score: -0.2858 | Predicted Sentiment: Neutral

Message: 'I have no preference about the color of the application.'
Expected: Neutral | Predicted Score: -0.7570 | Predicted Sentiment: Negative

Message: 'The service was okay, and I don't have an opinion.'
Expected: Neutral | Predicted Score: -0.9638 | Predicted Sentiment: Negative
------------------------------------------------------------


Setup, Imports, and NLTK Downloads

In [1]:
# --- IMPORTS ---
import pandas as pd
import numpy as np
import re
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import nltk
from nltk import pos_tag
from nltk.corpus import wordnet
import warnings

# Suppress sklearn future warnings for cleaner output
warnings.filterwarnings("ignore", category=FutureWarning)

# --- Ensure NLTK resources are downloaded ---
# This block checks for and downloads the necessary NLTK components 
# (tokenization, word list, part-of-speech tagging data) needed for lemmatization.
print("Checking for NLTK resources...")
for resource in [
    "punkt",
    "wordnet",
    "stopwords",
    "averaged_perceptron_tagger",
]:
    try:
        nltk.data.find(f"corpora/{resource}")
    except LookupError:
        print(f"Downloading NLTK resource: {resource}...")
        nltk.download(resource)
print("NLTK resources are ready.")

Checking for NLTK resources...
Downloading NLTK resource: punkt...
Downloading NLTK resource: wordnet...
Downloading NLTK resource: averaged_perceptron_tagger...
NLTK resources are ready.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\swaro\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\swaro\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\swaro\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Data Loading and Preparation

In [2]:
# --- 1. DATASET SETUP ---
data = {
    "text": [
        "I love this product, it's perfect and exceeded my expectations!",
        "This application is absolutely terrible and unusable, a complete waste of time.",
        "It works fine most of the time, no major issues, just average performance.",
        "I’m so disappointed with the lack of features and constant bugs.",
        "Absolutely fantastic experience, top-notch support and incredibly quick resolution!",
        "Horrible service! I waited over an hour for a response and got no help.",
        "Not bad at all, could be better but it serves its basic purpose well.",
        "Worst thing ever, I'm canceling my subscription right now, I'm furious.",
        "Great help from support, they were very prompt, efficient, and friendly.",
        "Okay I guess, nothing special about it, quite neutral actually.",
        "The service was bad and my issue wasn't fixed.",
        "I have no opinion on the matter, it just exists.",
        "The user interface is clean, easy to navigate, and highly intuitive.",
        "It crashes every time I open the settings menu—completely broken software.",
        "It performs the core function, but the load times are truly unacceptable.",
        "I'm cautiously optimistic about the new features; they seem promising.",
        "I'm giving this a neutral score because I haven't used it enough to form an opinion.",
        "The price is a bit high for what it offers, making it a marginal value.",
        "Honestly, it's the best software update I've seen all year. Flawless!",
        "It was merely adequate; I encountered several minor inconveniences but nothing major.",
        "The customer service representative was rude, arrogant, and unhelpful.",
        "I found a bug, but otherwise, the experience was quite positive and speedy.",
        "This is highly functional, completely reliable, and I recommend it to everyone.",
        "I am so angry; the data I spent hours collecting was completely wiped out by the crash.",
        "The setup process was slightly confusing, leading to some early frustration.",
        "After a few hours of tinkering, it turned out to be exactly what I needed. Solid purchase.",
        "It's loud, bulky, and poorly designed. I regret this purchase.",
        "I was pleasantly surprised by the quality, which was much better than I expected.",
        "The documentation is non-existent, making it impossible to debug any problems.",
        "It’s totally fine, not the best, but I can't complain for the low price.",
        "This is amazing.",
        "I hate this.",
        "It works perfectly.",
        "A total failure.",
        "I am so happy with the results.",
        "This product is trash.",
        "Excellent.",
        "Worst ever.",
        "Simply the best purchase.",
        "A complete disaster.",
        "I'm thrilled!",
        "Total waste of money.",
    ],
    "label": [
        1.0, -0.9, 0.3, -0.85, 1.0, -1.0, 0.4, -1.0, 0.85, 0.0, -0.8, 0.0,
        0.95, -0.95, -0.5, 0.55, 0.0, -0.25, 1.0, 0.1, -0.75, 0.6, 0.9, -0.99,
        -0.4, 0.7, -0.8, 0.8, -0.7, 0.25, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0,
        1.0, -1.0, 1.0, -1.0, 1.0, -1.0
    ]
}
df = pd.DataFrame(data)

# Display a sample of the data
print(f"Dataset loaded with {len(df)} samples.")
print("\nFirst 5 samples:")
print(df.head())

Dataset loaded with 42 samples.

First 5 samples:
                                                text  label
0  I love this product, it's perfect and exceeded...   1.00
1  This application is absolutely terrible and un...  -0.90
2  It works fine most of the time, no major issue...   0.30
3  I’m so disappointed with the lack of features ...  -0.85
4  Absolutely fantastic experience, top-notch sup...   1.00


Text Preprocessing, Vectorization, and Split

In [None]:
# --- 2. PREPROCESSING & VECTORIZATION ---
# Initialize lemmatizer and set of English stop words
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Helper function to map NLTK's Part-of-Speech tag to WordNet's POS tag
def get_wordnet_pos(tag):
    """Map NLTK POS tag to WordNet POS tag for accurate lemmatization."""
    tag_map = {'J': wordnet.ADJ, 'V': wordnet.VERB, 'N': wordnet.NOUN, 'R': wordnet.ADV}
    # Default to NOUN if the tag is not recognized
    return tag_map.get(tag[0], wordnet.NOUN)

# Custom tokenizer for TF-IDF that performs cleaning, stop word removal, and lemmatization
def lemmatize_tokenizer(text):
    """
    Cleans text, removes stop words, and lemmatizes tokens using POS tags.
    """
    # Remove punctuation/non-word characters and convert to lower case
    text = re.sub(r'[^\w\s]', '', text.lower())
    # Tokenize and remove stop words and single-character tokens
    tokens = [w for w in text.split() if w not in stop_words and len(w) > 1]
    # Get POS tags for tokens
    tagged = pos_tag(tokens)
    # Lemmatize tokens based on their POS tag
    lemmatized = [lemmatizer.lemmatize(w, get_wordnet_pos(t)) for w, t in tagged]
    return lemmatized

# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer(
    max_features=1000,           # Max number of features (tokens/n-grams) to keep
    tokenizer=lemmatize_tokenizer, # Use the custom lemmatizing tokenizer
    ngram_range=(1, 3),          # Include unigrams, bigrams, and trigrams
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,           # Scaling factor to prevent highly frequent terms from dominating
    preprocessor=None,
    norm='l2' # Add L2 normalization for the final vector (common for neural networks)
)

# Apply vectorization to the 'text' data
X = vectorizer.fit_transform(df["text"]).toarray()
# Convert labels to NumPy float32 array, suitable for PyTorch regression
y = df["label"].values.astype(np.float32).reshape(-1, 1)
feature_names = vectorizer.get_feature_names_out()

import random
idf_values = vectorizer.idf_
idf_map = sorted(zip(feature_names, idf_values), key=lambda x: x[1])
print("IDF tokens:")
for tok, val in idf_map[:10]:
    print(f"  {tok:<30s} {val:.4f}")



# --- Vectorization Diagnostics ---
print("\n=== TF-IDF Vectorization Summary ===")
print(f"Corpus size: {len(df)} documents")
print(f"Vocabulary size: {len(feature_names)} (Max features was 1000)")
print(f"Input dimension (X): {X.shape}")
print("-" * 60)

# --- Train-Test Split ---
# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
INPUT_SIZE = X_train.shape[1] # The number of features from the vectorizer
print(f"Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}")
print(f"Input feature dimension for model: {INPUT_SIZE}")

Lowest-IDF (common) tokens:
  im                             2.9694
  time                           3.1518
  bad                            3.3749
  best                           3.3749
  hour                           3.3749
  purchase                       3.3749
  service                        3.3749
  absolutely                     3.6626
  bug                            3.6626
  complete                       3.6626

=== TF-IDF Vectorization Summary ===
Corpus size: 42 documents
Vocabulary size: 514 (Max features was 1000)
Input dimension (X): (42, 514)
------------------------------------------------------------
Training data shape: (33, 514), Testing data shape: (9, 514)
Input feature dimension for model: 514


Model Definition and Training

In [8]:
# --- 3. MODEL DEFINITION ---
class SentimentNet(nn.Module):
    """
    A simple Feed-Forward Neural Network (FFNN) for regression.
    The final activation is Tanh, which squashes the output to a range of [-1, 1],
    matching the label range for sentiment scores.
    """
    def __init__(self, input_size):
        super(SentimentNet, self).__init__()
        # Input layer (size is the number of TF-IDF features)
        self.fc1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        # Hidden layer
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        # Output layer (size 1 for the single sentiment score)
        self.fc3 = nn.Linear(64, 1)
        # Tanh activation squashes the final output to a range of [-1, 1]
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.tanh(self.fc3(x))

# Set device to GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentimentNet(INPUT_SIZE).to(device)

# --- 4. TRAINING LOOP SETUP ---
EPOCHS = 5000
LEARNING_RATE = 0.005
# Mean Squared Error (MSE) is used as the loss function for regression tasks
criterion = nn.MSELoss() 
# Adam is a popular optimization algorithm
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Convert NumPy arrays to PyTorch Tensors and move them to the selected device
X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.FloatTensor(y_train).to(device)

# --- TRAINING LOOP ---
print(f"Starting training on {len(X_train)} samples for {EPOCHS} epochs (LR={LEARNING_RATE}) on {device}...")
for epoch in range(EPOCHS):
    model.train() # Set the model to training mode
    optimizer.zero_grad() # Clear previous gradients
    
    # Forward pass
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    
    # Backward pass and optimization
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        # Print loss periodically to monitor progress
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {loss.item():.4f}")
print("Training Complete.")
print("-" * 60)

Starting training on 33 samples for 5000 epochs (LR=0.005) on cuda...
Epoch [100/5000], Loss: 0.0004
Epoch [200/5000], Loss: 0.0000
Epoch [300/5000], Loss: 0.0000
Epoch [400/5000], Loss: 0.0001
Epoch [500/5000], Loss: 0.0000
Epoch [600/5000], Loss: 0.0000
Epoch [700/5000], Loss: 0.0000
Epoch [800/5000], Loss: 0.0000
Epoch [900/5000], Loss: 0.0000
Epoch [1000/5000], Loss: 0.0000
Epoch [1100/5000], Loss: 0.0000
Epoch [1200/5000], Loss: 0.0000
Epoch [1300/5000], Loss: 0.0000
Epoch [1400/5000], Loss: 0.0000
Epoch [1500/5000], Loss: 0.0001
Epoch [1600/5000], Loss: 0.0000
Epoch [1700/5000], Loss: 0.0000
Epoch [1800/5000], Loss: 0.0000
Epoch [1900/5000], Loss: 0.0000
Epoch [2000/5000], Loss: 0.0000
Epoch [2100/5000], Loss: 0.0000
Epoch [2200/5000], Loss: 0.0000
Epoch [2300/5000], Loss: 0.0000
Epoch [2400/5000], Loss: 0.0000
Epoch [2500/5000], Loss: 0.0000
Epoch [2600/5000], Loss: 0.0000
Epoch [2700/5000], Loss: 0.0000
Epoch [2800/5000], Loss: 0.0000
Epoch [2900/5000], Loss: 0.0000
Epoch [3000

Inference and Testing on New Data

In [13]:
test_cases = [
    ("I was completely blown away by the speed and elegant design. A masterpiece!", "Positive"),
    ("The app's functionality is slow, buggy, and completely unusable now. Complete waste of time", "Negative"),
    ("The meeting notes were recorded. I am not sure if they are useful", "Neutral"),
    ("I have no preference about the color of the application.", "Neutral"),
    ("The service was well taken care of, and I really liked it.", "Positive"),
]

def predict_sentiment(text, vectorizer, model, device):
    new_vec = vectorizer.transform([text]).toarray()
    new_tensor = torch.FloatTensor(new_vec).to(device)
    with torch.no_grad():
        prediction = model(new_tensor).squeeze().item()
    return prediction

def classify_sentiment(score):
    """Convert numeric score to categorical label."""
    if score > 0.35:
        return "Positive"
    elif score < -0.35:
        return "Negative"
    else:
        return "Neutral"

print("\n**Inference Predictions (Score range: -1.0 to 1.0):**")
for text, expected in test_cases:
    score = predict_sentiment(text, vectorizer, model, device)
    sentiment_label = classify_sentiment(score)
    print(f"\nMessage: '{text}'")
    print(f"Expected: {expected} | Predicted Score: {score:+.4f} | Predicted Sentiment: {sentiment_label}")
print("-" * 60)



**Inference Predictions (Score range: -1.0 to 1.0):**

Message: 'I was completely blown away by the speed and elegant design. A masterpiece!'
Expected: Positive | Predicted Score: +0.3337 | Predicted Sentiment: Neutral

Message: 'The app's functionality is slow, buggy, and completely unusable now. Complete waste of time'
Expected: Negative | Predicted Score: -0.8515 | Predicted Sentiment: Negative

Message: 'The meeting notes were recorded. I am not sure if they are useful'
Expected: Neutral | Predicted Score: +0.1849 | Predicted Sentiment: Neutral

Message: 'I have no preference about the color of the application.'
Expected: Neutral | Predicted Score: -0.2969 | Predicted Sentiment: Neutral

Message: 'The service was well taken care of, and I really liked it.'
Expected: Positive | Predicted Score: -0.0105 | Predicted Sentiment: Neutral
------------------------------------------------------------
