# MLOps Assignment 1
# Text Classification - **Testing**

## Model Deployment

### Load the saved model

In [32]:
import pickle

# Define the file path where the trained model is saved
model_file_path = "naive_bayes_emotion_model.pkl"

# Load the saved Naive Bayes model from the file
with open(model_file_path, 'rb') as file:
    loaded_model = pickle.load(file)

print("Trained model loaded successfully")

Trained model loaded successfully


### Functions to Preprocess Text

In [33]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [48]:
import nltk
import re
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import torch
from transformers import AutoTokenizer
import pickle

# Download NLTK resources
nltk.download('punkt')
nltk.download('wordnet')

# Function to remove punctuations from text
def remove_punctuation(text):
    regular_punct = string.punctuation
    return str(re.sub(r'['+regular_punct+']', '', str(text)))

# Function to remove URLs from text
def remove_urls(text):
    return re.sub(r'http[s]?://\S+', '', text)

# Function to convert the text into lower case
def lower_case(text):
    return text.lower()

# Function to lemmatize text
def lemmatize(text):
    wordnet_lemmatizer = WordNetLemmatizer()
    tokens = nltk.word_tokenize(text)
    lemma_txt = ''
    for w in tokens:
        lemma_txt = lemma_txt + wordnet_lemmatizer.lemmatize(w) + ' '
    return lemma_txt


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


### Function to Predict Class

In [49]:

# Function to preprocess text
def preprocess_text(text):
    text = remove_urls(text)
    text = remove_punctuation(text)
    text = lower_case(text)
    text = lemmatize(text)
    return text

# Function to predict class
def predict_class(input_text, model):
    # Preprocess input text
    input_text = preprocess_text(input_text)

    # Tokenize the text
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    tokenized_text = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)

    # Convert tokenized data into tensors
    input_ids = tokenized_text['input_ids']
    attention_mask = tokenized_text['attention_mask']

    # Get the tokens
    tokens = tokenizer.convert_ids_to_tokens(input_ids.flatten())

    # Calculate the number of tokens
    num_tokens = len(tokens)

    # Here you can truncate or pad the number of tokens to match the expected number of features for MultinomialNB
    # For example, you can truncate or pad to 94 tokens

    # Truncate or pad the tokens to match the expected number of features
    max_features = 94
    if num_tokens > max_features:
        # Truncate tokens
        input_ids = input_ids[:, :max_features]
        attention_mask = attention_mask[:, :max_features]
    elif num_tokens < max_features:
        # Pad tokens
        input_ids = torch.nn.functional.pad(input_ids, (0, max_features - num_tokens), value=tokenizer.pad_token_id)
        attention_mask = torch.nn.functional.pad(attention_mask, (0, max_features - num_tokens), value=0)

    data = {'input_ids': input_ids, 'attention_mask': attention_mask}
    data_tensors = {key: torch.tensor(val) for key, val in data.items()}

    # Convert input_ids to NumPy array
    input_ids_numpy = data_tensors['input_ids'].numpy()

    # Reshape input_ids if necessary (optional step)
    # input_ids_numpy = input_ids_numpy.reshape(input_ids_numpy.shape[0], -1)

    # Make predictions using the model
    predicted_class = model.predict(input_ids_numpy)
    return predicted_class

## Model Testing

In [56]:
# Define labels corresponding to the predicted classes
labels = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise",}

# List of input texts
input_texts = [
    "im grabbing a minute to post i feel greedy wrong",
    "this movie is amazing, I loved every minute of it",
    "I'm not sure how I feel about this book, it's quite confusing",
    "feeling happy and excited about the upcoming trip",
    "today's weather is gloomy, it's making me sad",
    "the food at that restaurant was terrible, I won't be going back",
]

# Predict class for each input text
for text in input_texts:
    predicted_class = predict_class(text, loaded_model)
    predicted_label = labels[int(predicted_class)]  # Get the corresponding label

    print(f"Input text: {text}")
    print(f"Predicted class: {predicted_class} ({predicted_label})")
    print()

  data_tensors = {key: torch.tensor(val) for key, val in data.items()}
  predicted_label = labels[int(predicted_class)]  # Get the corresponding label


Input text: im grabbing a minute to post i feel greedy wrong
Predicted class: [4] (fear)



  data_tensors = {key: torch.tensor(val) for key, val in data.items()}
  predicted_label = labels[int(predicted_class)]  # Get the corresponding label
  data_tensors = {key: torch.tensor(val) for key, val in data.items()}
  predicted_label = labels[int(predicted_class)]  # Get the corresponding label


Input text: this movie is amazing, I loved every minute of it
Predicted class: [0] (sadness)

Input text: I'm not sure how I feel about this book, it's quite confusing
Predicted class: [4] (fear)



  data_tensors = {key: torch.tensor(val) for key, val in data.items()}
  predicted_label = labels[int(predicted_class)]  # Get the corresponding label


Input text: feeling happy and excited about the upcoming trip
Predicted class: [4] (fear)

Input text: today's weather is gloomy, it's making me sad
Predicted class: [4] (fear)

Input text: the food at that restaurant was terrible, I won't be going back
Predicted class: [0] (sadness)



  data_tensors = {key: torch.tensor(val) for key, val in data.items()}
  predicted_label = labels[int(predicted_class)]  # Get the corresponding label
  data_tensors = {key: torch.tensor(val) for key, val in data.items()}
  predicted_label = labels[int(predicted_class)]  # Get the corresponding label
