## <b><i>Testing on data</i></b>

In [2]:
import re
import numpy as np
from joblib import load

In [3]:
file_path_ = r"WhatsAppChat.txt"

In [4]:
# Function to check whether a line opens with a date/time stamp
def date_time(s):
    """Return True when *s* begins with a ``d/m/y, h:mm`` stamp followed by `` -``.

    The pattern accepts any run of digits for each date and time field,
    an optional single character (a space or anything else) after the
    minutes, and an optional AM/PM marker in upper or lower case.
    Nothing is extracted here; the result is only a yes/no answer.
    """
    stamp = re.match(
        r"^([0-9]+)(\/)([0-9]+)(\/)([0-9]+), ([0-9]+):([0-9]+)([ ]|.)?(AM|PM|am|pm)? -",
        s,
    )
    return stamp is not None


# Function to check whether a message body carries an "Author: " prefix
def messenger(s):
    """Return True when *s* starts with a non-empty author followed by ``": "``.

    *s* is the part of a chat line that follows the ``" - "`` separator.
    Only the first ``": "`` is considered, so colons that appear later in
    the message text (times such as ``10:30``, URLs, ...) no longer cause
    an authored message to be rejected, and a line that merely contains a
    bare colon (e.g. ``"Meeting at 10:30"``) is not mistaken for one with
    an author.
    """
    author, separator, _ = s.partition(": ")
    return bool(separator) and bool(author)


# Function to extract message data
def message_data(line):
    """Split one stamped chat line into ``(date, time, author, message)``.

    *line* is expected to look like ``"<date>, <time> - <author>: <text>"``
    or, for lines without an author, ``"<date>, <time> - <text>"``.

    Only the first ``" - "`` and the first ``": "`` are treated as
    separators, so the same sequences occurring inside the message text
    are kept verbatim instead of being collapsed to a single space.

    Returns a 4-tuple; ``author`` is ``None`` when ``messenger`` reports no
    author or when the text contains no ``": "`` separator to split on.
    Raises ``ValueError`` if the part before ``" - "`` has no ``", "``.
    """
    dateTime, _, message = line.partition(" - ")
    date, time = dateTime.split(", ", 1)
    author = None
    if messenger(message):
        sender, separator, body = message.partition(": ")
        # Guard against a positive answer for text without ": " in it;
        # otherwise the whole text would become the author.
        if separator:
            author, message = sender, body
    return date, time, author, message

# parsed rows =================================================
# One [date, time, author, message] list per chat message.
data = []

# main block =================================================
# Read the chat export line by line. A line that starts with a date/time
# stamp opens a new message; any other line is a continuation of the
# message currently being collected.
try:
    with open(file_path_, encoding="utf-8") as fp:
        # The first line is skipped unconditionally
        # (presumably the export's header notice -- confirm).
        fp.readline()
        messageBuffer = []
        date, time, author = None, None, None
        for line in fp:
            line = line.strip()
            if date_time(line):
                # A new stamp closes the message collected so far.
                if messageBuffer:
                    data.append([date, time, author, ' '.join(messageBuffer)])
                messageBuffer.clear()
                date, time, author, message = message_data(line)
                messageBuffer.append(message)
            else:
                messageBuffer.append(line)
        # Flush the final message: no later stamp exists to trigger the
        # append above, so without this the last message would be lost.
        if messageBuffer:
            data.append([date, time, author, ' '.join(messageBuffer)])

except Exception as e:
    # Best effort: report the problem and keep whatever was parsed so far.
    print(f"Exception caught {e}")

In [5]:
import pandas as pd
# Tabulate the parsed rows, then pull the message column out as a plain list.
df = pd.DataFrame(data=data, columns=["Date", "Time", "Author", "Message"])
text = df.Message.tolist()

# Model Predictions

In [6]:
# Load the ten saved estimators from .joblib files; the paths are relative,
# so they are resolved against the current working directory.
# NOTE(review): joblib's load is pickle-based per the joblib docs, so it can
# run arbitrary code -- only load artifacts from a trusted source.
mnb_model = load('pipeline_mnb1.joblib')
svm_model = load("pipeline_svm.joblib")
lr_model = load("pipeline_lr.joblib")
rf_model = load("pipeline_rf.joblib")
dt_model = load("pipeline_dt.joblib")
knn_model = load("pipeline_knn.joblib")
# NOTE(review): the file name lacks the "pipeline_" prefix -- presumably a
# bare estimator rather than a full text pipeline; confirm it accepts raw text.
mnb799 = load("multinomialNB799.joblib")
mlp_model = load("pipeline_mlp.joblib")
gbc_model = load("pipeline_gbc.joblib")
model_abc = load("pipeline_abc.joblib")

In [8]:
import pandas as pd

# Every loaded estimator is run over the same column of chat messages.
models = [
    mnb_model,
    svm_model,
    lr_model,
    rf_model,
    dt_model,
    knn_model,
    mnb799,
    mlp_model,
    gbc_model,
    model_abc,
]

messages = df["Message"]

# Readable names for the numeric class ids the models are expected to emit;
# any other predicted value raises KeyError in the lookup below.
sentiment_labels = {-1: "negative", 0: "neutral", 1: "positive"}

for model in models:
    print(f"Using model:{model!s}")

    predictions = model.predict(messages)

    # Translate each predicted class id into its label, keeping the order.
    predicted_sentiments = list(map(sentiment_labels.__getitem__, predictions))

    # Pair each message with its label and renumber the rows from zero.
    result_df = pd.DataFrame(
        {"Message": messages, "Sentiment": predicted_sentiments}
    ).reset_index(drop=True)

    # Show only the first 50 rows per model, followed by a separator line.
    print(result_df.head(50))
    print("----------------------------------------------------------------------------------")

Using model:Pipeline(steps=[('tfidf', TfidfVectorizer()),
                ('mnb', MultinomialNB(alpha=0.5))])
                                              Message Sentiment
0                                      Sabko bhej  do   neutral
1                                                 Okk   neutral
2                         Abhinav log ko sbko bhej do   neutral
3                                                 Okk   neutral
4                                      Bhulna mt bhai   neutral
5                                         Sbko bhejdo   neutral
6                             Aur groups me mt bhejna  negative
7                                                 Okk   neutral
8                                  Abhinav do bhej do   neutral
9                                                  Ok   neutral
10                                       12 baje aana   neutral
11                                                Okk   neutral
12                                            Aoo gay  neg