In [2]:
#impoting libraries
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [5]:
train_path="/content/train.txt"
test_path="/content/test.txt"
val_path="/content/val.txt"

In [6]:
train_df = pd.read_csv(train_path, sep=";", header=None, names=["text", "emotion"])
val_df = pd.read_csv(val_path, sep=";", header=None, names=["text", "emotion"])
test_df = pd.read_csv(test_path, sep=";", header=None, names=["text", "emotion"])
df = pd.concat([train_df, val_df, test_df], ignore_index=True)
print("Dataset shape:", df.shape)
print(df.head())


Dataset shape: (20000, 2)
                                                text  emotion
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger


In [7]:
#emotion assignment
mapping = {
    "joy": "Praise",
    "love": "Support",
    "anger": "Hate/Abuse",
    "fear": "Threat",
    "sadness": "Emotional",
    "surprise": "Praise"
}
df["category"] = df["emotion"].map(mapping)
extra_data = {
    "text": [
        "The animation was okay but the voiceover felt off.",   # Constructive Criticism
        "Nice effort, but the background music didn’t fit well.", # Constructive Criticism
        "Follow me for free giveaways!!!",                      # Irrelevant/Spam
        "Can you explain this with another example?"             # Question/Suggestion
    ],
    "category": [
        "Constructive Criticism",
        "Constructive Criticism",
        "Irrelevant/Spam",
        "Question/Suggestion"
    ]
}

extra_df = pd.DataFrame(extra_data)
df_final = pd.concat([df, extra_df], ignore_index=True)

print("Final dataset shape:", df_final.shape)
print(df_final["category"].value_counts())

Final dataset shape: (20004, 3)
category
Praise                    7480
Emotional                 5797
Hate/Abuse                2709
Threat                    2373
Support                   1641
Constructive Criticism       2
Irrelevant/Spam              1
Question/Suggestion          1
Name: count, dtype: int64


In [8]:
#preprocessing part
def preprocess(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)  # remove punctuation/numbers
    text = " ".join([word for word in text.split() if word not in stop_words])
    return text

df_final["clean_text"] = df_final["text"].apply(preprocess)


In [12]:
X = df_final["clean_text"]
y = df_final["category"]
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [13]:
#classifier used= Logotsic Regression
model = LogisticRegression(max_iter=300)
model.fit(X_train_vec, y_train)
y_pred = model.predict(X_test_vec)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
sample_comments = [
    "Amazing work! Loved the animation.",         # Praise
    "Keep going, you're doing great!",            # Support
    "This is trash, quit now.",                   # Hate/Abuse
    "I'll report you if this continues.",         # Threat
    "This reminded me of my childhood.",          # Emotional
    "Follow me for followers!",                   # Spam
    "The video was good, but the sound was poor.",# Constructive Criticism
    "Can you make one on topic X?"                # Question
]

sample_clean = [preprocess(c) for c in sample_comments]
sample_vec = vectorizer.transform(sample_clean)
predictions = model.predict(sample_vec)


Model Accuracy: 0.8880279930017496
                 precision    recall  f1-score   support

      Emotional       0.92      0.94      0.93      1173
     Hate/Abuse       0.94      0.82      0.87       565
Irrelevant/Spam       0.00      0.00      0.00         1
         Praise       0.85      0.96      0.90      1492
        Support       0.89      0.70      0.78       321
         Threat       0.89      0.72      0.80       449

       accuracy                           0.89      4001
      macro avg       0.75      0.69      0.71      4001
   weighted avg       0.89      0.89      0.89      4001



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [15]:
reply_templates = {
    "Praise": "Thank you for your kind words! We appreciate your support.",
    "Support": "Your encouragement means a lot to us. We’ll keep going!",
    "Constructive Criticism": "Thanks for your feedback! We’ll work on improving.",
    "Hate/Abuse": "We strive for respectful communication. Please be constructive.",
    "Threat": "Your concern has been noted. We will escalate this appropriately.",
    "Emotional": "We’re glad our content connected with you emotionally.",
    "Irrelevant/Spam": "This comment seems unrelated. Let’s stay on topic.",
    "Question/Suggestion": "Great question! We’ll consider this in future updates."
}
for comment, label in zip(sample_comments, predictions):
    print(f"\nComment: {comment}")
    print(f"Predicted Category: {label}")
    print("Suggested Reply:", reply_templates.get(label, ""))


Comment: Amazing work! Loved the animation.
Predicted Category: Praise
Suggested Reply: Thank you for your kind words! We appreciate your support.

Comment: Keep going, you're doing great!
Predicted Category: Praise
Suggested Reply: Thank you for your kind words! We appreciate your support.

Comment: This is trash, quit now.
Predicted Category: Emotional
Suggested Reply: We’re glad our content connected with you emotionally.

Comment: I'll report you if this continues.
Predicted Category: Emotional
Suggested Reply: We’re glad our content connected with you emotionally.

Comment: This reminded me of my childhood.
Predicted Category: Emotional
Suggested Reply: We’re glad our content connected with you emotionally.

Comment: Follow me for followers!
Predicted Category: Praise
Suggested Reply: Thank you for your kind words! We appreciate your support.

Comment: The video was good, but the sound was poor.
Predicted Category: Praise
Suggested Reply: Thank you for your kind words! We appreci

In [16]:
def predict_comment(comment):
    # Preprocess
    clean = preprocess(comment)
    vec = vectorizer.transform([clean])
    pred = model.predict(vec)[0]
    reply = reply_templates.get(pred, "No reply available.")
    return pred, reply

# Real-time loop
print("💬 Comment Categorization & Reply Assistant Tool (type 'exit' to stop)\n")
while True:
    user_input = input("Enter a comment: ")
    if user_input.lower() == "exit":
        break
    category, reply = predict_comment(user_input)
    print(f"Predicted Category: {category}")
    print(f"Suggested Reply: {reply}\n")


💬 Comment Categorization & Reply Assistant Tool (type 'exit' to stop)

Enter a comment: Hey this project seems cool!
Predicted Category: Praise
Suggested Reply: Thank you for your kind words! We appreciate your support.

Enter a comment: exit
