In [2]:
import pandas as pd
tweets_df = pd.read_csv("Dataset/tweets/tweets-valid.csv")
hate_df = pd.read_csv("Dataset/hate/hate_bin_valid.csv")
tweets_train= pd.read_csv("Dataset/tweets/tweets-extra.csv")
hate_train = pd.read_csv("Dataset/hate/hate_bin_train.csv")
emotion_df=pd.read_csv("Dataset/emotion/validation.csv")
emotion_train=pd.read_csv("Dataset/emotion/train.csv")




In [3]:
import re
import emoji
def clean_text(text):
    text = re.sub(r"http\S+|@\w+|#[A-Za-z0-9_]+", "", text)
    text = re.sub(r"[^\w\s]", "", text)
    return text.strip().lower()

emotion_train['Tweet'] =emotion_train['Tweet'].astype(str).apply(clean_text)
emotion_df['Tweet'] =emotion_df['Tweet'].astype(str).apply(clean_text)
tweets_df['tweet'] = tweets_df['tweet'].astype(str).apply(clean_text)
tweets_train['tweet'] = tweets_train['tweet'].astype(str).apply(clean_text)
hate_train['text'] = hate_train['text'].astype(str).apply(clean_text)
hate_df['text'] = hate_df['text'].astype(str).apply(clean_text)


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# === Sentiment (Emotion) Classification Pipeline ===
sentiment_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000)),
    ('clf', LogisticRegression(max_iter=1000))
])

# === Hate Speech Classification Pipeline ===
hate_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000)),
    ('clf', LogisticRegression(max_iter=1000))
])

# === Training the models ===
sentiment_pipeline.fit(emotion_train['Tweet'], emotion_train['Label'])
hate_pipeline.fit(hate_train['text'], hate_train['label'])

# === Making Predictions ===
sentiment_preds = sentiment_pipeline.predict(emotion_df['Tweet'])
hate_preds = hate_pipeline.predict(hate_df['text'])

# === Classification Reports ===
print("==== Sentiment Classification Report ====")
print(classification_report(emotion_df['Label'], sentiment_preds))

print("\n==== Hate Speech Classification Report ====")
print(classification_report(hate_df['label'], hate_preds))


==== Sentiment Classification Report ====
               precision    recall  f1-score   support

Anger/Disgust       0.56      0.65      0.60       405
   Excitement       0.67      0.05      0.09        44
         Fear       0.50      0.08      0.13        13
    Happiness       0.66      0.66      0.66       140
      Neutral       0.47      0.71      0.57       499
        Pride       0.55      0.14      0.23        76
      Respect       0.64      0.50      0.56       141
      Sadness       0.67      0.03      0.05        76
      Sarcasm       0.00      0.00      0.00        10
     Surprise       0.00      0.00      0.00        85
      respect       0.00      0.00      0.00         6
      sadness       0.00      0.00      0.00         5

     accuracy                           0.53      1500
    macro avg       0.39      0.24      0.24      1500
 weighted avg       0.52      0.53      0.49      1500


==== Hate Speech Classification Report ====
              precision    rec

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [5]:
def chatbot():
    print("\nMarathi Sentiment & Hate Detection Chatbot")
    print("Type 'exit' to stop.\n")
    while True:
        user_input = input("📥 तुमचं वाक्य: ")
        if user_input.lower() == 'exit':
            break
        
        cleaned = clean_text(user_input)
        sentiment_pred = sentiment_pipeline.predict([cleaned])[0]
        hate_pred = hate_pipeline.predict([cleaned])[0]

        print(f"➡️ भावना (Sentiment): {sentiment_pred}")
        print(f"➡️ द्वेष / गैरवर्तन (Hate): {hate_pred}\n")


chatbot()



Marathi Sentiment & Hate Detection Chatbot
Type 'exit' to stop.

➡️ भावना (Sentiment): Anger/Disgust
➡️ द्वेष / गैरवर्तन (Hate): HOF

➡️ भावना (Sentiment): Neutral
➡️ द्वेष / गैरवर्तन (Hate): HOF

