<a href="https://colab.research.google.com/github/Fuad-Khan/Religious-Harassment-Models/blob/main/3_Class_Riligious_Traditional_ML_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

In [None]:
# Step 1: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pickle

# Step 2: Load the dataset
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount call is visible in this section of the notebook.
file_path = "/content/drive/MyDrive/Research Paper/Data Preprocessing/Cleaned_Labeled_Religious_Comments_Numeric_2.0.csv"
df = pd.read_csv(file_path)

# Step 3: Use comment text and numeric label
text_column = 'comment'
label_column = 'label_numeric'  # use numeric labels (0, 1, 2)

# Step 4: Split into train and test sets
# astype(str) converts a missing comment into the literal string "nan", and
# astype(int) raises if a label is missing, so the CSV is expected to be clean.
X = df[text_column].astype(str)
y = df[label_column].astype(int)
# Hold out 20% for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: TF-IDF Vectorization
# The vectorizer is fitted on the training split only and then applied to the
# test split, so no test vocabulary/IDF statistics leak into training.
# NOTE(review): the dataset preview printed later in this notebook shows Bengali
# comments, so stop_words='english' probably removes very little — confirm
# whether a Bengali stop-word list (and token pattern) is intended.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 6: Train Logistic Regression for multiclass
# `multi_class` is deprecated since scikit-learn 1.5 and is therefore not
# passed: with solver='lbfgs' and more than two classes the estimator already
# fits a multinomial (softmax) model by default, so results are unchanged.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train_tfidf, y_train)

# Step 7: Predictions and evaluation
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 8: Save model and vectorizer
# Both artifacts are needed at inference time: new text must be transformed
# with this exact fitted vectorizer before being passed to the model.
with open("/content/drive/MyDrive/Research Paper/Models/ Traditional ML Models/Riligious_logistic_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("/content/drive/MyDrive/Research Paper/Models/ Traditional ML Models/logistic_tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)




Accuracy: 0.7434362934362935
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.89      0.83      2552
           1       0.77      0.60      0.67      1102
           2       0.67      0.61      0.63      1526

    accuracy                           0.74      5180
   macro avg       0.74      0.70      0.71      5180
weighted avg       0.74      0.74      0.74      5180



# SVM

In [None]:
# Step 1: Install required library for saving model
!pip install -q joblib

# Step 2: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Step 3: Read the labelled comments CSV
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount call is visible in this section of the notebook.
file_path = "/content/drive/MyDrive/Research Paper/Data Preprocessing/Cleaned_Labeled_Religious_Comments_Numeric_2.0.csv"
df = pd.read_csv(file_path)

# Step 4: Pick the input text column and the numeric 3-class target column
text_column = 'comment'
label_column = 'label_numeric'
X = df[text_column].astype(str)
y = df[label_column].astype(int)

# Step 5: Hold out 20% of the rows for testing (seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: TF-IDF features — fitted on the training split only, then reused
# unchanged on the test split
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 7: Fit a linear-kernel SVM
# decision_function_shape='ovr' only controls the shape of decision_function();
# per the scikit-learn docs, SVC always trains one-vs-one classifiers internally.
svm_model = SVC(kernel='linear', decision_function_shape='ovr')
svm_model.fit(X_train_tfidf, y_train)

# Step 8: Score the held-out split
y_pred = svm_model.predict(X_test_tfidf)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

# Step 9: Persist the fitted model together with the vectorizer it was trained on
svm_model_path = '/content/drive/MyDrive/Research Paper/Models/ Traditional ML Models/Riligious_svm_model.joblib'
svm_vectorizer_path = '/content/drive/MyDrive/Research Paper/Models/ Traditional ML Models/svm_tfidf_vectorizer.joblib'
joblib.dump(svm_model, svm_model_path)
joblib.dump(vectorizer, svm_vectorizer_path)

print("✅ Model saved as 'Riligious_svm_model.joblib'")
print("✅ Vectorizer saved as 'svm_tfidf_vectorizer.joblib'")


Accuracy: 0.7447876447876448
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.89      0.83      2552
           1       0.77      0.62      0.69      1102
           2       0.67      0.59      0.63      1526

    accuracy                           0.74      5180
   macro avg       0.74      0.70      0.71      5180
weighted avg       0.74      0.74      0.74      5180

✅ Model saved as 'Riligious_svm_model.joblib'
✅ Vectorizer saved as 'svm_tfidf_vectorizer.joblib'


# Random Forest model

In [None]:
# Step 1: Install joblib for model saving
!pip install -q joblib

# Step 2: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import matplotlib.pyplot as plt
import numpy as np

# Step 3: Load dataset
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount call is visible in this section of the notebook.
file_path = "/content/drive/MyDrive/Research Paper/Data Preprocessing/Cleaned_Labeled_Religious_Comments_Numeric_2.0.csv"
df = pd.read_csv(file_path)

# Step 4: Check and define columns
# Quick sanity check of the first rows and the available column names.
print(df.head())
print(df.columns)

text_column = 'comment'           # Feature column
label_column = 'label_numeric'    # ✅ Numeric 3-class label column

# Step 5: Split data
# astype(str) converts a missing comment into the literal string "nan", and
# astype(int) raises if a label is missing, so the CSV is expected to be clean.
X = df[text_column].astype(str)
y = df[label_column].astype(int)
# Hold out 20% for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: TF-IDF Vectorization
# Fitted on the training split only, then applied unchanged to the test split.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 7: Train Random Forest model
# random_state pins the bootstrap/feature sampling so reruns give the same forest.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_tfidf, y_train)

# Step 8: Evaluate model
y_pred = rf_model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 9: Save model and vectorizer
# The filenames are defined once and reused for both saving and reporting, so
# the confirmation messages can never name a file that was not written.
# (Previously the message claimed 'random_forest_tfidf_vectorizer.joblib' while
# the vectorizer was actually written to 'tfidf_vectorizer.joblib'; the on-disk
# name is kept as-is in case other notebooks load it.)
models_dir = '/content/drive/MyDrive/Research Paper/Models/ Traditional ML Models'
rf_model_filename = 'Riligious_random_forest_model.joblib'
rf_vectorizer_filename = 'tfidf_vectorizer.joblib'

joblib.dump(rf_model, f"{models_dir}/{rf_model_filename}")
joblib.dump(vectorizer, f"{models_dir}/{rf_vectorizer_filename}")

print(f"✅ Model saved as '{rf_model_filename}'")
print(f"✅ Vectorizer saved as '{rf_vectorizer_filename}'")


                                             comment                 label  \
0  বকরি ঈদ  রমজান  মহরমে পরিবেশ দুষিত হয় না  এগুল...             Religious   
1  ওরে তোদের লুঙ্গি দাদু ও ছেলের গলা কেটেছিল  তুই...         Not Religious   
2                   ইসলাম বুঝলে ও আরও বর্বর হয়ে যাবে  Religious Harassment   
3                           ভাই  হিন্দু কোন ধর্মই না             Religious   
4       এই ভাবনাটাই কোনো ধর্মের সবচেয়ে নিচ ও হীন দিক             Religious   

   label_numeric  
0              1  
1              0  
2              2  
3              1  
4              1  
Index(['comment', 'label', 'label_numeric'], dtype='object')
Accuracy: 0.7386100386100386
Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.90      0.82      2552
           1       0.79      0.58      0.67      1102
           2       0.67      0.58      0.62      1526

    accuracy                           0.74      5180
   macro avg       0.74      0.