<a href="https://colab.research.google.com/github/Fuad-Khan/Religious-Harassment-Models/blob/main/Research_Traditional_ML_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

In [None]:
# Step 1: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Step 2: Load the dataset
file_path = "/content/Cleaned_Labeled_Religious_Comments.csv"  # Make sure to upload the file to Colab
df = pd.read_csv(file_path)

# Step 3: Inspect the dataset
print(df.head())
print(df.columns)

# Step 4: Rename columns if necessary (assume text and label are 'comment' and 'label')
text_column = 'comment'  # Change if different
label_column = 'label'   # Change if different

# Step 5: Split data into train and test sets
X = df[text_column]
y = df[label_column]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Convert text to TF-IDF vectors
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 7: Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Step 8: Make predictions and evaluate
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


                                            comment                 label  \
0  ঘরে বসে শুট করতে কেমন লেগেছে ক্যামেরাতে কে ছিলেন         Not Religious   
1                           অরে বাবা এই টা কোন পাগল  Religious Harassment   
2                             ক্যাপ্টেন অফ বাংলাদেশ         Not Religious   
3                               অন্যরকম  ভালো লাগলো         Not Religious   
4                       মোহাম্মদ কফিল উদ্দীন মাহমুদ             Religious   

   label_binary  
0             0  
1             1  
2             0  
3             0  
4             0  
Index(['comment', 'label', 'label_binary'], dtype='object')
Accuracy: 0.7554537521815009
Classification Report:
                       precision    recall  f1-score   support

       Not Religious       0.77      0.89      0.83      2278
           Religious       0.79      0.59      0.68       958
Religious Harassment       0.70      0.65      0.67      1348

            accuracy                           0.76      4584
    

# SVM

In [None]:
# Step 1: Install required library for saving model
!pip install joblib

# Step 2: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Step 3: Load the dataset
file_path = "/content/Cleaned_Labeled_Religious_Comments.csv"  # Ensure file is uploaded
df = pd.read_csv(file_path)

# Step 4: Inspect dataset
print(df.head())
print(df.columns)

# Step 5: Define feature and label columns
text_column = 'comment'  # Update if different
label_column = 'label'   # Update if different
X = df[text_column]
y = df[label_column]

# Step 6: Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7: TF-IDF vectorization
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 8: Train SVM model
svm_model = SVC(kernel='linear')  # Linear kernel works well for high-dimensional data
svm_model.fit(X_train_tfidf, y_train)

# Step 9: Evaluate model
y_pred = svm_model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 10: Save the trained model and vectorizer
joblib.dump(svm_model, 'svm_model.joblib')
joblib.dump(vectorizer, 'tfidf_vectorizer.joblib')

print("Model and vectorizer saved as 'svm_model.joblib' and 'tfidf_vectorizer.joblib'")


                                            comment                 label  \
0  ঘরে বসে শুট করতে কেমন লেগেছে ক্যামেরাতে কে ছিলেন         Not Religious   
1                           অরে বাবা এই টা কোন পাগল  Religious Harassment   
2                             ক্যাপ্টেন অফ বাংলাদেশ         Not Religious   
3                               অন্যরকম  ভালো লাগলো         Not Religious   
4                       মোহাম্মদ কফিল উদ্দীন মাহমুদ             Religious   

   label_binary  
0             0  
1             1  
2             0  
3             0  
4             0  
Index(['comment', 'label', 'label_binary'], dtype='object')
Accuracy: 0.7528359511343804
Classification Report:
                       precision    recall  f1-score   support

       Not Religious       0.77      0.89      0.83      2278
           Religious       0.79      0.59      0.68       958
Religious Harassment       0.69      0.64      0.66      1348

            accuracy                           0.75      4584
    

#  Random Forest model

In [6]:
# Step 1: Install joblib for model saving
!pip install joblib

# Step 2: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import matplotlib.pyplot as plt
import numpy as np

# Step 3: Load dataset
file_path = "/content/Cleaned_Labeled_Religious_Comments.csv"  # Adjust if needed
df = pd.read_csv(file_path)

# Step 4: Check and define columns
print(df.head())
print(df.columns)
text_column = 'comment'  # Change if necessary
label_column = 'label'

# Step 5: Split data
X = df[text_column]
y = df[label_column]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 7: Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_tfidf, y_train)

# Step 8: Evaluate model
y_pred = rf_model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


# Step 10: Save model and vectorizer
joblib.dump(rf_model, 'random_forest_model.joblib')
joblib.dump(vectorizer, 'tfidf_vectorizer.joblib')
print("Model and vectorizer saved as 'random_forest_model.joblib' and 'tfidf_vectorizer.joblib'")


                                            comment                 label  \
0  ঘরে বসে শুট করতে কেমন লেগেছে ক্যামেরাতে কে ছিলেন         Not Religious   
1                           অরে বাবা এই টা কোন পাগল  Religious Harassment   
2                             ক্যাপ্টেন অফ বাংলাদেশ         Not Religious   
3                               অন্যরকম  ভালো লাগলো         Not Religious   
4                       মোহাম্মদ কফিল উদ্দীন মাহমুদ             Religious   

   label_binary  
0             0  
1             1  
2             0  
3             0  
4             0  
Index(['comment', 'label', 'label_binary'], dtype='object')
Accuracy: 0.7449825479930192
Classification Report:
                       precision    recall  f1-score   support

       Not Religious       0.76      0.89      0.82      2278
           Religious       0.82      0.56      0.67       958
Religious Harassment       0.67      0.62      0.65      1348

            accuracy                           0.74      4584
    