In [12]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax
import csv
import os
import glob

# Checkpoint identifier shared by the tokenizer and the classifier below.
model_name = "bert-base-uncased"

# Tokenizer matching the checkpoint's vocabulary.
tokenizer = BertTokenizer.from_pretrained(model_name)

# Sequence classifier with two output labels (hate speech / not hate speech).
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# Define a function for classification
def classify_text(text):
    """Classify ``text`` as "Hate Speech" or "Not Hate Speech".

    The text is tokenized with the module-level ``tokenizer`` and run through
    the module-level ``model``; the index of the highest-probability class is
    mapped to a label (index 0 -> "Hate Speech", anything else ->
    "Not Hate Speech").

    Args:
        text: The text to classify. Expected to be a single string: the
            result is read with ``.item()``, which only works when the
            prediction holds exactly one element.

    Returns:
        The string "Hate Speech" or "Not Hate Speech".

    NOTE(review): the loading warning for this checkpoint reports that the
    classifier weights are newly initialized, so the labels are not
    meaningful until the model has been fine-tuned. The index-to-label
    mapping is this script's own choice -- confirm it against the training
    labels once a fine-tuned checkpoint is used.
    """
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")

    # Inference only: disable autograd so no gradient graph is built and
    # retained for every classified row.
    with torch.no_grad():
        logits = model(**inputs).logits

    probabilities = softmax(logits, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1).item()

    return "Hate Speech" if predicted_class == 0 else "Not Hate Speech"

# Function to process the CSV file
def process_csv_file(file_path):
    """Classify every row of a CSV file and print the result.

    The file must have a header row containing a ``text`` column. For each
    data row, the ``text`` value is passed to ``classify_text`` and a line of
    the form ``Text: <text> - Classification: <label>`` is printed.

    Args:
        file_path: Path of the CSV file to read.

    Raises:
        KeyError: If the header row has no ``text`` column.
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields with embedded newlines are parsed correctly. utf-8-sig reads
    # plain UTF-8 too, but also strips a leading BOM that would otherwise be
    # glued onto the first column name.
    with open(file_path, mode='r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.DictReader(file)

        # fieldnames is None for a completely empty file; there is nothing
        # to process in that case, so only validate a header that exists.
        fieldnames = csv_reader.fieldnames
        if fieldnames is not None and "text" not in fieldnames:
            raise KeyError(
                f"'text' column not found in {file_path}; columns: {fieldnames}"
            )

        for row in csv_reader:
            text = row["text"]
            if text is None:
                # Row has fewer fields than the header, so DictReader filled
                # the missing value with None; there is no text to classify.
                continue
            classification = classify_text(text)
            print(f"Text: {text} - Classification: {classification}")

# Specify the directory where the downloads are stored
download_directory = r'C:\Users\srija\Downloads'  # Replace with the path to your Downloads directory

# Collect every CSV file directly inside the download directory
list_of_files = glob.glob(os.path.join(download_directory, '*.csv'))

# Fail with a clear message instead of the opaque
# "max() arg is an empty sequence" when the directory holds no CSV files.
if not list_of_files:
    raise FileNotFoundError(f"No CSV files found in {download_directory}")

# Pick the most recent file by os.path.getctime (creation time on Windows;
# on Unix this is the last metadata change time instead).
latest_file = max(list_of_files, key=os.path.getctime)

# Process the latest CSV file
process_csv_file(latest_file)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Text: I will rape you - Classification: Hate Speech
Text: old ankit was best - Classification: Hate Speech
Text: Season 5 nhi hogi kya 😢 - Classification: Not Hate Speech
Text: Please season 5 banav - Classification: Not Hate Speech
Text: 8743891585 raliye na phn mila liye bta dunga kit ana. 🙏 - Classification: Not Hate Speech
Text: @sudipta.ds_78 it's DOPA 🙏😁 - Classification: Not Hate Speech
Text: JhantOO Supremacy 👏❤️🔥 - Classification: Not Hate Speech
Text: yoo - Classification: Not Hate Speech
Text: Bhai  to jhatu k he fan hu 🫂🫂 - Classification: Not Hate Speech
Text: Jhatu and jaat 😍 - Classification: Hate Speech
Text: Jhatoo bhaiya ek din apke saath mein bhi webseries bnaunga - Classification: Not Hate Speech
