In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load the labelled tweets into a DataFrame.
# The path uses a forward slash: the previous backslash form contained the invalid
# escape sequence "\l" (a warning on recent Python versions) and only resolved on Windows.
data = pd.read_csv('Datasets/labeled_data.csv')

# Hold out 20% of the tweets for evaluation; the fixed seed keeps the split repeatable.
train_data, test_data, train_labels, test_labels = train_test_split(
    data['tweet'],
    data['class'],
    test_size=0.2,
    random_state=42,
)

# Turn the raw text into TF-IDF features.
# The vectorizer is fitted on the training tweets only and then reused, unchanged,
# for the held-out tweets so no test-set vocabulary leaks into training.
vectorizer = TfidfVectorizer()
train_vectors = vectorizer.fit_transform(train_data)
test_vectors = vectorizer.transform(test_data)

# Train a Random Forest classifier on the TF-IDF features.
# Random forests are stochastic (bootstrap samples, random feature subsets), so the
# seed is fixed -- the same value used for the train/test split -- to make a rerun
# produce the same model and the same evaluation report.
classifier = RandomForestClassifier(random_state=42)
classifier.fit(train_vectors, train_labels)

# Score the held-out tweets with the trained classifier.
predictions = classifier.predict(test_vectors)

# Print per-class precision, recall, F1 and support for the held-out set.
report = classification_report(test_labels, predictions)
print(report)

# Classify a single tweet typed in by the user.
user_tweet = input("Enter a tweet: ")

# Reuse the vectorizer fitted on the training tweets so the features line up with the model.
user_tweet_vector = vectorizer.transform([user_tweet])

# predict() returns one label per input row; pull out the single label as a scalar
# instead of comparing the whole array.
prediction = classifier.predict(user_tweet_vector)
predicted_class = prediction[0]

# The classifier is trained on three classes (0, 1 and 2), so testing only `== 1`
# reported every class-0 tweet as 'Non-cyberbullying'.
# NOTE(review): assumes the usual coding for this dataset -- 0 = hate speech,
# 1 = offensive language, 2 = neither -- confirm against the dataset documentation.
CYBERBULLYING_CLASSES = (0, 1)
print(f"Prediction: {'Cyberbullying' if predicted_class in CYBERBULLYING_CLASSES else 'Non-cyberbullying'}")

              precision    recall  f1-score   support

           0       0.57      0.07      0.13       290
           1       0.86      0.98      0.92      3832
           2       0.88      0.54      0.67       835

    accuracy                           0.86      4957
   macro avg       0.77      0.53      0.57      4957
weighted avg       0.84      0.86      0.83      4957

Enter a tweet: you are sexist
Prediction: Cyberbullying


In [None]:
import pickle

# Persist the trained Random Forest so it can be reloaded without retraining.
# The fitted model is named `classifier`; `clf` is not defined in the visible cells
# and raised NameError when this cell was run on a fresh kernel.
filename = 'cyberbully_model'

# `with` guarantees the file handle is flushed and closed even if pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# NOTE: this file holds only the classifier. New tweets must be transformed with the
# same fitted `vectorizer` before predict() is called, so it has to be saved as well
# wherever this model is reloaded.