Identifying Misinformation Spread on Social Media.


In [3]:
import pandas as pd
import numpy as np
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Download the NLTK stop-word corpus so `stopwords.words('english')` is
# available to the text-cleaning step below.
nltk.download('stopwords')
from nltk.corpus import stopwords
import string

# Load the labelled posts. The rest of the script reads the 'text' column
# (raw post text) and the 'label' column (classification target).
df = pd.read_csv('text_label.csv')  

# Translation table that deletes every ASCII punctuation character; built once
# because it never changes between calls.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# English stop words, filled in on first use by _english_stopwords().
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return the NLTK English stop words as a cached frozenset."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        # Load the list a single time and keep it as a set: membership tests
        # become O(1) instead of a linear scan of a freshly built list.
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess_text(text):
    """Normalise a post for vectorisation.

    The text is lower-cased, stripped of ASCII punctuation, split on
    whitespace, and filtered of English stop words.

    Args:
        text: The raw post as a ``str``.

    Returns:
        The remaining tokens joined by single spaces (an empty string when
        nothing survives the filtering).
    """
    text = text.lower()
    text = text.translate(_PUNCTUATION_TABLE)
    stop_words = _english_stopwords()
    tokens = [word for word in text.split() if word not in stop_words]
    return ' '.join(tokens)

# Clean every post once and keep the result next to the raw text.
df['cleaned_text'] = df['text'].map(preprocess_text)

# Features are the cleaned posts; the target is the label column.
X, y = df['cleaned_text'], df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the TF-IDF vocabulary from the training posts only, then reuse the
# fitted vectorizer to encode the held-out posts.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes classifier on the TF-IDF features
# (fit() returns the estimator itself, so construction and training chain).
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the held-out posts and print per-class precision/recall/F1.
y_pred = model.predict(X_test_tfidf)
report = classification_report(y_test, y_pred)
print(report)


# Two hand-written posts used as a quick smoke test of the trained model.
new_posts = [
    'This statement is completely false.',
    'Research shows this to be true.',
]

# Apply the same cleaning and the already-fitted vectorizer as in training.
new_posts_cleaned = list(map(preprocess_text, new_posts))
new_posts_tfidf = vectorizer.transform(new_posts_cleaned)
predictions = model.predict(new_posts_tfidf)

# Label 1 is reported as "True"; any other label is reported as "False".
for pred, post in zip(predictions, new_posts):
    print(f'Post: "{post}" - Prediction: {"True" if pred == 1 else "False"}')

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Post: "This statement is completely false." - Prediction: False
Post: "Research shows this to be true." - Prediction: True


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\chand\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
