In [10]:
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
import re
import pickle

# Read the CSV file
df = pd.read_csv("News_2.csv", encoding='latin1')

# Check for missing values. The per-column counts are kept in a variable
# because a bare expression in the middle of a cell is evaluated and then
# silently discarded.
missing_counts = df.isnull().sum()

# Drop rows that lack either the article text or its label: stemming() passes
# every 'News' value to re.sub, which raises TypeError on the float NaN that
# pandas uses for an empty cell.
df = df.dropna(subset=['News', 'Label'])

# Get class counts (taken after the drop so they describe the rows actually used)
class_count = df["Label"].value_counts()

# Stemmer instance used by stemming()
snowball_stem = SnowballStemmer('english')

# Stop-word sets built on first use, keyed by language name.
_STOPWORD_SETS = {}


def _english_stopwords():
    """Return the NLTK English stop words as a frozenset, loading them only once."""
    if 'english' not in _STOPWORD_SETS:
        _STOPWORD_SETS['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_SETS['english']


def stemming(content):
    """Normalise a piece of text for vectorisation.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    removed, and each remaining token is reduced to its Snowball stem.

    Parameters
    ----------
    content : str
        Raw text to normalise.

    Returns
    -------
    str
        The surviving stems joined by single spaces (an empty string when no
        token survives).
    """
    # Fetched once per call and held as a set: calling stopwords.words() inside
    # the comprehension would rebuild the list and scan it linearly per token.
    stop_words = _english_stopwords()

    letters_only = re.sub('[^a-zA-Z]', ' ', content)
    tokens = letters_only.lower().split()
    # The stop-word test is applied to the token before it is stemmed.
    stems = [snowball_stem.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(stems)


# Replace the raw article text with its stemmed, stop-word-free form
df['News'] = df['News'].apply(stemming)

# Split data into features and labels
x = df['News']
y = df['Label']

# Split data into training and testing sets.
# random_state pins the shuffle so the split, and therefore the accuracy
# reported for the model, is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

# Create TF-IDF vectorizer
vect = TfidfVectorizer()

# Fit the vocabulary and IDF weights on the training texts only, then transform them
x_train = vect.fit_transform(x_train)

# Transform the testing data with the already-fitted vectorizer (no refit)
x_test = vect.transform(x_test)

# Create and train the SVM model
model = SVC()
model.fit(x_train, y_train)

In [11]:
# Make predictions on the testing data
prediction = model.predict(x_test)

# Calculate the model accuracy
accuracy = model.score(x_test, y_test)
print("Accuracy:", accuracy)

# Save the vectorizer and model using pickle.
# The with-blocks close (and flush) each file before it is read back below,
# instead of leaving the handles open.
with open('vector.pkl', 'wb') as vector_file:
    pickle.dump(vect, vector_file)
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the vectorizer and model from the saved files.
# NOTE: pickle.load can execute arbitrary code; only load pickle files that
# come from a trusted source (here they were written just above).
with open('vector.pkl', 'rb') as vector_file:
    vector_form = pickle.load(vector_file)
with open('model.pkl', 'rb') as model_file:
    load_model = pickle.load(model_file)

# Function to classify news as fake or not
def fake_news(news):
    """Predict the label of a single news text.

    The text is normalised with ``stemming``, vectorised as a one-item batch
    with the reloaded vectorizer ``vector_form``, and passed to the reloaded
    model ``load_model``.

    Parameters
    ----------
    news : str
        Raw news text to classify.

    Returns
    -------
    The value returned by ``load_model.predict`` for the one-item batch.
    """
    cleaned_text = stemming(news)
    features = vector_form.transform([cleaned_text])
    return load_model.predict(features)

# Smoke-test the full pipeline (stemming -> vectorizer -> model) on a short
# sentence and print the prediction returned by fake_news().
news_input = "This is a sample news article"
result = fake_news(news_input)
print(result)

Accuracy: 0.9814814814814815
['REAL']


In [36]:
# Classify a longer, multi-paragraph article passed inline as a triple-quoted
# string, then print the prediction returned by fake_news().
result = fake_news('''Two young men, full of drink, were arrested and charged with spreading fake news. The pair were in Wright’s Lane in Kensington with their bundles of newspapers, and were heard to shout “Arrest of Jack the Ripper Tonight” at the top of their voices. William Macdonald and George Write, news vendors, were taken before the court but later discharged.

James Kendrick, another news vendor, was also charged with crying out false news about Jack the Ripper. After slow Sunday sales, he began calling that there was a “Horrible Discovery of a Missing Woman at Charing Cross”. Later, he cried out that four women were discovered slashed at Charing Cross. One customer, who bought one of his papers, found no such article inside and took the matter to the police. The news vendor was sentenced to 14 days in gaol, with hard labour. ''')
print(result)

['REAL']
