In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import re

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [6]:
from sklearn.ensemble import RandomForestClassifier

In [7]:
from sklearn.metrics import accuracy_score

In [8]:

data = pd.read_csv('C:/Users/maity/OneDrive/Desktop/Pworld/svm/data.csv')

In [9]:
data.shape

(4009, 4)

In [10]:
data.head()

Unnamed: 0,URLs,Headline,Body,Label
0,http://www.bbc.com/news/world-us-canada-414191...,Four ways Bob Corker skewered Donald Trump,Image copyright Getty Images\nOn Sunday mornin...,1
1,https://www.reuters.com/article/us-filmfestiva...,Linklater's war veteran comedy speaks to moder...,"LONDON (Reuters) - “Last Flag Flying”, a comed...",1
2,https://www.nytimes.com/2017/10/09/us/politics...,Trump’s Fight With Corker Jeopardizes His Legi...,The feud broke into public view last week when...,1
3,https://www.reuters.com/article/us-mexico-oil-...,Egypt's Cheiron wins tie-up with Pemex for Mex...,MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...,1
4,http://www.cnn.com/videos/cnnmoney/2017/10/08/...,Jason Aldean opens 'SNL' with Vegas tribute,"Country singer Jason Aldean, who was performin...",1


In [11]:
data.columns

Index(['URLs', 'Headline', 'Body', 'Label'], dtype='object')

In [12]:
data.isnull().sum()

URLs         0
Headline     0
Body        21
Label        0
dtype: int64

In [13]:
df=data.copy()

In [14]:
df['Body']=df['Body'].fillna('')

In [15]:
df.isnull().sum()

URLs        0
Headline    0
Body        0
Label       0
dtype: int64

In [16]:
df['News'] = df['Headline']+df['Body']

In [17]:
df.head()

Unnamed: 0,URLs,Headline,Body,Label,News
0,http://www.bbc.com/news/world-us-canada-414191...,Four ways Bob Corker skewered Donald Trump,Image copyright Getty Images\nOn Sunday mornin...,1,Four ways Bob Corker skewered Donald TrumpImag...
1,https://www.reuters.com/article/us-filmfestiva...,Linklater's war veteran comedy speaks to moder...,"LONDON (Reuters) - “Last Flag Flying”, a comed...",1,Linklater's war veteran comedy speaks to moder...
2,https://www.nytimes.com/2017/10/09/us/politics...,Trump’s Fight With Corker Jeopardizes His Legi...,The feud broke into public view last week when...,1,Trump’s Fight With Corker Jeopardizes His Legi...
3,https://www.reuters.com/article/us-mexico-oil-...,Egypt's Cheiron wins tie-up with Pemex for Mex...,MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...,1,Egypt's Cheiron wins tie-up with Pemex for Mex...
4,http://www.cnn.com/videos/cnnmoney/2017/10/08/...,Jason Aldean opens 'SNL' with Vegas tribute,"Country singer Jason Aldean, who was performin...",1,Jason Aldean opens 'SNL' with Vegas tributeCou...


In [18]:
df.columns

Index(['URLs', 'Headline', 'Body', 'Label', 'News'], dtype='object')

In [19]:
features_dropped = ['URLs' , 'Headline' , 'Body']
df = df.drop(features_dropped , axis =1)

In [20]:
df.columns

Index(['Label', 'News'], dtype='object')

In [21]:
ps = PorterStemmer()
def wordopt(text):
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.lower()
    text = text.split()
    text = [ps.stem(word) for word in text if not word in stopwords.words('english')]
    text = ' '.join(text)
    return text

In [22]:
df['News'] = df['News'].apply(wordopt)

In [23]:
df.head()

Unnamed: 0,Label,News
0,1,four way bob corker skewer donald trumpimag co...
1,1,linklat war veteran comedi speak modern americ...
2,1,trump fight corker jeopard legisl agendath feu...
3,1,egypt cheiron win tie pemex mexican onshor oil...
4,1,jason aldean open snl vega tributecountri sing...


In [24]:
X = df['News']
Y =  df['Label']

In [25]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25)

In [27]:
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [29]:
RFC_model = RandomForestClassifier(random_state=0)
RFC_model.fit(xv_train, y_train)
rfc_y_pred = RFC_model.predict(xv_test)
score = accuracy_score(y_test, rfc_y_pred)
print('Accuracy of RFC model is ', score)

Accuracy of RFC model is  0.9710867397806581


In [30]:
svm_model = SVC(kernel='linear')
svm_model.fit(xv_train, y_train)
svm_y_pred = svm_model.predict(xv_test)
score = accuracy_score(y_test,svm_y_pred)
print('Accuracy of SVM model is :', score)

Accuracy of SVM model is : 0.9800598205383848


In [31]:
LR_model = LogisticRegression()
LR_model.fit(xv_train, y_train)
lr_y_pred = LR_model.predict(xv_test)
score = accuracy_score(y_test, lr_y_pred)
print('Accuracy of LR model is :' , score)

Accuracy of LR model is : 0.9740777666999003


In [32]:
def fake_news_det(news):
    input_data = {"text":[news]}
    new_def_test = pd.DataFrame(input_data)
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    vectorized_input_data = vectorization.transform(new_x_test)
    prediction = svm_model.predict(vectorized_input_data)
    if prediction == 1:
        print("Not a Fake News")
    else:
        print("Fake News")
    

In [33]:
fake_news_det("JetNation FanDuel League; Week 4 of readers think this story is Fact. Add your two cents.(Before Its News)Our FanDuel league is back again this week. Here are the details:$900 in total prize money. $250 to the winner. $10 to enter.Remember this is a one week league, pick your lineup against the salary cap and next week if you want to play again you can pick a completely different lineup if you want.Click this link to enter — http://fanduel.com/JetNation You can discuss this with other NY Jets fans on the Jet Nation message board. Or visit of on Facebook.Source: http://www.jetnation.com/2017/09/27/jetnation-fanduel-league-week-4/")

Fake News


In [34]:
fake_news_det("""The second Covid-19 wave in India is now on the "downswing," the Centre said on Thursday, highlighting that the current number of active cases is still "very high" and advised states and Union territories (UTs) to not let down their guards.""")

Not a Fake News


In [35]:
fake_news_det("""North Korea's Other Threat: Why EMP Becomes An Extinction Level Event""")

Fake News
