In [25]:
import pandas as pd
import numpy as np 

In [27]:
# Load the two source datasets; each file holds one class of article.
fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

In [29]:
# Tag every row with its class before the frames are merged: 0 = fake, 1 = true.
fake['label'] = 0
true['label'] = 1

In [31]:
# Stack the fake rows on top of the true rows (row-wise is pd.concat's default axis).
news = pd.concat([fake, true])

In [33]:
# Count missing values per column.
news.isna().sum()

title      0
text       0
subject    0
date       0
label      0
dtype: int64

In [39]:
# Keep only the article body and its label; the other columns are not used as features.
news = news.drop(columns=['title', 'subject', 'date'])

In [45]:
# Shuffle every row so the two classes are interleaved (the concat left all
# fake rows first). The fixed seed makes the shuffle -- and therefore every
# preview and metric below -- reproducible under Restart & Run All.
news = news.sample(frac=1, random_state=42)

In [47]:
# Preview the shuffled frame; the index still carries the pre-shuffle row labels.
news.head()

Unnamed: 0,text,label
17515,VIENNA (Reuters) - The chief of the U.N. atomi...,1
22043,21st Century Wire says It s been confirmed tha...,0
13221,LONDON (Reuters) - Britain s Brexit minister D...,1
6063,"Fox News, Donald Trump, and the rest of the ha...",0
19841,JAKARTA (Reuters) - Students at an Islamic sch...,1


In [49]:
# Renumber the rows 0..n-1; the previous index is kept as a new 'index' column.
news = news.reset_index()

In [51]:
# Preview after the reset: the old row labels now appear in the 'index' column.
news.head()

Unnamed: 0,index,text,label
0,17515,VIENNA (Reuters) - The chief of the U.N. atomi...,1
1,22043,21st Century Wire says It s been confirmed tha...,0
2,13221,LONDON (Reuters) - Britain s Brexit minister D...,1
3,6063,"Fox News, Donald Trump, and the rest of the ha...",0
4,19841,JAKARTA (Reuters) - Students at an Islamic sch...,1


In [55]:
# Discard the leftover 'index' column holding the pre-shuffle row labels.
news = news.drop(columns=['index'])

In [57]:
# Preview the final two-column frame (text, label) with a fresh 0-based index.
news.head()

Unnamed: 0,text,label
0,VIENNA (Reuters) - The chief of the U.N. atomi...,1
1,21st Century Wire says It s been confirmed tha...,0
2,LONDON (Reuters) - Britain s Brexit minister D...,1
3,"Fox News, Donald Trump, and the rest of the ha...",0
4,JAKARTA (Reuters) - Students at an Islamic sch...,1


In [59]:
import re 

In [63]:
def wordopt(text):
    """Normalise raw article text before vectorisation.

    Steps, in order: lowercase, strip URLs, strip HTML/XML tags, strip
    punctuation, strip digits, and replace newlines with spaces.

    Parameters
    ----------
    text : str
        Raw article text.

    Returns
    -------
    str
        The cleaned text. Runs of whitespace left behind by removed tokens
        are not collapsed.
    """
    text = text.lower()
    # Drop http(s):// and www. links up to the next whitespace.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    # Drop whole tags. The closing '>' is required: without it the lazy
    # quantifier matches zero characters and only the '<' is removed,
    # leaving the tag name behind in the text.
    text = re.sub(r'<.*?>', '', text)
    # Drop everything that is neither a word character nor whitespace.
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\d', '', text)
    text = re.sub(r'\n', ' ', text)
    return text

In [67]:
# Run the cleaning function over every article body, element by element.
news['text'] = news['text'].map(wordopt)

In [69]:
# Inspect the cleaned text column (pandas truncates the display).
news['text']

0        vienna reuters  the chief of the un atomic wat...
1        st century wire says it s been confirmed that ...
2        london reuters  britain s brexit minister davi...
3        fox news donald trump and the rest of the half...
4        jakarta reuters  students at an islamic school...
                               ...                        
44893    ottawa reuters  the united states sees no urge...
44894    washington reuters  us senate republican leade...
44895    dunkin  donuts is an american global donut com...
44896    if there s one presidential candidate who is t...
44897    paris reuters  france s air accident investiga...
Name: text, Length: 44898, dtype: object

In [71]:
# Features (cleaned text) and target (0 = fake, 1 = true).
x, y = news['text'], news['label']

In [73]:
from sklearn.model_selection import train_test_split

In [75]:
# Hold out 30% of the articles for evaluation. The fixed seed keeps the
# split -- and the metrics reported below -- reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

In [79]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [83]:
# TF-IDF vectoriser with default settings; fitted on the training text in the next step.
vectorization = TfidfVectorizer()

In [85]:
# Fit on the training text only, so nothing from the test set leaks into the features.
xv_train = vectorization.fit_transform(x_train)

In [87]:
# Project the test text onto the already-fitted vectoriser (transform only, no refit).
xv_test = vectorization.transform(x_test)

In [117]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [103]:
# Decision tree with default hyper-parameters. The seed fixes the tree's
# internal randomness so retraining on the same data yields the same model.
DTC = DecisionTreeClassifier(random_state=42)

In [105]:
# Train the tree on the TF-IDF training matrix.
DTC.fit(xv_train, y_train)

In [107]:
# Predicted labels for the held-out test set; reused by the report and confusion matrix.
predict_dtc = DTC.predict(xv_test)

In [109]:
# Mean accuracy on the held-out test set.
DTC.score(xv_test, y_test)

0.9968819599109131

In [115]:
# Per-class precision / recall / F1; print() is needed because the report is a pre-formatted string.
print(classification_report(y_test, predict_dtc))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7035
           1       1.00      1.00      1.00      6435

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



In [119]:
# Rows are the true labels and columns the predicted labels, both ordered 0 (fake) then 1 (true).
conf_matrix = confusion_matrix(y_test, predict_dtc)
print("Confusion Matrix:\n", conf_matrix)

Confusion Matrix:
 [[7018   17]
 [  25 6410]]


In [121]:
def output_label(n):
    """Translate a numeric class label into a human-readable verdict.

    Returns the fake-news message for 0, the genuine-news message for 1,
    and None for any other value.
    """
    if n == 0:
        return "Its A Fake News"
    if n == 1:
        return "Its A Genuine News"
    return None

In [129]:
def manual_testing(news):
    """Classify a single article with the trained decision tree.

    Parameters
    ----------
    news : str
        Raw article text. Note that inside this function the parameter
        shadows the module-level ``news`` DataFrame.

    Returns
    -------
    str
        A message of the form ``"\\nDTC Prediction: <verdict>"``.
    """
    # Apply the same cleaning that was used on the training data.
    cleaned = wordopt(news)
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorization.transform([cleaned])
    # Predict on the vectorised input article itself, not on the global
    # test matrix, so the verdict reflects the text that was passed in.
    prediction = DTC.predict(features)[0]
    return "\nDTC Prediction: {}".format(output_label(prediction))

In [131]:
# Read the article to classify from the user; input() already returns a str.
news_article = input()

 Teenage footballers listen to the Russian national anthem before a match. Nearby, artists paint Russian President Vladimir Putin’s portrait on a wall during a graffiti festival.  Welcome to Burkina Faso, one of the African nations where Russia is boosting its operations to gain influence.  Evidence found by the BBC shows that Russia is using media and cultural initiatives to attract African journalists, influencers, and students while spreading misleading information.  These events are being promoted by African Initiative, a newly founded Russian media organisation which defines itself as an “information bridge between Russia and Africa”. It inherited structures previously set up by the dismantled Wagner mercenary group and is believed by experts to have links with the Russian security services.  Registered in September 2023, a month after Wagner’s leader Yevgeny Prigozhin died in a plane crash, African Initiative has welcomed former employees from his disbanded enterprises.  Its effo

In [133]:
# Classify the article that was entered at the prompt.
manual_testing(news_article)

'\nDTC Prediction: Its A Fake News'

In [135]:
import joblib

In [137]:
# Persist the fitted classifier. joblib files are pickles: only load them from trusted sources.
joblib.dump(DTC, 'decision_tree_model.pkl')

['decision_tree_model.pkl']

In [139]:
# Persist the fitted vectoriser too; new text must be transformed with this same object before prediction.
joblib.dump(vectorization, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']