In [1]:
import pandas as pd
import pickle
import re
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
df = pd.read_csv("new_news.csv", names=['title', 'content'])

In [3]:
df.fillna('', inplace=True)

In [4]:
df

Unnamed: 0,title,content
0,Liverpool vs Bologna: LIVE Stream and Score Up...,
1,Boko Haram And ISWAP: Bloodshed And Rivalry - ...,A screengrab from the video showing the execut...
2,AMD’s Ryzen 5000 processors with integrated gr...,"Not just for OEMs anymore\r\nIn April, AMD ann..."
3,Amnesty Demands Probe Into Egypt Army ‘Executi...,Amnesty International called Thursday for an i...
4,Laptops: What you need to know before you buy ...,Its almost time to get back to school and coll...
5,Who are authorized persons outside of residenc...,The Inter-Agency Task Force on Emerging Infect...
6,'Uncaring' mum and boyfriend found guilty of k...,"An ""uncaring"" mum has been convicted alongside..."
7,Pokémon Go's most prominent players call for N...,Pokémon Go's most high profile players have ca...
8,Inflation slows to 4% in July - Philstar.com,"MANILA, Philippines — Inflation further eased ..."
9,Nintendo's profit misses estimates - Taipei Times,Nintendos profit misses estimates\r\nNintendo ...


In [5]:
def clean_words(text):
    text = re.sub('[^a-zA-Z ]', '', text)
    return text

In [6]:
df['content'] = df['content'].apply(clean_words)

In [7]:
vec = pickle.load(open("vectorizer.pk",'rb'))

In [8]:
model = pickle.load(open("Final_model.sav",'rb'))

In [9]:
vec_data = vec.transform(df['content'])

In [10]:
probability_prediction = model.predict_proba(vec_data)

In [11]:
probability_prediction

array([[0.8439468 , 0.1560532 ],
       [0.66526531, 0.33473469],
       [0.86266503, 0.13733497],
       [0.5646402 , 0.4353598 ],
       [0.85748256, 0.14251744],
       [0.55368025, 0.44631975],
       [0.80583265, 0.19416735],
       [0.64901131, 0.35098869],
       [0.6543072 , 0.3456928 ],
       [0.62515957, 0.37484043],
       [0.83624296, 0.16375704],
       [0.19124995, 0.80875005],
       [0.45917579, 0.54082421],
       [0.64491506, 0.35508494],
       [0.83795144, 0.16204856],
       [0.74507428, 0.25492572],
       [0.69517111, 0.30482889],
       [0.73149007, 0.26850993],
       [0.8439468 , 0.1560532 ],
       [0.77214815, 0.22785185]])

In [12]:
def label_prediction(probab):
    if len(probab) < 2:
        return "Cannot determine"
    if probab[0] >0.8:
        return "Highly Likely True"
    elif probab[0] >0.6:
        return "Likely True"
    elif probab[0] > 0.4:
        return "Cannot determine"
    elif probab[0] > 0.2:
        return "Likely False"
    else:
        return "Highly Likely False"

In [13]:
df['prediction'] = list(map(label_prediction, probability_prediction))

In [14]:
df

Unnamed: 0,title,content,prediction
0,Liverpool vs Bologna: LIVE Stream and Score Up...,,Highly Likely True
1,Boko Haram And ISWAP: Bloodshed And Rivalry - ...,A screengrab from the video showing the execut...,Likely True
2,AMD’s Ryzen 5000 processors with integrated gr...,Not just for OEMs anymoreIn April AMD announce...,Highly Likely True
3,Amnesty Demands Probe Into Egypt Army ‘Executi...,Amnesty International called Thursday for an i...,Cannot determine
4,Laptops: What you need to know before you buy ...,Its almost time to get back to school and coll...,Highly Likely True
5,Who are authorized persons outside of residenc...,The InterAgency Task Force on Emerging Infecti...,Cannot determine
6,'Uncaring' mum and boyfriend found guilty of k...,An uncaring mum has been convicted alongside h...,Highly Likely True
7,Pokémon Go's most prominent players call for N...,Pokmon Gos most high profile players have call...,Likely True
8,Inflation slows to 4% in July - Philstar.com,MANILA Philippines Inflation further eased in...,Likely True
9,Nintendo's profit misses estimates - Taipei Times,Nintendos profit misses estimatesNintendo Co h...,Likely True
