# Politics

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import re
import pandas as pd
import bs4
import requests
import spacy
from spacy import displacy
nlp = spacy.load('en_core_web_sm')

from spacy.matcher import Matcher 
from spacy.tokens import Span 

import networkx as nx

import matplotlib.pyplot as plt
from tqdm import tqdm

pd.set_option('display.max_colwidth', 200)
%matplotlib inline

In [None]:
candidate_sentences = pd.read_csv("/content/drive/MyDrive/Politics.csv")
candidate_sentences.shape

(2998, 3)

In [None]:
candidate_sentences.head()

Unnamed: 0,Titles,articleText,date_author
0,"Service delivery sole criteria to compete for public office, says PM","Prime Minister Shehbaz Sharif on Wednesday said politics in the country has been reduced to ""meaningless rhetoric"" and termed service delivery as the main criterion to compete for public office. \...","News Desk \nApril 19, 2023"
1,Grand dialogue on polls proposed,"QUETTA:\n\nSenator Samina Mumtaz Zehri has said that as far as new elections are concerned in Pakistan, for the sake of the integrity and the interest of the country, all the stakeholders should h...","APP \nApril 08, 2023"
2,Politicians advised to mend fences to prevent economic meltdown,KARACHI:\n\nPakistan's economy has partially shut down and millions of people have lost jobs due to continuous political instability. The nation is destined to encounter hyperinflation with the do...,"Salman Siddiqui \nApril 03, 2023"
3,Who cares about the economy in this political tug of war?,"KARACHI:\n\nThe dollar rate is now a religious hymn, with everyone from rags to riches quoting it. A country’s currency is one of the key reflections of its economic strength or lack thereof. Chat...","AAH Soomro \nApril 03, 2023"
4,"Jokes, racism: The many reactions to Humza Yousaf's appointment","Humza Yousaf made history after his appointment as the First Minister of Scotland --- being the first person of colour and the first Muslim to hold the coveted post. Of Pakistani descent, the new ...","Social Desk \nMarch 30, 2023"


In [None]:
doc = nlp("the drawdown process is governed by astm standard d823")

for tok in doc:
  print(tok.text, "...", tok.dep_)

the ... det
drawdown ... amod
process ... nsubjpass
is ... auxpass
governed ... ROOT
by ... agent
astm ... compound
standard ... compound
d823 ... pobj


In [None]:
def get_entities(sent):
  ## chunk 1
  ent1 = ""
  ent2 = ""

  prv_tok_dep = ""    # dependency tag of previous token in the sentence
  prv_tok_text = ""   # previous token in the sentence

  prefix = ""
  modifier = ""

  #############################################################
  
  for tok in nlp(sent):
    ## chunk 2
    # if token is a punctuation mark then move on to the next token
    if tok.dep_ != "punct":
      # check: token is a compound word or not
      if tok.dep_ == "compound":
        prefix = tok.text
        # if the previous word was also a 'compound' then add the current word to it
        if prv_tok_dep == "compound":
          prefix = prv_tok_text + " "+ tok.text
      
      # check: token is a modifier or not
      if tok.dep_.endswith("mod") == True:
        modifier = tok.text
        # if the previous word was also a 'compound' then add the current word to it
        if prv_tok_dep == "compound":
          modifier = prv_tok_text + " "+ tok.text
      
      ## chunk 3
      if tok.dep_.find("subj") == True:
        ent1 = modifier +" "+ prefix + " "+ tok.text
        prefix = ""
        modifier = ""
        prv_tok_dep = ""
        prv_tok_text = ""      

      ## chunk 4
      if tok.dep_.find("obj") == True:
        ent2 = modifier +" "+ prefix +" "+ tok.text
        
      ## chunk 5  
      # update variables
      prv_tok_dep = tok.dep_
      prv_tok_text = tok.text
  #############################################################

  return [ent1.strip(), ent2.strip()]

In [None]:
candidate_sentences.head()

Unnamed: 0,Titles,articleText,date_author
0,"Service delivery sole criteria to compete for public office, says PM","Prime Minister Shehbaz Sharif on Wednesday said politics in the country has been reduced to ""meaningless rhetoric"" and termed service delivery as the main criterion to compete for public office. \...","News Desk \nApril 19, 2023"
1,Grand dialogue on polls proposed,"QUETTA:\n\nSenator Samina Mumtaz Zehri has said that as far as new elections are concerned in Pakistan, for the sake of the integrity and the interest of the country, all the stakeholders should h...","APP \nApril 08, 2023"
2,Politicians advised to mend fences to prevent economic meltdown,KARACHI:\n\nPakistan's economy has partially shut down and millions of people have lost jobs due to continuous political instability. The nation is destined to encounter hyperinflation with the do...,"Salman Siddiqui \nApril 03, 2023"
3,Who cares about the economy in this political tug of war?,"KARACHI:\n\nThe dollar rate is now a religious hymn, with everyone from rags to riches quoting it. A country’s currency is one of the key reflections of its economic strength or lack thereof. Chat...","AAH Soomro \nApril 03, 2023"
4,"Jokes, racism: The many reactions to Humza Yousaf's appointment","Humza Yousaf made history after his appointment as the First Minister of Scotland --- being the first person of colour and the first Muslim to hold the coveted post. Of Pakistani descent, the new ...","Social Desk \nMarch 30, 2023"


In [None]:
entity_pairs = []

for i in tqdm(candidate_sentences["Titles"]):
  entity_pairs.append(get_entities(i))

100%|██████████| 2998/2998 [00:32<00:00, 93.02it/s] 


In [None]:
entity_pairs

[['public  PM', 'public  office'],
 ['Grand  dialogue', 'polls'],
 ['Politicians', 'economic  meltdown'],
 ['Who', 'political  war'],
 ['', 'many Humza appointment'],
 ['Economic  stability', 'politics'],
 ['Political  polarisation', 'superior  courts'],
 ['', 'Dirty  SDGs'],
 ['', ''],
 ['', ''],
 ['Abbasi', 'current  problems'],
 ['', 'Supremacy  ills'],
 ['High stakes election gamble', ''],
 ['', '2023'],
 ['youth', ''],
 ['Political  bickering', 'flood relief effort'],
 ['Political  parties', 'youth'],
 ['lack', 'political  participation'],
 ['we', 'lessons'],
 ['', 'social  contract'],
 ['political  chaos', ''],
 ['Alvi', 'joint  sitting'],
 ['', 'empathy'],
 ['', 'civilian  supremacy'],
 ['', 'dark  silhouettes'],
 ['', 'stability'],
 ['Lessons', ''],
 ['', 'needed  Economy'],
 ['who', 'world'],
 ['', ''],
 ['', 'floods'],
 ['', 'politics'],
 ['PDM economy', ''],
 ['', ''],
 ['Dialogue', 'political  uncertainty'],
 ['', 'leadership'],
 ['', 'time'],
 ['Pakistan', 'India chess eve

In [None]:
entity_pairs[10:20]

[['Abbasi', 'current  problems'],
 ['', 'Supremacy  ills'],
 ['High stakes election gamble', ''],
 ['', '2023'],
 ['youth', ''],
 ['Political  bickering', 'flood relief effort'],
 ['Political  parties', 'youth'],
 ['lack', 'political  participation'],
 ['we', 'lessons'],
 ['', 'social  contract']]

In [None]:
def get_relation(sent):

  doc = nlp(sent)

  # Matcher class object 
  matcher = Matcher(nlp.vocab)

  #define the pattern 
  pattern = [{'DEP':'ROOT'}, 
            {'DEP':'prep','OP':"?"},
            {'DEP':'agent','OP':"?"},  
            {'POS':'ADJ','OP':"?"}] 

  matcher.add("matching_1", [pattern]) 

  matches = matcher(doc)
  k = len(matches) - 1

  span = doc[matches[k][1]:matches[k][2]] 

  return(span.text)

In [None]:
relations = [get_relation(i) for i in tqdm(candidate_sentences['Titles'])]

100%|██████████| 2998/2998 [00:33<00:00, 88.77it/s]


In [None]:
pd.Series(relations).value_counts()[:50]

says           137
urges           33
seeks           30
is              26
claims          26
tells           26
asks            24
Imran           21
rejects         21
accuses         17
vows            16
demands         16
calls           16
set             15
slams           15
warns           14
takes           14
announces       14
wants           13
joins           12
decides         11
sees            11
politics        11
challenges      10
calls for       10
advises         10
meets            9
join             9
denies           9
gives            9
refuses          8
sets             8
meet             8
condemns         8
Nawaz            8
throws           7
sworn            7
launches         7
govt             7
passes           7
Politics of      7
Trump            7
Bilawal          7
lashes           6
see              6
agree            6
assures          6
need             6
visit            6
Is               6
dtype: int64

In [None]:
len(relations)

2998

In [None]:
relations

['says',
 'proposed',
 'advised',
 'cares about',
 'Jokes',
 'links to',
 'takes',
 'politics behind',
 'politics',
 'turbulence',
 'says',
 'panacea for',
 'politicking',
 'room',
 'is',
 'dents',
 'urged',
 'examined',
 'learnt',
 'Call for new',
 'Rashid',
 'cleared',
 'Politics of',
 'moment',
 'Iraq',
 'Usher in',
 'learnt',
 'Charter of',
 'Gorbachev',
 'polarisation',
 'Politicking amid',
 'Money in',
 'fiddles',
 'crisis',
 'urged',
 'Takeaways from',
 'stitch in',
 'pulls out',
 'Greed',
 'prevail',
 'son in',
 'raid',
 'is',
 'Is viral',
 'galore in',
 'media ablaze',
 'WATCH',
 'divides',
 'causes',
 'goes on',
 'passes',
 'shot at',
 'ban political',
 'define',
 'slide',
 'politics of',
 'uses',
 'politics',
 'laden with',
 'resigns from',
 'Know',
 'Politics',
 'slap on',
 'plans',
 'Politics of',
 'grants pre',
 'gets',
 'becomes',
 'throws',
 'refuses',
 'expels',
 'Haideri for',
 'inclusion of',
 'says',
 'says',
 'condemns',
 'seeks',
 'rejects',
 'challenges',
 'Imran

# Health

In [None]:
candidate_sentences2 = pd.read_csv("/content/drive/MyDrive/HealthNews.csv")
candidate_sentences2.shape

(2997, 3)

In [None]:
candidate_sentences2.head()

Unnamed: 0,articleText,date_author,News Title
0,"LAHORE:\n\nTo mark World Health Day, a special awareness seminar was organized on Friday at Fatima Jinnah Medical University.\nCaretaker Provincial Health Ministers Dr Javed Akram and Dr Jamal Nas...","Our Correspondent \nApril 08, 2023",‘29% of population suffering from diabetes’
1,PESHAWAR:\n\nA high level 13-member committee has been formed by the Khyber-Pakhtunkhwa (K-P) government to implement the dengue action plan 2023 across the province.\nTalking to The Express Tribu...,"Our Correspondent \nApril 06, 2023",K-P readies plan to combat dengue
2,"PESHAWAR:\n\nThe number of Covid-19 cases have been on the rise in Khyber-Pakhtunkhwa (K-P) once again as 12 new cases have been reported in the last 24 hours, an official of the health department...","Our Correspondent \nApril 01, 2023",Covid-19 cases on the rise in K-P again
3,"QUETTA:\n\nBalochistan cabinet on Friday approved health card program for all families of the province to ensure quality healthcare facilities for the local residents.\nChief Minister Balochistan,...","Syed Ali Shah \nApril 01, 2023",Balochistan cabinet approves health card
4,"PESHAWAR:\n\nThe number of Covid-19 has jumped up once again in Khyber-Pakhtunkhwa (K-P) including Peshawar, an official of the Health Department said here Saturday.\nHe observed that despite the ...","Our Correspondent \nMarch 26, 2023",Seven new Covid-19 cases detected in K-P


In [None]:
entity_pairs2 = []

for i in tqdm(candidate_sentences2["News Title"]):
  entity_pairs2.append(get_entities(i))

100%|██████████| 2997/2997 [00:28<00:00, 106.05it/s]


In [None]:
relations2 = [get_relation(i) for i in tqdm(candidate_sentences2['News Title'])]

100%|██████████| 2997/2997 [00:29<00:00, 102.36it/s]


In [None]:
pd.Series(relations2).value_counts()[:50]

says              64
study             51
reports           28
ways              25
claims            21
seeks             19
cases             17
is                17
urges             16
told              15
sees              13
Govt              13
say               13
vows              12
help              11
finds             11
reasons           11
tips              11
urged             10
foods             10
set               10
gets              10
launches          10
sets               9
benefits of        9
be                 9
tests positive     9
continues          8
are                8
drive              8
test positive      8
continue           8
launched           8
die of             8
surpasses          8
orders             8
approves           8
shows              8
get                8
Coronavirus        7
Day                7
hits               7
puts               7
found              7
govt               7
reveals            7
takes              7
surface in   

In [None]:
relations2

['% of',
 'plan',
 'cases on',
 'approves',
 'cases',
 'awaited',
 'provide',
 'set',
 'employs new',
 'opens in',
 'fortification',
 'reactivates',
 'dilemma',
 'care',
 'outlined',
 'start anti',
 'formed',
 'begins in',
 'stunted in',
 'miss anti',
 'disease on',
 'held',
 'reviewed',
 'planning',
 'Minister for universal',
 'worries over',
 'plays',
 'get',
 'test',
 'warns of',
 'given legal',
 'Govt',
 'priorities',
 'rises as',
 'faces',
 'sign',
 'found in',
 'discussed',
 'die of',
 'plague',
 'made',
 'centres',
 'Steer',
 'reviewed',
 'continues',
 'Growth',
 'elevated',
 'cases',
 'stages',
 'get',
 'campaign',
 'safety of',
 'inaugurates',
 'warn against',
 'need assistive',
 'Pakistanis',
 'cause of',
 'clarifies',
 'held',
 'claims',
 'vaccinated against',
 'alarming',
 'respite from',
 'rue governmental',
 'Funds',
 'targets',
 'launches',
 'children at',
 'recovered from',
 'visits',
 'prepares national',
 'delivers',
 'Alvi for early',
 'enters',
 'drive on',
 'vaccin

# Fake News

In [None]:
candidate_sentences3 = pd.read_excel("/content/drive/MyDrive/pakistani_dataset_consolidated.xlsx")
candidate_sentences3.shape

(12008, 13)

In [None]:
candidate_sentences3.tail()

Unnamed: 0,URL,Title,Text,Review Date,Textual Rating,Publisher Site,Publisher Name,Claim Date,Claimant,Content,Published At,Author,Url to Image
12003,https://www.factchecker.in/fact-check/electricity-power-tariff-punjab-costliest-aap-leaders-758403,Is Electricity Costliest in Punjab as AAP Leaders Claim?,"Punjab doesnt charge the most for electricity in the country, Maharashtra and Rajasthan do",2021-06-29T13:06:36Z,False,factchecker.in,FactChecker.in,2021-06-29T13:06:36Z,AAP Leader Raghav Chadha and MP Bhagwant Mann,,,,
12004,https://www.altnews.in/old-fake-message-about-ndtv-prannoy-roy-and-his-wife-viral-again/,Previous Story False message makes claims about NDTV and its ...,"NDTV's Prannoy Roy is Pakistani, his wife Radhika's real name is Rahila",2021-06-11T00:00:00Z,False,altnews.in,Alt News,2016-06-20T00:00:00Z,Social Media,,,,
12005,https://www.altnews.in/werewolf-sculpture-shared-with-unrelated-video-from-pak-believed-to-be-an-incident-from-mp/,Werewolf sculpture shared with unrelated video from Pak believed ...,"'Werewolf was spotted in Bandhavgarh, Madhya Pradesh",2021-06-23T00:00:00Z,False,altnews.in,Alt News,2016-06-20T00:00:00Z,WhatsApp forward,,,,
12006,https://scroll.in/article/998893/fact-check-is-electricity-in-punjab-really-the-countrys-most-expensive-as-aap-leaders-claim,Fact check: Is electricity in Punjab really the country's most ...,Electricity in Punjab is the costliest in India.,2021-07-01T08:00:00Z,False,scroll.in,Scroll.in,2021-07-01T08:00:00Z,AAP leaders Raghav Chadha and Bhagwant Mann,,,,
12007,https://www.boomlive.in/fake-quote-on-communal-hatred-falsely-credited-to-jammu-and-kashmir-police-officer/,Fake Quote On Communal Hatred Falsely Credited To Jammu and ...,"Muslim majority areas do not attack and beat to death Hindus, said Patiala SP.",2019-08-30T15:59:58Z,Fake,boomlive.in,BOOM Live,2019-08-30T15:59:58Z,Facebook post,,,,


In [None]:
candidate_sentences3.dtypes

URL               object
Title             object
Text              object
Review Date       object
Textual Rating    object
Publisher Site    object
Publisher Name    object
Claim Date        object
Claimant          object
Content           object
Published At      object
Author            object
Url to Image      object
dtype: object

In [None]:
candidate_sentences3['Title'].isnull()

0         True
1         True
2        False
3        False
4        False
         ...  
12003    False
12004    False
12005    False
12006    False
12007    False
Name: Title, Length: 12008, dtype: bool

In [None]:
candidate_sentences3 = candidate_sentences3.fillna('Imran Khan is Ex Prime Minister')

In [None]:
candidate_sentences3['Title'] = candidate_sentences3['Title'].astype('str') 

In [None]:
entity_pairs3 = []

for i in tqdm(candidate_sentences3["Title"]):
  entity_pairs3.append(get_entities(i))

100%|██████████| 12008/12008 [02:05<00:00, 95.74it/s] 


In [None]:
relations3 = [get_relation(i) for i in tqdm(candidate_sentences3["Title"])]

100%|██████████| 12008/12008 [02:06<00:00, 94.70it/s] 


In [None]:
pd.Series(relations3).value_counts()[:50]

says          294
is            292
Check         169
Is             81
shared as      63
Says           58
Times          53
's             46
In             43
takes          40
say            40
are            40
shows          39
Are            37
approves       37
Shared As      37
Cricket        36
claims         32
tells          32
seeks          31
release of     29
Modi           29
asks           29
set            29
be             29
gets           28
India          28
’s             26
make           24
has            23
Watch          23
show           22
was            22
Explained      21
Report         21
News           21
arrests        21
demands        20
claim          20
leads          20
Was            20
shared         19
have           18
Video Of       17
urges          17
begins         17
kills          17
Passed         17
seek           16
get            16
dtype: int64

In [None]:
relations3

['is',
 'is',
 'claims',
 'Claim',
 'Is Fake',
 'Check',
 'Check',
 'Check',
 'give',
 'Modi',
 'Check',
 'Check',
 'Check',
 'Check',
 'Revived Amid',
 'Check',
 'linked to',
 'linked to',
 'linked to',
 'Check',
 'Is',
 'Video From',
 'Check',
 'Check',
 'Check',
 'Check',
 'Check',
 'Claim',
 'denied',
 'Check',
 'Check',
 'Share',
 'Is from',
 'Check',
 'Passed',
 'Check of',
 'claim',
 'Waved at',
 'Waved at',
 'Sarma',
 'Sarma',
 'Check',
 'Chanting',
 'Are False',
 'Are False',
 'was',
 'Chanted In',
 'Check',
 'Check',
 'Revived With',
 'Images',
 'Check',
 "'s",
 'shared',
 'Is Fake',
 'Were Indian',
 'Did',
 'wants',
 'Check',
 'Photos Of',
 'Photos Of',
 'Check',
 'Shared As',
 'Shared As',
 'slip over',
 'Standing On',
 'Shared With False',
 'Check',
 'go',
 'Check',
 'Video Of',
 'Ask',
 'Check',
 'Check',
 'Runs',
 'Shared As',
 'Quite',
 'share old',
 'Check',
 'Photo of',
 'Shows Pakistani',
 'Peddled',
 'Video of',
 'Does',
 'Die In',
 'As',
 'Video From',
 'shared as'

In [None]:
import  csv

with open("PakistaniDatasetRelations.csv","w") as f:
    wr = csv.writer(f,delimiter="\n")
    wr.writerow(relations3)