Choropleth Visualisation Notebook

In [2]:
import plotly.express as px
import plotly as py
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import pandas as pd


#engine='python' because utf-8 encoding wasn't being recognised. 
df = pd.read_csv("../DataSets/COVID-19_Dataset_Fake.csv", engine="python")

In [3]:
df.columns

Index(['Srno', 'Language', 'Desc', 'web-scraper-order',
       'web-scraper-start-url', 'titlelink', 'titlelink-href', 'desc',
       'article-link', 'article-link-href', 'DomainName', 'website-name',
       'date', 'country', 'is_fake', 'main_title'],
      dtype='object')

In [4]:
df.head(5)

Unnamed: 0,Srno,Language,Desc,web-scraper-order,web-scraper-start-url,titlelink,titlelink-href,desc,article-link,article-link-href,DomainName,website-name,date,country,is_fake,main_title
0,0,pt,Circulating on social networks a video that sh...,1588079462-6624,https://www.poynter.org/ifcn-covid-19-misinfor...,Read More,https://www.poynter.org/?ifcn_misinformation=a...,Explanation: There is no evidence that the vid...,Read the Full Article (Observador),https://observador.pt/factchecks/fact-check-um...,observador.pt,Fact-checked by: Observador,08-04-2020,Portugal,False,A video shows a fortune teller predicting the...
1,2,en,Internet sensation and the world’s cutest baby...,1588082282-6968,https://www.poynter.org/ifcn-covid-19-misinfor...,Read More,https://www.poynter.org/?ifcn_misinformation=i...,Explanation: Anahita's mother has denied the r...,Read the Full Article (Newschecker),https://www.newschecker.in/article/news-detail...,newschecker.in,Fact-checked by: Newschecker,17-04-2020,India,False,Internet sensation and the worldâ€™s cutest ba...
2,6,en,A video has been viewed hundreds of thousands ...,1588079696-6668,https://www.poynter.org/ifcn-covid-19-misinfor...,Read More,https://www.poynter.org/?ifcn_misinformation=a...,Explanation: The video actually shows Trump at...,Read the Full Article (AFP),https://factcheck.afp.com/video-shows-us-presi...,factcheck.afp.com,Fact-checked by: AFP,09-04-2020,Indonesia,False,A video has been viewed hundreds of thousands ...
3,7,en,A Facebook post claiming that the National Tre...,1588080417-6743,https://www.poynter.org/ifcn-covid-19-misinfor...,Read More,https://www.poynter.org/?ifcn_misinformation=t...,Explanation: The post urges residents to confi...,Read the Full Article (PesaCheck),https://pesacheck.org/false-treasury-is-not-se...,pesacheck.org,Fact-checked by: PesaCheck,11-04-2020,Kenya,False,"Treasury is depositing Kshs 45, 000 to the mob..."
4,8,es,Moves on Facebook and Twitter a video showing ...,1588080261-6735,https://www.poynter.org/ifcn-covid-19-misinfor...,Read More,https://www.poynter.org/?ifcn_misinformation=h...,Explanation: The images with police violence a...,Read the Full Article (Animal PolĂ­tico),https://www.animalpolitico.com/elsabueso/hungr...,animalpolitico.com,Fact-checked by: Animal PolĂ­tico,11-04-2020,Mexico,False,Hunagrian authorities are capturing men 50 or ...


In [5]:
df['is_fake'].unique()

array(['FALSE', 'Pants on Fire!', 'misleading', 'Explanatory',
       'Partly false', 'Mostly False', 'PARTLY FALSE', 'MISLEADING',
       'Misleading', 'No Evidence', 'Mainly false', 'Mostly false',
       'No evidence', 'Partially false', 'Misleading/False',
       'MOSTLY TRUE', 'Partly true', 'false and misleading', 'HALF TRUE',
       'Mostly True', "(Org. doesn't apply rating)", 'Fake', 'Correct',
       'Unlikely', 'Conspiracy theory', 'Partially true', 'Not true',
       'Half True', 'MOSTLY FALSE', 'PARTLY TRUE'], dtype=object)

In [6]:
#to check if any NaN values are present
df.count()

Srno                     1685
Language                 1685
Desc                     1685
web-scraper-order        1685
web-scraper-start-url    1685
titlelink                1685
titlelink-href           1685
desc                     1685
article-link             1685
article-link-href        1685
DomainName               1685
website-name             1685
date                     1685
country                  1685
is_fake                  1685
main_title               1685
dtype: int64

In [7]:
#Fake news is spread across a scale
#However for the purpose of this project; our fake news classifier is a two category classifier. Hence we will simply change the different scales to one specific value:- 'FALSE'
df['is_fake'] = df['is_fake'].replace({'Pants on Fire!' : 'FALSE', 
                                        'misleading': 'FALSE',
                                        'Partly false' : 'FALSE',
                                        'Mostly False' : 'FALSE',
                                        'PARTLY FALSE' : 'FALSE',
                                        'MISLEADING' : 'FALSE',
                                        'Misleading' : 'FALSE',
                                        'Mainly false' : 'FALSE',
                                        'Mostly false' : 'FALSE',
                                        'Partially false' : 'FALSE',
                                        'Misleadign/False' : 'FALSE',
                                        'false and misleading' : 'FALSE',
                                        'Fake' : 'FALSE',
                                        'Unlikely' : 'FALSE',
                                        'Not true' : 'FALSE',
                                        'MOSTLY FALSE' : 'FALSE' })


In [8]:
##unique values after replacing most of the "degrees of fakeness"

df['is_fake'].unique()

array(['FALSE', 'Explanatory', 'No Evidence', 'No evidence',
       'Misleading/False', 'MOSTLY TRUE', 'Partly true', 'HALF TRUE',
       'Mostly True', "(Org. doesn't apply rating)", 'Correct',
       'Conspiracy theory', 'Partially true', 'Half True', 'PARTLY TRUE'],
      dtype=object)

In [9]:
#grouping "country" feature by "is_fake" feature on all records where is_fake = FALSE 

df_fake_country = pd.DataFrame({'Fake':df.loc[df['is_fake'] == 'FALSE'].groupby(["is_fake", "country"]).size()}).reset_index()

In [10]:
#checking if "country" feature has only one value per record

df_fake_country['country'].unique()

array([' Africa', ' Algeria', ' Australia', ' Brazil', ' Canada',
       ' China', ' Colombia', ' Croatia', ' DR Congo', ' Ethiopia',
       ' France', ' Gabon', ' Germany', ' Ghana', ' Hong Kong', ' India',
       ' Indonesia', ' Israel', ' Italy', ' Ivory Coast', ' Japan',
       ' Kenya', ' Lithuania', ' Malaysia', ' Mexico', ' Morocco',
       ' Nigeria', ' North Macedonia', ' Pakistan', ' Philippines',
       ' Portugal', ' Senegal', ' Singapore', ' South Africa', ' Spain',
       ' Sri Lanka', ' Tanzania', ' Thailand', ' Tunisia', ' Turkey',
       ' Uganda', ' United States', ' Venezuela', ' West Africa'],
      dtype=object)

In [13]:
#number of countries 
df_fake_country.count()

is_fake    44
country    44
Fake       44
dtype: int64

In [24]:
df_fake_country[['country','Fake']]

Unnamed: 0,country,Fake
0,Africa,2
1,Algeria,1
2,Australia,26
3,Brazil,138
4,Canada,35
5,China,7
6,Colombia,25
7,Croatia,31
8,DR Congo,1
9,Ethiopia,2


In [15]:
#Plot of fake news 

fig = go.Figure(data=go.Choropleth(
    locations=df_fake_country['country'],
    locationmode='country names',
    z = df_fake_country['Fake'],
    colorscale= 'Reds',
    marker_line_color = 'black',
    marker_line_width = 0.5
))

fig.update_layout(
    title_text = 'Number of Fake News emerging from different countries',
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
        projection_type = 'natural earth'
    )
)