In [1]:
!pip3 install requests_cache



In [2]:
# import standard libraries
import io
import os
import textwrap as tw
import zipfile

import numpy as np
import pandas as pd
import requests_cache as rqc

In [3]:
# report the versions of the numeric libraries this notebook runs with
print("ENVIRONMENT INFORMATION")
for version_template, library in (
    ("Using numpy version %s", np),
    ("Using pandas version %s", pd),
):
  print(version_template % library.__version__)

ENVIRONMENT INFORMATION
Using numpy version 1.23.5
Using pandas version 1.5.3


In [4]:
def get_data_from_remote_zip_file(file_url, file_name, index_col = None):
  """Download a zip archive and load one of its CSV members into a dataframe.

  Parameters
  ----------
  file_url : str
    URL of the zip archive; it is fetched through a requests_cache
    CachedSession.
  file_name : str
    Name of the CSV member to read from the archive.
  index_col : optional
    Forwarded unchanged to pandas.read_csv (default None).

  Returns
  -------
  pandas.DataFrame
    The parsed content of the requested CSV member.

  Raises
  ------
  requests.HTTPError
    If the server answers with a 4xx/5xx status code.
  zipfile.BadZipFile
    If the downloaded payload is not a valid zip archive.
  KeyError
    If the archive has no member called file_name.
  """
  # use the session as a context manager so it is closed once the download is done
  with rqc.CachedSession() as session:
    response = session.get(file_url)
    # fail early with a clear HTTP error instead of trying to unzip an error page
    response.raise_for_status()
    payload = response.content

  # the archive is handled fully in memory; nothing is written to disk here
  with zipfile.ZipFile(io.BytesIO(payload)) as archive:
    with archive.open(file_name) as csv_file:
      return pd.read_csv(csv_file, index_col = index_col)

In [5]:
# where the zipped paraphrased titles live, and the CSV member stored in that archive
PARAPHRASED_TITLES_URL = (
    "https://github.com/INTERTECHNICA-BUSINESS-SOLUTIONS-SRL/"
    "NATO-Article-COVID-Fake-News-Content-Enhancement/"
    "raw/main/data/processed/paraphrased_titles.zip"
)
PARAPHRASED_TITLES_FILE_NAME = "paraphrased_titles.csv"

# download the archive and load the paraphrased titles,
# using the first CSV column as the dataframe index
paraphrased_titles_data_frame = get_data_from_remote_zip_file(
    file_url = PARAPHRASED_TITLES_URL,
    file_name = PARAPHRASED_TITLES_FILE_NAME,
    index_col = 0
)

In [6]:
# where the zipped summarized texts live, and the CSV member stored in that archive
SUMMARIZED_TEXTS_URL = (
    "https://github.com/INTERTECHNICA-BUSINESS-SOLUTIONS-SRL/"
    "NATO-Article-COVID-Fake-News-Content-Enhancement/"
    "raw/main/data/processed/summarized_texts.zip"
)
SUMMARIZED_TEXTS_FILE_NAME = "summarized_texts.csv"

# download the archive and load the summarized texts,
# using the first CSV column as the dataframe index
summarized_texts_data_frame = get_data_from_remote_zip_file(
    file_url = SUMMARIZED_TEXTS_URL,
    file_name = SUMMARIZED_TEXTS_FILE_NAME,
    index_col = 0
)

In [7]:
# build the generated fake news by joining paraphrased titles with summarized texts:
# rows are matched on "original_index" with an inner join, so only indices present
# in both inputs are kept, and every title / text pairing sharing the same
# "original_index" becomes one generated fake news row
generated_fake_news_dataframe = pd.merge(
    paraphrased_titles_data_frame,
    summarized_texts_data_frame,
    how = "inner",
    on = "original_index"
)

In [8]:
# save the processed data
# make sure the target folder exists first: to_csv does not create missing
# directories, so the export would fail on a fresh runtime / checkout
os.makedirs("./data/processed", exist_ok = True)

# the dataframe is written as a single CSV member inside a zip archive
generated_fake_news_dataframe.to_csv(
    "./data/processed/generated_fake_news.zip",
    compression = {
        "method" : "zip", 
        "archive_name" : "generated_fake_news.csv"
    }
  )

In [9]:
def print_fake_news(fake_news_dataframe, original_fake_news_id, width = 100):
  """Print the fake news items that belong to one original news item.

  Every row whose "original_index" equals original_fake_news_id is printed
  as: the wrapped paraphrased title, a separator line of "=" characters,
  the wrapped summarized text, and a few blank lines.

  Parameters
  ----------
  fake_news_dataframe : pandas.DataFrame
    Frame providing the "original_index", "paraphrased_title" and
    "summarized_text" columns.
  original_fake_news_id
    Value compared against the "original_index" column.
  width : int, optional
    Maximum line width used for wrapping and for the separator line
    (default 100).

  Returns
  -------
  None
    The items are written to standard output; nothing is printed when no
    row matches.
  """
  matching_rows = fake_news_dataframe[fake_news_dataframe["original_index"] == original_fake_news_id]
  wrapper = tw.TextWrapper(width = width)
  separator = "=" * width

  for _, fake_news_item in matching_rows.iterrows():
    print(wrapper.fill(fake_news_item["paraphrased_title"]))
    print(separator)
    print(wrapper.fill(fake_news_item["summarized_text"]))
    # visual gap between consecutive items
    print("\n" * 2)

def print_generated_fake_news(original_fake_news_id):
  """Print the generated fake news derived from one original news item.

  Convenience wrapper that forwards to print_fake_news, always using the
  notebook-level generated_fake_news_dataframe as the data source.
  """
  return print_fake_news(generated_fake_news_dataframe, original_fake_news_id)

In [10]:
# show every generated fake news row whose "original_index" is 0
print_generated_fake_news(0)

A Michigan anti-locking group with 380,000 members has been deleted by Facebook.
Facebook has shuttered a popular group for Michiganders who oppose their governor’s extreme lockdown
measures.<n>The move fuels debate about free speech during the coronavirus crisis and comes after
thousands of cars drove around Lansing in 'Operation Gridlock' last month, encouraging protesters to
drive bumper-to-9x9 on roads with no windows or doors so they would be blocked from passing other
vehicles - an act known as Bumper Tobumper Roading (BTR) before being shut down by local police –
which was later ruled not valid because it violated traffic laws but still attracted huge media
attention when first revealed this week; Twitter users have also complained that BRT is now off
limits following its popularity among state residents over Ebola fears — though social network
officials say there are currently more than 1 million members across North American countries
whereBRT exists: You can visit our site he