In [1]:
# Mount Google Drive at /content/gdrive so that later cells can read from and
# write to paths under /content/gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# Install the Kaggle command-line client and create the directory that the
# API key is copied into below.
!pip install -q kaggle
!mkdir -p /root/.kaggle 

# The Kaggle API key is stored in Google Drive at:
# /content/gdrive/MyDrive/Colab\ Notebooks/kaggle.json
# Copy it into /root/.kaggle/ and restrict it to owner read/write (mode 600).
!cp /content/gdrive/MyDrive/Colab\ Notebooks/kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json
# Download the news-articles dataset archive, extract it (-o overwrites files
# that already exist) and delete the archive afterwards.
!kaggle datasets download -d asad1m9a9h6mood/news-articles
!unzip -o /content/news-articles.zip
!rm /content/news-articles.zip
# Keep a copy of the extracted Articles.csv in the Drive assignment folder.
!cp /content/Articles.csv /content/gdrive/MyDrive/Colab\ Notebooks/ML\ Assignment/

Downloading news-articles.zip to /content
  0% 0.00/1.73M [00:00<?, ?B/s]
100% 1.73M/1.73M [00:00<00:00, 166MB/s]
Archive:  /content/news-articles.zip
  inflating: Articles.csv            


In [3]:
from datetime import datetime, timedelta
from random import randint, choice
import pandas as pd

In [4]:
def generate_random_date(std_time, time_delta_range_start, time_delta_range_end):
    """Return ``std_time`` moved a random whole number of days into the past or future.

    Args:
        std_time: Reference datetime the offset is applied to.
        time_delta_range_start: Smallest possible offset in days (inclusive).
        time_delta_range_end: Largest possible offset in days (inclusive).

    Returns:
        ``std_time`` plus or minus the randomly drawn number of days; the
        direction is chosen with equal probability.
    """
    # Draw the magnitude first, then the direction (-1 = past, +1 = future).
    day_count = randint(time_delta_range_start, time_delta_range_end)
    direction = choice((-1, 1))
    return std_time + timedelta(days=day_count) * direction

def random_date_format(date_obj, date_time_format_list):
    """Format ``date_obj`` with a randomly chosen pattern.

    Args:
        date_obj: Object providing ``strftime`` (e.g. a ``datetime``).
        date_time_format_list: Non-empty sequence of ``strftime`` patterns.

    Returns:
        ``date_obj`` rendered as a string with one pattern picked at random
        from ``date_time_format_list``.
    """
    return date_obj.strftime(choice(date_time_format_list))

In [5]:
# Reference instant around which the synthetic dates are generated.
now_time = datetime.now()

# Load the raw articles; the file is decoded as ISO-8859-1.
articles = pd.read_csv("/content/Articles.csv", encoding="ISO-8859-1")

# strftime patterns; one of them is picked at random for every injected date.
date_time_format_list = [
    "%Y-%m-%d %H:%M:%S",
    "%A, %B %d, %Y %I:%M %p",
    "%Y/%m/%d",
    "%d/%m/%Y",
    "%A %d. %B %Y",
    "%d/%m/%y",
    "%Y %b-%d %H:%M",
    "%m/%d/%Y",
    "%b-%d-%Y",
    "%d %B, %Y",
]

articles

Unnamed: 0,Article,Date,Heading,NewsType
0,KARACHI: The Sindh government has decided to b...,1/1/2015,sindh govt decides to cut public transport far...,business
1,HONG KONG: Asian markets started 2015 on an up...,1/2/2015,asia stocks up in new year trad,business
2,HONG KONG: Hong Kong shares opened 0.66 perce...,1/5/2015,hong kong stocks open 0.66 percent lower,business
3,HONG KONG: Asian markets tumbled Tuesday follo...,1/6/2015,asian stocks sink euro near nine year,business
4,NEW YORK: US oil prices Monday slipped below $...,1/6/2015,us oil prices slip below 50 a barr,business
...,...,...,...,...
2687,strong>DUBAI: Dubai International Airport and ...,3/25/2017,Laptop ban hits Dubai for 11m weekend traveller,business
2688,"strong>BEIJING: Former Prime Minister, Shaukat...",3/26/2017,Pak China relations not against any third coun...,business
2689,strong>WASHINGTON: Uber has grounded its fleet...,3/26/2017,Uber grounds self driving cars after accid,business
2690,strong>BEIJING: The New Development Bank plans...,3/27/2017,New Development Bank plans joint investments i...,business


In [6]:
# Add the label columns, every row initialised to 0:
#   Is_Deadline - set to 1 later for rows that receive an injected date
#   Start / End - character offsets of the injected date inside the article
new_columns = dict(Is_Deadline=0, Start=0, End=0)
articles = articles.assign(**new_columns)
articles

Unnamed: 0,Article,Date,Heading,NewsType,Is_Deadline,Start,End
0,KARACHI: The Sindh government has decided to b...,1/1/2015,sindh govt decides to cut public transport far...,business,0,0,0
1,HONG KONG: Asian markets started 2015 on an up...,1/2/2015,asia stocks up in new year trad,business,0,0,0
2,HONG KONG: Hong Kong shares opened 0.66 perce...,1/5/2015,hong kong stocks open 0.66 percent lower,business,0,0,0
3,HONG KONG: Asian markets tumbled Tuesday follo...,1/6/2015,asian stocks sink euro near nine year,business,0,0,0
4,NEW YORK: US oil prices Monday slipped below $...,1/6/2015,us oil prices slip below 50 a barr,business,0,0,0
...,...,...,...,...,...,...,...
2687,strong>DUBAI: Dubai International Airport and ...,3/25/2017,Laptop ban hits Dubai for 11m weekend traveller,business,0,0,0
2688,"strong>BEIJING: Former Prime Minister, Shaukat...",3/26/2017,Pak China relations not against any third coun...,business,0,0,0
2689,strong>WASHINGTON: Uber has grounded its fleet...,3/26/2017,Uber grounds self driving cars after accid,business,0,0,0
2690,strong>BEIJING: The New Development Bank plans...,3/27/2017,New Development Bank plans joint investments i...,business,0,0,0


In [7]:
# Fill the label columns: inject a synthetic date string into roughly half of
# the articles and record where it was placed.
for index, row in articles.iterrows():
    # Coin flip per row: about 50% become positive examples (date injected),
    # the rest stay untouched as negatives.
    if not choice([0, 1]):
        continue

    # Randomly choose a date within 1-100 days of now_time and a date-time format.
    rand_dt_obj = generate_random_date(now_time, 1, 100)
    rand_dt = random_date_format(rand_dt_obj, date_time_format_list)

    # split()/join() collapses every whitespace run into a single space, so the
    # offsets below refer to the normalised text that is written back.
    article_words = row["Article"].split()
    # randint is inclusive on both ends: the date may also go after the last word.
    rand_pos = randint(0, len(article_words))

    # Derive the offset from the insertion position instead of searching the
    # text: str.index() returns the FIRST match, which is the wrong span when
    # the same date string already occurs earlier in the article.
    prefix = ' '.join(article_words[:rand_pos])
    start = (len(prefix) + 1) if rand_pos else 0  # +1 for the separating space
    end = start + len(rand_dt)

    article_words.insert(rand_pos, rand_dt)
    article = ' '.join(article_words)
    # Invariant: the recorded span is exactly the injected date string.
    assert article[start:end] == rand_dt

    articles.loc[index, "Article"] = article
    articles.loc[index, "Is_Deadline"] = 1
    articles.loc[index, "Start"] = start
    articles.loc[index, "End"] = end
articles

Unnamed: 0,Article,Date,Heading,NewsType,Is_Deadline,Start,End
0,KARACHI: The Sindh government has decided to b...,1/1/2015,sindh govt decides to cut public transport far...,business,0,0,0
1,HONG KONG: Asian markets started 2015 on an up...,1/2/2015,asia stocks up in new year trad,business,0,0,0
2,HONG KONG: Hong Kong shares opened 0.66 percen...,1/5/2015,hong kong stocks open 0.66 percent lower,business,1,35,52
3,HONG KONG: Asian markets tumbled Tuesday follo...,1/6/2015,asian stocks sink euro near nine year,business,1,362,375
4,NEW YORK: US oil prices Monday slipped below $...,1/6/2015,us oil prices slip below 50 a barr,business,1,265,276
...,...,...,...,...,...,...,...
2687,strong>DUBAI: Dubai International Airport and ...,3/25/2017,Laptop ban hits Dubai for 11m weekend traveller,business,0,0,0
2688,"strong>BEIJING: Former Prime Minister, Shaukat...",3/26/2017,Pak China relations not against any third coun...,business,1,105,113
2689,strong>WASHINGTON: Uber has grounded its fleet...,3/26/2017,Uber grounds self driving cars after accid,business,0,0,0
2690,strong>BEIJING: The New Development Bank plans...,3/27/2017,New Development Bank plans joint investments i...,business,0,0,0


In [8]:
# Show only the rows flagged as containing a deadline (Is_Deadline == 1).
articles.loc[articles["Is_Deadline"] == 1]

Unnamed: 0,Article,Date,Heading,NewsType,Is_Deadline,Start,End
2,HONG KONG: Hong Kong shares opened 0.66 percen...,1/5/2015,hong kong stocks open 0.66 percent lower,business,1,35,52
3,HONG KONG: Asian markets tumbled Tuesday follo...,1/6/2015,asian stocks sink euro near nine year,business,1,362,375
4,NEW YORK: US oil prices Monday slipped below $...,1/6/2015,us oil prices slip below 50 a barr,business,1,265,276
6,KARACHI: Strong bulls 2022 Apr-07 10:55 on Fri...,1/9/2015,bullish kse jumps over 33000 psychological bar...,business,1,3,20
8,KARACHI: Wholesale market rates for sugar drop...,1/13/2015,sugar prices drop to rs 49.80 in sind,business,1,32,42
...,...,...,...,...,...,...,...
2681,strong>BEIJING: The Xinjiang Uygur autonomous ...,3/17/2017,CPEC China approves huge infrastructure projec...,business,1,218,226
2684,"strong>LONDON: Arcelik, the home appliances ar...",3/21/2017,For Turkish giant Arcelik Pakistan is among fo...,business,1,30,66
2685,strong>RIYADH/DUBAI: Saudi Arabia plans to tig...,3/21/2017,Saudis to tighten curbs on foreign workers in ...,business,1,150,160
2686,strong>BEIJING: A Chinese rail company has won...,3/25/2017,Chinese train manufacturer wins railcar bid in US,business,1,40,57


In [11]:
# Persist the labelled articles to the Drive assignment folder as CSV.
articles.to_csv(
    path_or_buf="/content/gdrive/MyDrive/Colab Notebooks/ML Assignment/Processed_Articles.csv"
)