In [None]:
#Data Extraction using Beautiful Soup
def data_extraction():
    """Scrape the EDSA line view page and append one snapshot to a raw-data file.

    Downloads the page, reads the first word of the <p> inside every
    ``div.line-name`` and the second word of every ``div.line-status``,
    and writes the result to ``rawdata_<timestamp>.csv`` in the current
    working directory.

    Returns:
        list: ``[list_of_names, list_of_southbound, list_of_northbound, timestamp]``
        where the two volume lists hold 0 (LIGHT), 1 (MODERATE) or 2 (HEAVY);
        any other status word is kept unchanged as a string. ``timestamp`` is
        a local-time string formatted as ``%Y-%m-%d %H:%M:%S``.

    Raises:
        IndexError: if a status div has fewer than two words, or if there are
            fewer volume entries than line names.
        AttributeError: if a ``line-name`` div contains no <p> element.
        Errors raised by ``urlopen`` propagate unchanged.
    """
    #Libraries For Data Extraction
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import datetime

    print("Extracting data")

    #specify the url
    quote_page = 'http://mmdatraffic.interaksyon.com/line-view-edsa.php'

    # query the website and parse the html; the with-block guarantees the
    # HTTP response is closed even when parsing fails
    with urlopen(quote_page) as page:
        soup = BeautifulSoup(page, 'html.parser')

    #set time stamp (local time, taken after the download has finished)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    #extract name of lines
    list_of_names = []
    for name_div in soup.find_all('div', {'class': 'line-name'}):
        paragraph = name_div.find("p")
        words = paragraph.get_text(separator=' ').split(' ')
        list_of_names.append(words[0]) #NOTE: fix string separation (only the first word is kept)
    # Drop the first entry (presumably a header row -- TODO confirm against
    # the page). Slicing, unlike pop(0), does not fail on an empty list.
    list_of_names = list_of_names[1:]

    #extract southbound/northbound volume
    volume_codes = {"LIGHT": 0, "MODERATE": 1, "HEAVY": 2}
    list_of_southbound = []
    list_of_northbound = []

    for position, status_div in enumerate(soup.find_all('div', {'class': 'line-status'})):
        status_word = status_div.text.split()[1]
        # unknown status words are passed through unchanged
        volume = volume_codes.get(status_word, status_word)

        # status divs alternate, starting with southbound
        if position % 2 == 0:
            list_of_southbound.append(volume)
        else:
            list_of_northbound.append(volume)

    #save data into one variable
    rows = [
        '{:>12}  {:>12}  {:>12}'.format(name, str(list_of_southbound[index]), str(list_of_northbound[index])) + "\n"
        for index, name in enumerate(list_of_names)
    ]
    stringdata = "TIMESTAMP: " + str(timestamp) + "\n" + "\n" + "".join(rows)

    #Save raw data into csv file
    # NOTE(review): the timestamp contains ':' which is not a legal filename
    # character on Windows -- confirm the target platform before relying on this.
    filename = "rawdata_" + str(timestamp) + ".csv"
    with open(filename, "a") as file:
        file.write(stringdata)

    #return list containing data lists
    return [list_of_names, list_of_southbound, list_of_northbound, timestamp]

In [None]:
# Visualization using Matplotlib
def visualize_overall(list_of_names, list_of_southbound, list_of_northbound,timestamp):
    #Libraries For Visualization and Analysis
    %matplotlib notebook
    import pandas as pd
    import numpy as np
    import seaborn as sns
    from matplotlib import pyplot as plt
    import matplotlib.animation as animation
    
    print("Visualizing data")
    
    #Visualize overall EDSA traffic at certain timestamp
    roadnum = range(0,len(list_of_names))
    plt.scatter(roadnum, list_of_southbound, label="SOUTHBOUND", color="g")
    plt.scatter(roadnum, list_of_northbound, label = "NORTHBOUND", color = "m")

    plt.xlabel("Road")
    plt.ylabel("Volume")

    plt.title(label="EDSA TRAFFIC: "+timestamp)
    plt.legend(bbox_to_anchor=(0, 1), loc='lower center', ncol=1)
    plt.savefig("EDSA TRAFFIC: "+timestamp+".png") #finally, save the figure  

In [None]:
def main(interval_seconds=6, duration_seconds=None): #contains all necessary sub functions
    """Periodically extract and visualize traffic data.

    Args:
        interval_seconds: time between two runs of the job. The default of 6
            equals the previous hard-coded ``every(0.1).minutes``.
        duration_seconds: how long to keep harvesting. ``None`` (default)
            loops until the kernel is interrupted or the job raises.

    The job is registered on ``schedule``'s module-level default scheduler and
    is cancelled again when the loop exits for any reason, so calling
    ``main()`` a second time in the same kernel does not leave a duplicate job
    behind.
    """
    #Libraries For Scheduling
    import schedule
    import time

    def job():
        print("Started new run.")
        list_of_names, list_of_southbound, list_of_northbound, timestamp = data_extraction()
        visualize_overall(list_of_names, list_of_southbound, list_of_northbound, timestamp)

    scheduled_job = schedule.every(interval_seconds).seconds.do(job)

    #new strategy: define a time interval in which you would like to harvest data.
    #after this period of time is over, go on and create the visualization of the data. in short, live
    #processing might be infeasible in this context
    deadline = None if duration_seconds is None else time.monotonic() + duration_seconds

    try:
        while deadline is None or time.monotonic() < deadline:
            schedule.run_pending()
            time.sleep(1)
    finally:
        # always unregister, including on KeyboardInterrupt or a failing job
        schedule.cancel_job(scheduled_job)
               
main()  # run main: starts the scheduling loop, which keeps this cell busy until it is interrupted