<h3><u>Traffic Data Collector</u></h3>
This script collects traffic speed data by making an API request every 2 min. 


<b><i>DISCLAIMER:</i></b>
<ul>
    <li>Before running the script, make sure to enter your own API key in the <code>__main__</code> section. The HERE Technologies Developer platform is available <a href="https://developer.here.com/">here</a>.</li>
    <li>The path to the file containing the lat-longs of the route needs to be hard-coded in the <code>__main__</code> section. The file can be downloaded from <a href="https://drive.google.com/file/d/1wdGMgG3R_Bl_ZhH6_N6l4uH6v_KS20Sv/view?usp=sharing">here</a>.</li>
</ul>

In [None]:
import json
import time
from datetime import datetime
import re
import pytz
import requests
import pandas as pd
import numpy as np
from operator import itemgetter

In [None]:
def get_speed(url, timeout=30) :
    """
    Get the traffic speeds at the moment, one value per route section.

    Requests `url`, parses the JSON response and, for every entry in
    data["routes"][0]["sections"], computes
    summary["length"] * 3.6 / summary["duration"] rounded to 3 decimals.
    The 3.6 factor converts m/s to km/h, assuming the API reports the
    length in metres and the duration in seconds (TODO confirm against
    the routing API reference).

    url     : full request URL (including the API key)
    timeout : seconds to wait for the server before giving up, so that a
              stalled connection cannot block the caller forever

    Returns a list of speeds in section order. A section whose duration
    is 0 yields NaN instead of raising ZeroDivisionError.

    Raises requests.exceptions.RequestException on network failures,
    timeouts and non-2xx HTTP responses.
    """

    # getting the page; fail loudly on HTTP errors instead of hitting an
    # unrelated KeyError on the error payload further down
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # converting obtained JSON data into a dictionary
    data = json.loads(response.content.decode("utf-8"))

    # calculating speed
    speeds = []
    for section in data["routes"][0]["sections"]:
        summary = section["summary"]
        if summary["duration"]:
            speeds.append(np.round(summary["length"] * 3.6/summary["duration"], 3))
        else:
            # zero-duration section: speed is undefined
            speeds.append(np.nan)

    return speeds

In [None]:
def record_speeds(url, location_names, not_nil_indices, end_after, interval, save_after) :
    """
    Poll the route URL periodically and append the speeds to a CSV file.

    A file named "Data_<date>@<H_M_S>.csv" (start date/time in the
    Asia/Kolkata timezone) is created in the current working directory
    with the header row, and rows are appended to it in batches.

    url             : request URL passed to get_speed()
    location_names  : column names, one per recorded section
    not_nil_indices : indices into the list returned by get_speed() whose
                      speeds are recorded (same order as location_names)
    end_after       : in minutes; no new sample is started after this
    interval        : in seconds; sleep between two samples
    save_after      : # records after which the buffered rows are
                      flushed to the file

    Rows still buffered in memory are written even if the loop is
    interrupted by an exception (e.g. a failed request or Ctrl+C); the
    exception is then propagated.
    """

    tz = pytz.timezone("Asia/Kolkata")
    dateTime = datetime.now(tz)

    # start time and date for file name.
    # The un-padded H_M_S part is built by hand: the "%-H" style strftime
    # flags are a platform extension and are rejected on Windows.
    Date = dateTime.strftime("%d%b%Y")
    start_time = "{0}_{1}_{2}".format(dateTime.hour, dateTime.minute, dateTime.second)
    # filename for the csv file
    file_name = "Data_" + Date + "@" + start_time + ".csv"

    # ending time
    end_time = end_after*60 + time.time()

    counter = 0    # number of data-points collected so far

    # dataframe for storing data
    # adding the Date and Time column as well
    df = pd.DataFrame({i: [] for i in ["Date", "Time"] + location_names})
    # creating file (header only)
    df.to_csv(file_name, index=False)

    print("\nStarting the scraping process. Sit back and relax\n")

    try :
        # on time up, leave the loop
        while time.time() < end_time :
            # storing only the speeds corresponding to the Non-NIL locations.
            # Plain indexing works for any number of indices (itemgetter
            # returns a scalar for one index and fails for none).
            all_speeds = get_speed(url)
            speeds = [all_speeds[i] for i in not_nil_indices]

            # current date and time of this sample; strftime drops the
            # sub-second part and the UTC offset reliably
            now = datetime.now(tz)
            Time = now.strftime("%H:%M:%S")

            # appending the speeds to the dataframe; the date is taken per
            # sample so rows recorded after midnight get the right date
            df.loc[len(df.index)] = [now.strftime("%d%b%Y"), Time] + speeds
            counter += 1

            msg = "Scraped at {0}\tData-points collected: {1}\tGoing to sleep for {2}sec".format(Time, counter, interval)
            # if "save_after" data points are collected, flush them to the file. Don't wait for the script to end
            if counter % save_after == 0 :
                msg += "\tFiles created.."
                print("\r" + msg, end="")

                # appending the buffered rows to the csv file
                df.to_csv(file_name, mode="a", header=False, index = False)
                # removing all the rows from the dataframe
                df.drop(df.index, axis=0, inplace=True)

            else :
                # trailing spaces overwrite a longer message left on the row
                print("\r" + msg + " "*len(msg + "\tFiles created.."), end="")

            # going to sleep
            time.sleep(interval)

    finally :
        # saving one last time, also when the loop ended with an error,
        # so rows not yet flushed are never lost
        df.to_csv(file_name, mode="a", header=False, index = False)

    print("\n\nScraping complete.. File saved as " + file_name)
    print("\nData-points collected :", counter)

In [None]:
if __name__ == "__main__" :
    # Script entry point: reads the route file, builds the routing request
    # URL and starts the periodic speed collection.

    end_after = 2.5*60    # in minutes  (for 2.5 hours)
    interval = 2*60  # in seconds  (for 2 min)
    save_after = 50   # save data after every 50th data point collected

    # fetching the dataset (expects Lat, Long and Comment columns)
    route = pd.read_csv("PATH TO THE LAT-LONG OF THE ROUTE FILE").astype("str")

    # finding out the route points and names
    points = (route.Lat + "," + route.Long).tolist()
    # the Comment of the last point is dropped: n points give n-1 names
    temp_locations = route.Comment.tolist()[:-1]

    # finding the positions that are not marked as NIL
    not_nil_indices = [i for i, val in enumerate(temp_locations) if val != "NIL"]

    # making a list with no NIL locations.
    # Plain indexing works for any number of indices (itemgetter returns a
    # scalar for a single index and fails for none).
    location_names = [temp_locations[i] for i in not_nil_indices]

    apiKey = "INSERT YOUR API KEY HERE"  # api key
    # setting up waypoints in between; empty when the route has no
    # intermediate points, so no blank "&via=" parameter is sent
    via = "".join("&via=" + point for point in points[1:-1])
    # generating url
    url = "https://router.hereapi.com/v8/routes?origin=" + points[0] + "&transportMode=car&destination=" + points[-1] + via + "&return=summary&apiKey=" + apiKey

    record_speeds(url, location_names, not_nil_indices, end_after, interval, save_after)