# Google Maps Traffic Data

In [25]:
import requests
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd

import time
import random
import math
import datetime

## Generate Random Data Points from Specific Location

### Measure the Distance Between Two Points

In [26]:
# Two reference locations written as "latitude, longitude" text and parsed
# into [latitude, longitude] lists of floats.
point_A = [float(part) for part in "-29.60017332074011, 30.379163164887483".replace(" ", "").split(",")]
point_B = [float(part) for part in "-29.488765110829252, 30.215741548444527".replace(" ", "").split(",")]

# Index of each component inside a coordinate pair.
latitude, longitude = 0, 1

# Scale factor from degrees to distance (presumably kilometres per degree).
approximate_distance_per_degree = 110.25

# Euclidean distance between the two points, measured in degrees and then
# scaled to distance units.
latitude_gap = point_A[latitude] - point_B[latitude]
longitude_gap = point_A[longitude] - point_B[longitude]
distance_between = math.sqrt(latitude_gap**2 + longitude_gap**2) * approximate_distance_per_degree
distance_between

21.80565904852594

### Determine the Approximate Range of Random Coordinates

In [27]:
# Radius around the reference point within which random coordinates are
# generated, in the same distance units as approximate_distance_per_degree.
distance = 50.0
point = point_A

# Convert the radius into an offset expressed in degrees.
# The previous expression also added and then subtracted point[longitude];
# those terms cancel mathematically and only contributed floating-point
# rounding noise, so they have been removed.
# NOTE(review): the same per-degree factor is later applied to latitude and
# longitude alike; a degree of longitude covers less ground away from the
# equator, so the east-west spread is only approximate -- confirm acceptable.
range_value = distance / approximate_distance_per_degree
range_value

0.45351473922902485

### Create Random Coordinates

# Origin locations as [latitude, longitude] pairs; a set of random
# destination coordinates is generated around each of them.
points = [
    [-26.207201909503596, 28.04650823914504],
    [-29.6004500675832, 30.378738192502244],
]
johannesburg, pietermartizburg = points

def create_random_coordinates(coordinates, spread=None):
    """Return a randomly shifted copy of a coordinate pair.

    Every component is replaced by a value drawn uniformly from
    ``[component - spread, component + spread]``. The input sequence is
    left untouched.

    Args:
        coordinates: Iterable of numeric components, e.g. ``[lat, lon]``.
        spread: Maximum offset applied to each component. When omitted,
            the module-level ``range_value`` is used.

    Returns:
        A new list holding one shifted value per input component.
    """
    if spread is None:
        # Fall back to the notebook-wide setting so existing one-argument
        # calls behave exactly as before.
        spread = range_value
    return [random.uniform(value - spread, value + spread) for value in coordinates]

def create_list_of_random_coordinates(coordinate_point, count=50):
    """Generate several random coordinates around one point.

    Args:
        coordinate_point: The coordinate pair handed to
            ``create_random_coordinates`` for every sample.
        count: Number of random coordinates to produce (default 50,
            matching the previously hard-coded amount).

    Returns:
        A list with ``count`` entries, each the result of one
        ``create_random_coordinates(coordinate_point)`` call.
    """
    # Build the list directly instead of seeding it with placeholder
    # integers and overwriting them while iterating over the same list.
    return [create_random_coordinates(coordinate_point) for _ in range(count)]

def save_coordinates_to_file(coordinates, origin=None):
    """Write a list of coordinates to a text file named after their origin.

    The file is called ``random_coordinates_from<lat>,<lon>.txt`` and is
    created in the current working directory. Its first line is
    ``from:<lat>,<lon>``; each following line is one ``<lat>,<lon>`` entry
    from ``coordinates``.

    Args:
        coordinates: Iterable of coordinate pairs to store. (Previously this
            argument was ignored and the module-level ``random_coordinates``
            was written instead.)
        origin: Coordinate pair the entries were generated from. When
            omitted, the module-level ``point`` is used, as before.

    Raises:
        FileExistsError: If the target file already exists; mode ``"x"`` is
            kept so earlier results are never overwritten silently.
    """
    if origin is None:
        origin = point
    point_name = str(origin[0]) + "," + str(origin[1])

    coordinate_lines = [
        str(coordinate[0]) + "," + str(coordinate[1]) for coordinate in coordinates
    ]
    contents = f"from:{point_name}\n" + "\n".join(coordinate_lines)

    # The context manager closes the handle even when the write fails.
    with open(f"random_coordinates_from{point_name}.txt", "x") as text_file:
        text_file.write(contents)

# Generate and store one file of random coordinates per origin point.
# The loop variable is deliberately named `point`: save_coordinates_to_file
# reads that module-level name to build the file name and the header line,
# so renaming it would break the helper.
for point in points:
    random_coordinates = create_list_of_random_coordinates(point)
    save_coordinates_to_file(random_coordinates)



## Scrape Data

### Create Database Template

# Column headings of the traffic dataset, in the order they appear in the file.
column_names = [
    "Date",
    "Hour",
    "From (Coordinates)",
    "To (Coordinates)",
    "Distance",
    "Time (Usual)",
    "Time (Now)",
    "Time (Range)",
]

# Store the headings as the single line of a fresh CSV file. They are written
# as a data row (no index, no pandas header), so the file contains exactly
# that one line.
database_columns = pd.DataFrame([column_names])
database_columns.to_csv("google_maps_traffic_data.csv", header=False, index=False)

# Load the template again; the line written above is parsed as the header,
# giving an empty table whose columns are the headings.
database = pd.read_csv("google_maps_traffic_data.csv")
data_columns = database.columns

### Scrape Data

In [33]:
print("Scraper Started")
# Coordinate files to process: first line "from:<lat>,<lon>", then one
# "<lat>,<lon>" destination per line.
folder_files = [
    r"C:\Users\vente\OneDrive\Documents\Code\web_scraper_google_maps\random_coordinates_from-26.207201909503596,28.04650823914504.txt", 
    r"C:\Users\vente\OneDrive\Documents\Code\web_scraper_google_maps\random_coordinates_from-29.6004500675832,30.378738192502244.txt"
    ]
database_path = r"C:\Users\vente\OneDrive\Documents\Code\web_scraper_google_maps\google_maps_traffic_data.csv"
database = pd.read_csv(database_path)
data_columns = database.columns

root_URL = "https://www.google.com/maps/dir/"
# Upper bound for a single HTTP request so one stalled connection cannot
# hang the whole run.
request_timeout_seconds = 30


def _parse_route_details(html):
    """Extract route figures from the text of a directions page.

    The text around the first ``' km'`` occurrence is split on
    backslash-escaped quotes and stripped of fragments that contain markup
    characters; the remaining fragments are read positionally.

    Args:
        html: Page content as a string.

    Returns:
        ``[distance, time_no_traffic, time_traffic, time_range]`` or
        ``None`` when the expected fragments are not present.
    """
    marker = html.find(' km')  # the whole script relies on finding this
    if marker == -1:
        return None

    # Clamp the window start: a negative start would slice from the end of
    # the string instead of from its beginning.
    window_start = max(marker - 200, 0)
    section = np.array(html[window_start: marker + 400].split('\\"'))

    # Drop every fragment that contains one of these characters.
    for character in "[]/\\_:":
        section = section[np.char.find(section, character) == -1]
    # Drop fragments that are nothing but a separator. (The earlier loop
    # iterated the wrong list and only ever removed ",".)
    for item in (",", " "):
        section = section[section != item]

    try:
        distance = section[np.char.find(section, "km") != -1][0]
        # The two fragments following the distance are taken as the travel
        # time without and with traffic.
        position = np.where(section == distance)[0]
        time_no_traffic = section[position + 1][0]
        time_traffic = section[position + 2][0]
        time_range = section[np.char.find(section, " - ") != -1][0]
    except IndexError:
        # An expected fragment is missing or out of range.
        return None
    return [distance, time_no_traffic, time_traffic, time_range]


# Rows are collected here and appended to the table once, instead of
# rebuilding the DataFrame after every request.
rows = []
try:
    for text_file in folder_files:
        with open(text_file) as coordinate_file:
            lines = coordinate_file.read().split("\n")

        from_location = [lines[0].replace("from:", "")]
        # Skip blank lines (e.g. a trailing newline) so no request is made
        # for an empty destination.
        to_location = [line for line in lines[1:] if line]

        # Two passes: origin -> destinations, then (after the swap below)
        # every destination -> origin.
        for _direction in range(2):
            for from_loc in from_location:
                for to_loc in to_location:
                    URL = root_URL + from_loc + "/" + to_loc
                    try:
                        page = requests.get(URL, timeout=request_timeout_seconds)
                    except requests.RequestException as error:
                        # Keep going; the failed lookup is recorded as an
                        # empty row like any other unparsable page.
                        print("Request failed: ", error)
                        page_text = ""
                    else:
                        page_text = str(BeautifulSoup(page.content, "html.parser"))

                    date_time = datetime.datetime.now()
                    date = date_time.strftime("%Y-%m-%d")
                    hour = date_time.strftime("%H")

                    details = _parse_route_details(page_text)
                    if details is None:
                        details = [None, None, None, None]
                    row = [date, hour, from_loc, to_loc] + details
                    rows.append(row)

                    print("Completed: ", row)
                    # Random pause between requests.
                    wait_time_seconds = random.randrange(5, 10)
                    time.sleep(wait_time_seconds)
            from_location, to_location = to_location, from_location
        print("Completed: Text File")
finally:
    # Persist whatever was gathered, even if the run is interrupted (for
    # example by KeyboardInterrupt) or fails part-way through.
    if rows:
        database = pd.concat([database, pd.DataFrame(rows, columns=data_columns)])
    database.to_csv(database_path, index=False)
print("Scaper Finished")


Scraper Started
Completed:  ['2022-07-23', '16', '-26.207201909503596,28.04650823914504', '-26.0446397405829,27.80426232604286', None, None, None, None]
Completed:  ['2022-07-23', '16', '-26.207201909503596,28.04650823914504', '-26.49133840153267,27.795880438852187', None, None, None, None]
Completed:  ['2022-07-23', '16', '-26.207201909503596,28.04650823914504', '-26.012346963578064,27.853247845273337', None, None, None, None]


KeyboardInterrupt: 