In [155]:
import re
import json
import pickle
import requests
import pandas as pd
from ast import literal_eval
from bs4 import BeautifulSoup
from datetime import datetime, timedelta


# Browser-like headers sent with the station-table request; the
# X-Requested-With entry marks the request as an XMLHttpRequest.
header = dict(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
        ),
        ("X-Requested-With", "XMLHttpRequest"),
    ]
)

"""
Three steps:
1. get gas prices from list in html with soup
2. get station info from requests
3. Combine and $$$
"""
# Timestamp captured once when this cell runs; grab_prices substitutes it for
# entries whose age is reported as exactly '1d'.
now = datetime.now()
# Locations to scrape; the commented-out tail lists further candidates.
locations = ['Oakville', 'Burlington','Hamilton',]# 'Ancaster', 'Milton', 'Mississauga']

# Load the previously saved snapshots, or start empty when none can be read.
try:
    with open('Data/master_data.json', 'r') as f:
        data_dfs = json.load(f)
    print('Loaded master')
except (OSError, ValueError):
    # OSError covers a missing or unreadable file; ValueError covers
    # json.JSONDecodeError (corrupt file). Anything else (KeyboardInterrupt,
    # programming errors) is no longer silently swallowed.
    print('No master found')
    data_dfs = dict()

# Seconds to wait for each HTTP response before giving up instead of hanging.
_REQUEST_TIMEOUT_S = 30


def _parse_relative_date(text):
    """Convert an age string such as '5h' or '12m' into a timestamp.

    Strings ending in 'h' / 'm' become ``datetime.now()`` minus that many
    hours / minutes. The exact string '1d' becomes the module-level ``now``.
    Anything else (including an empty string) is returned unchanged.
    """
    if text.endswith('h'):
        return datetime.now() - timedelta(hours=int(text[:-1]))
    if text.endswith('m'):
        return datetime.now() - timedelta(minutes=int(text[:-1]))
    # Reset older dates to now
    # NOTE(review): only the literal '1d' is reset; '2d' etc. stay as strings —
    # confirm whether that is intended.
    if text == '1d':
        return now
    return text


def grab_prices(temp_dfs):
    """Scrape GasBuddy prices for every entry in ``locations`` and persist them.

    For each location the page is requested twice: once without custom headers
    (the price list is cut out of the page text after the ``'p.a = '`` marker)
    and once with ``header`` (the first HTML table is read as the station
    list). The two are combined into one frame, which is appended to
    ``temp_dfs[loc]`` as a column-oriented dict.

    Args:
        temp_dfs: dict mapping location name to a list of snapshot dicts.
            It is mutated in place and also returned.

    Returns:
        A 3-tuple ``(temp_dfs, df, comparison)``:
        ``df`` is the frame built for the last location (``None`` when
        ``locations`` is empty); ``comparison`` is an empty dict, because no
        comparison is computed in this function. The original returned an
        undefined name here, which raised ``NameError`` on a fresh kernel.

    Side effects:
        Performs HTTP requests, prints progress, and rewrites
        ``Data/master_data.json`` (creating ``Data/`` if needed).
    """
    import os
    from io import StringIO

    df = None            # stays None if there is nothing to scrape
    comparison = dict()  # placeholder so the 3-value return is always valid

    for loc in locations:
        url = f'https://www.gasbuddy.com/GasPrices/Ontario/{loc}'
        url_soup = requests.get(url, timeout=_REQUEST_TIMEOUT_S)

        # Prices list
        soup = BeautifulSoup(url_soup.content, "html.parser")
        # Find prices
        prices = soup.text.split('p.a = ')[1].split(';\r')[0].replace('[', '').replace(']', '').split(',"')
        # Clean and remove 0's
        prices = [i.replace('"', '') for i in prices if len(i) > 1]

        # Stations dataframe
        r = requests.get(url, headers=header, timeout=_REQUEST_TIMEOUT_S)
        # Set dataframe (wrapped in StringIO: passing literal HTML is deprecated)
        df = pd.read_html(StringIO(r.text))[0]
        # Clean dataframe
        df = df[[1,2,3]].dropna().rename(columns={1:'Station', 2: 'City', 3:'Date'})

        # Find city (taken from the first row and used to trim every address)
        city = df.City.iloc[0]

        # Set values
        df['Prices'] = prices
        df['Address'] = df.Station.apply(lambda x: x.split(')  ')[1])
        df['Address'] = df.Address.apply(lambda x: x.split(city)[0])
        df['Station'] = df.Station.apply(lambda x: x.split(' (')[0])
        df['Date'] = df.Date.apply(lambda x: x.split(' ago')[0])

        # Create datetime. The whole column is assigned at once; the previous
        # `df.Date.iloc[i] = ...` was chained assignment, which is not
        # guaranteed to write back into `df`. dtype=object keeps the mix of
        # datetime objects and leftover strings as-is.
        df['Date'] = pd.Series(
            [_parse_relative_date(date) for date in df.Date],
            index=df.index,
            dtype=object,
        )

        df.reset_index(drop=True, inplace=True)

        if loc in temp_dfs.keys():
            print('Appending to list')
            temp_dfs[loc].append(df.to_dict(orient='list'))
        else:
            temp_dfs[loc]= [df.to_dict(orient='list')]

    print('Saving to master')
    # Make sure the target directory exists so the scrape is not lost at save time.
    os.makedirs('Data', exist_ok=True)
    with open('Data/master_data.json', 'w') as f:
        # default=str serialises datetime values as their string form.
        json.dump(temp_dfs, f, indent=4, sort_keys=True, default=str)

    return temp_dfs, df, comparison

# Run the scraper against the loaded master data and unpack its three results;
# the bare name on the last line makes the notebook display the accumulated data.
(prices_data, test_df, comp) = grab_prices(temp_dfs=data_dfs)
prices_data

Loaded master
Comparison failed
Appending to list
Comparison failed
Appending to list
Comparison failed
Appending to list
Saving to master


{'Ancaster': [{'Address': ['100 Legend Ct near Martindale Cr  ',
    '1180 Wilson St W & Garner Rd W  ',
    '10 Legend Ct & Golf Links Rd  ',
    '1136 Golf Links Rd & Stone Church Rd  ',
    '523 Garner Rd E near Southcote Rd  ',
    '33 Wilson St W near Fiddlers Green Rd  ',
    '16 Wilson St W & Fiddlers Green Rd  '],
   'City': ['Ancaster',
    'Ancaster',
    'Ancaster',
    'Ancaster',
    'Ancaster',
    'Ancaster',
    'Ancaster'],
   'Date': ['2019-08-03 12:37:45.375074',
    '2019-08-03 09:46:45.376073',
    '2019-08-03 12:45:45.376073',
    '2019-08-03 12:36:45.377072',
    '2019-08-03 12:14:45.377072',
    '2019-08-03 09:46:45.377072',
    '2019-08-03 12:46:10.516985'],
   'Prices': ['111.4', '117.6', '119.9', '119.9', '120.9', '122.9', '123.3'],
   'Station': ['Costco ',
    'Pioneer ',
    'Shell ',
    'Esso ',
    'Esso ',
    'Esso ',
    'Petro-Canada ']}],
 'Burlington': [{'Address': ['5319 Lakeshore Rd & Kenwood Ave  ',
    '1447 Lakeshore Rd & Locust St  ',
    '1

In [151]:
# Show, per key of prices_data, how many snapshots have been collected.
for location, snapshots in prices_data.items():
    print(location, len(snapshots))

Ancaster 1
Burlington 4
Hamilton 4
Milton 1
Mississauga 1
Oakville 4


In [165]:
# Keep only the first entry stored under each key of `comp`.
temp_dict = {col: comp[col][0] for col in comp}

# comparison = pd.DataFrame(temp_dict)
# Re-read the master file from disk and list the snapshot count per key.
with open('Data/master_data.json', 'r') as master_file:
    data_dfs = json.load(master_file)
snapshot_counts = [len(snapshots) for snapshots in data_dfs.values()]
print(snapshot_counts)
# Render the second stored 'Ancaster' snapshot as a table.
pd.DataFrame(data_dfs['Ancaster'][1])

[2, 2, 2, 2, 2, 2]


Unnamed: 0,Address,City,Date,Prices,Station
0,100 Legend Ct near Martindale Cr,Ancaster,2019-08-03 13:51:42.612112,111.4,Costco
1,1180 Wilson St W & Garner Rd W,Ancaster,2019-08-03 13:38:42.613098,115.6,Pioneer
2,1136 Golf Links Rd & Stone Church Rd,Ancaster,2019-08-03 13:09:42.613098,119.9,Esso
3,10 Legend Ct & Golf Links Rd,Ancaster,2019-08-03 12:56:42.613098,119.9,Shell
4,523 Garner Rd E near Southcote Rd,Ancaster,2019-08-03 12:56:42.613098,120.9,Esso
5,33 Wilson St W near Fiddlers Green Rd,Ancaster,2019-08-03 09:56:42.613098,122.9,Esso


In [106]:
import random
# Import only `timedelta`: a plain `import datetime` here would rebind the name
# `datetime` from the class (imported at the top of the notebook) to the module
# and break every later `datetime.now()` call.
from datetime import timedelta

# Pick a random pause between 14400 s (4 h) and 21599 s (just under 6 h).
wait_time = random.choice(range(14400, 21600))
print(str(timedelta(seconds=wait_time)))
# The hour figure is the first four characters of the float's string form.
print('Sleeping:', str(wait_time/3600)[:4], 'hrs')

5:13:39
Sleeping: 5.22 hrs


In [56]:
# Re-read the master file and display the first stored 'Oakville' snapshot.
with open('Data/master_data.json', 'r') as master_file:
    data_dfs = json.loads(master_file.read())
# pd.DataFrame(data_dfs['Oakville'][0])
data_dfs['Oakville'][0]

'{"Station":{"0":"Econo ","1":"Pioneer ","2":"Husky ","3":"Shell ","4":"Esso ","5":"Esso ","6":"Petro-Canada ","7":"Pioneer ","8":"Petro-Canada ","9":"Petro-Canada "},"City":{"0":"Oakville","1":"Oakville","2":"Oakville","3":"Oakville","4":"Oakville","5":"Oakville","6":"Oakville","7":"Oakville","8":"Oakville","9":"Oakville"},"Date":{"0":1564826360228,"1":1564771460228,"2":1564827320229,"3":1564827440229,"4":1564825940229,"5":1564825460229,"6":1564825460229,"7":1564825460229,"8":1564825460230,"9":1564818260230},"Prices":{"0":"119.9","1":"120.4","2":"121.7","3":"121.9","4":"121.9","5":"121.9","6":"121.9","7":"121.9","8":"121.9","9":"121.9"},"Address":{"0":"2383 Dundas St W near Old Bronte Rd  ","1":"2451 Lakeshore Rd W & Bronte Rd  ","2":"630 Fourth Line near Speers Rd  ","3":"1528 Dundas St W & Third Line  ","4":"562 Trafalgar Rd & Cross Ave  ","5":"1499 Upper Middle Rd & Third Line  ","6":"1550 North Service Rd W & Third Line  ","7":"754 Bronte Rd near QEW  ","8":"450 Dundas St E & Post

In [3]:
# NOTE(review): `df` and `prices` are not defined at notebook top level in the
# cells shown (only inside grab_prices); this cell presumably relies on kernel
# state left by an earlier run — confirm before re-running on a fresh kernel.
df1 = df[[1,2,3]].dropna().rename(columns={1:'Station', 2: 'City', 3:'Date'})
# City of the first row; used below to cut the city name off every address.
city = df1.City.iloc[0]
df1['Prices'] = prices
df1['Address'] = df1.Station.apply(lambda x: x.split(')  ')[1])
df1['Address'] = df1.Address.apply(lambda x: x.split(city)[0])
df1['Station'] = df1.Station.apply(lambda x: x.split(' (')[0])
df1['Date'] = df1.Date.apply(lambda x: x.split(' ago')[0])

# Turn '<n>h' / '<n>m' ages into timestamps; any other value is kept as-is.
parsed_dates = []
for date in df1.Date:
    if date.endswith('h'):
        date = datetime.now() - timedelta(hours=int(date[:-1]))
    elif date.endswith('m'):
        date = datetime.now() - timedelta(minutes=int(date[:-1]))
    parsed_dates.append(date)
# Assign the whole column at once: the previous `df1.Date.iloc[i] = date` was
# chained assignment, which is not guaranteed to write back into df1.
# dtype=object keeps datetime objects and leftover strings unchanged.
df1['Date'] = pd.Series(parsed_dates, index=df1.index, dtype=object)
df1.reset_index(drop=True)

Unnamed: 0,Station,City,Date,Prices,Address
0,Pioneer,Oakville,2019-08-02 17:57:52.928749,120.4,2451 Lakeshore Rd W & Bronte Rd
1,Petro-Canada,Oakville,2019-08-03 08:56:52.929750,121.9,450 Dundas St E & Postridge Dr
2,Shell,Oakville,2019-08-03 08:41:52.929750,121.9,1528 Dundas St W & Third Line
3,Esso,Oakville,2019-08-03 08:38:52.929750,121.9,1499 Upper Middle Rd & Third Line
4,Petro-Canada,Oakville,2019-08-03 08:10:52.930748,121.9,1123 Dorval Dr & North Service Rd W
5,Esso,Oakville,2019-08-03 07:57:52.930748,121.9,541 Maple Grove Dr & Cornwall Rd
6,Pioneer,Oakville,2019-08-03 06:57:52.930748,121.9,754 Bronte Rd near QEW
7,Econo,Oakville,2019-08-02 17:57:52.930748,121.9,2383 Dundas St W near Old Bronte Rd
8,7-Eleven,Oakville,2019-08-03 05:57:52.931747,123.4,2267 Lakeshore Rd W & East St
9,Husky,Oakville,2019-08-03 07:57:52.931747,123.7,630 Fourth Line near Speers Rd


In [79]:
# Keep columns 1-3 of the raw table, drop incomplete rows, renumber the rows
# and attach the scraped prices.
dfr = (
    df.loc[:, [1, 2, 3]]
    .dropna()
    .reset_index(drop=True)
    .assign(Prices=prices)
)
# Raw station text (column label 1) of the row labelled 1.
dfr.loc[1, 1]

'Pioneer  (157 reviews)  2451 Lakeshore Rd W & Bronte Rd  Oakville'