# In [None]:
#Simple program to scrape carpark information and save it as a csv file.
"""
This program uses pandas, requests and BeautifulSoup to scrape the website and load the information into a dataframe.
The result is saved to a csv file.
"""

import collections
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime

#Column headings of the output table, in the order they appear in the csv
keys = [
    'Carpark Name',
    'Carpark Address',
    'Mon-Fri before 5/6pm',
    'Mon-Fri after 5/6pm',
    'Sat',
    'Sun/Public Holiday',
]
#One empty list per heading; NOTE: this name hides the builtin dict for the rest of the file
dict = collections.OrderedDict([(heading, []) for heading in keys])

#Placeholder values for the globals that are overwritten during scraping
full_table = 0
location = address = url = 'na'

#Define function to scrape website for parking fees
def readurl(url):
    """
    Read the html tables found at `url` and return the first column of table 6, which holds the parking fees.
    The same column is also stored in the global `full_table`.
    """
    global full_table
    tables = pd.read_html(url)
    fee_table = tables[6] #the required info is in the table at index 6
    full_table = fee_table.iloc[:, 0] #only the first column is needed
    return full_table

#Define function to scrape website for carpark location and address
def add_and_loc(url):
    """
    Download the page at `url` and read the carpark location and address from its "grayboxborder" element.
    The first line of that element's text is taken as the location and the second line as the address.
    Both values are stored in the globals `location` and `address` and returned as (location, address).
    """
    global location, address
    page = BeautifulSoup(requests.get(url).text, 'html.parser')
    header = page.find(class_="grayboxborder")

    lines = header.text.strip().split("\n")
    location = lines[0]
    address = lines[1].rstrip() #drop whitespace trailing the address line
    return location, address

#Define function to append all information into the dictionary 
def append(full_table, location, address):
    """
    This function takes all the parking fees, location and address of one carpark and appends them to the dictionary

    full_table is looked up at 1, 3, 5 and 7 for the fees of 'Mon-Fri before 5/6pm',
    'Mon-Fri after 5/6pm', 'Sat' and 'Sun/Public Holiday'. A backslash is placed in
    front of every '$' in a fee before it is stored.

    The whole row is prepared before anything is appended. If a fee is missing or is
    not a string, the lookup error or AttributeError is raised before any column is
    touched, so all columns keep the same length.
    """
    fee_columns = (
        ('Mon-Fri before 5/6pm', 1),
        ('Mon-Fri after 5/6pm', 3),
        ('Sat', 5),
        ('Sun/Public Holiday', 7),
    )

    #Build the complete row first: a failure here must not leave a half-written row behind
    row = [('Carpark Name', location), ('Carpark Address', address)]
    for column, position in fee_columns:
        row.append((column, full_table[position].replace('$', '\\$')))

    #Nothing above raised, so every column receives exactly one new value
    for column, value in row:
        dict[column].append(value)

#Run the scraping across webpages
#The two fixed parts of every page url; only the zero-padded ID in between changes
front_url = 'http://www.sgcarmart.com/news/carpark_index.php?ID='
back_url = '&LOC=all&TYP=carpark&SRH=#carparkrates'

for i in range(1,915): #there are 914 webpages
    url = front_url + ("%03d" % i) + back_url
    try:
        #use the values the helpers return rather than the globals they also set
        full_table = readurl(url)
        location, address = add_and_loc(url)
        append(full_table, location, address)
    #skip blank/error pages and print the url with a tag naming the error for reference;
    #the for loop moves on to the next ID by itself, so no counter update is needed
    except AttributeError:
        print(url + ' attr')
    except ValueError:
        print(url + ' val')
    except IndexError:
        print(url + ' index')

#Save the collected data to a csv file named after today's date (Parking_YYYYMMDD.csv)
datestring = datetime.now().strftime('%Y%m%d')
#Each dictionary entry is loaded as a row, then the frame is transposed so the entries become columns
df = pd.DataFrame.from_dict(dict, orient='index')
dft = df.T
dft.to_csv('Parking_{}.csv'.format(datestring), index=False)