# Website Scraping
This is a simple program that scrapes carpark information from a website using Pandas and BeautifulSoup and saves it to CSV files.

In [1]:
import collections
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime

### Set up variables

In [2]:
# Column headers of the output table, in the order they are written out
keys = [
    'Carpark Name',
    'Carpark Address',
    'Mon-Fri before 5/6pm',
    'Mon-Fri after 5/6pm',
    'Sat',
    'Sun/Public Holiday',
]
# One independent, initially empty list per column header
d = collections.OrderedDict([(column, []) for column in keys])

# Placeholder values for the module-level variables that the scraping
# functions overwrite through `global`
full_table = 0
location = address = url = 'na'

### Define functions: web scraping, saving to dataframe, saving to csv

In [3]:
def readurl(url):
    """
    This function takes the url and returns the parking fees of the carpark

    Every table on the page is parsed with pandas; the first column of the
    table at position 6 is stored in the global `full_table` and returned.
    """
    global full_table
    tables = pd.read_html(url)
    # table 6 holds the required info, all of it in the first column
    full_table = tables[6].iloc[:, 0]
    return full_table


def add_and_loc(url):
    """
    This function takes the url and returns the location and address of the carpark

    The page is downloaded and parsed, and the text of the first element with
    class "grayboxborder" is split into lines: the first line is the location
    and the second line is the address. Both are also stored in the globals
    `location` and `address`.
    """
    global location, address

    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')

    # this class contains the table of information
    info_box = soup.find(class_="grayboxborder")
    lines = info_box.text.strip().split("\n")

    location = lines[0]
    address = lines[1].rstrip()
    return location, address


def append(full_table, location, address):
    """
    This function appends all the information of one carpark to the dictionary

    full_table -- indexable holding the fee text; entries 1, 3, 5 and 7 are
                  read as the four fee columns, in that order
    location   -- value appended under 'Carpark Name'
    address    -- value appended under 'Carpark Address'

    Every '$' in a fee text is prefixed with a backslash (presumably so the
    value is not treated as a math delimiter when rendered -- confirm).

    Any exception raised while reading or converting a fee entry (for
    example AttributeError when an entry is not text) propagates before the
    dictionary is touched, so a rejected carpark never leaves the columns
    with different lengths.
    """
    global d

    fee_columns = ('Mon-Fri before 5/6pm', 'Mon-Fri after 5/6pm', 'Sat', 'Sun/Public Holiday')
    fee_positions = (1, 3, 5, 7)

    # Build the complete row first: everything that can fail happens here,
    # before any column list is modified.
    row = [('Carpark Name', location), ('Carpark Address', address)]
    for column, position in zip(fee_columns, fee_positions):
        # '\\$' is a backslash followed by '$', written as a valid escape
        row.append((column, full_table[position].replace('$', '\\$')))

    for column, value in row:
        d[column].append(value)
    

def convert(dictionary, n):
    """
    This function passes the dictionary into a dataframe and saves it into a csv file with today's date

    dictionary -- mapping of column name to list of values
    n          -- suffix placed after the date in the file name

    The file is written to the current working directory as
    Parking_<YYYYMMDD>_<n>.csv, without the index column.
    """
    # Keys are loaded as rows and then flipped into columns; presumably done
    # this way so that lists of unequal length are tolerated -- confirm.
    table = pd.DataFrame.from_dict(dictionary, orient='index').T
    today = datetime.now().strftime('%Y%m%d')
    filename = 'Parking_{}_{}.csv'.format(today, n)
    table.to_csv(filename, index=False)

### Run functions

In [4]:
# Fixed parts of a carpark page url; the zero-padded page ID goes in between.
front_url = 'http://www.sgcarmart.com/news/carpark_index.php?ID='
back_url = '&LOC=all&TYP=carpark&SRH=#carparkrates'

# Page IDs 1-1000 are visited in 10 batches of 100.
for n in range(0, 10):
    for i in range(1 + (100 * n), 101 + (100 * n)):  # there are 926 webpages
        url = front_url + ("%03d" % i) + back_url
        try:
            full_table = readurl(url)
            location, address = add_and_loc(url)
            append(full_table, location, address)
        # Blank/error pages are skipped and the url is printed for reference,
        # tagged with the kind of exception that caused the skip. The loop
        # moves on to the next ID by itself, so nothing else is needed here.
        except AttributeError:
            print(url + ' attr')
        except ValueError:
            print(url + ' val')
        except IndexError:
            print(url + ' index')

    # creates a csv for every 100 in case of error midway; d is never
    # cleared, so each file also contains the rows of the earlier batches
    convert(d, n)

http://www.sgcarmart.com/news/carpark_index.php?ID=021&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=142&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=167&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=209&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=344&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=453&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=501&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=532&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=533&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=542&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www

http://www.sgcarmart.com/news/carpark_index.php?ID=972&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=973&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=974&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=975&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=976&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=977&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=978&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=979&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=980&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www.sgcarmart.com/news/carpark_index.php?ID=981&LOC=all&TYP=carpark&SRH=#carparkrates index
http://www