# Scrape additional data

## Park

In [None]:
import requests
from bs4 import BeautifulSoup

# Scrape the park addresses shown on the first "find a park" listing page
# and print them, one per line.
url = 'https://vicparks.com.au/find-a-park/'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.text, 'html.parser')

# Park entries are looked up by their "park-info-holder" class.
info_divs = soup.find_all('div', class_='park-info-holder')

addresses = []
for div in info_divs:
    address_span = div.find('span', class_='address park-info')
    if not address_span:
        continue
    # The address text is read from the link inside the span. Entries without
    # a link are skipped instead of failing with AttributeError on None.
    address_link = address_span.find('a')
    if not address_link:
        continue
    address = address_link.get_text(strip=True)
    if address:
        addresses.append(address)

for address in addresses:
    print(address)

In [62]:
import requests
from bs4 import BeautifulSoup
import csv

# Walk the paginated park listing (/page/1/, /page/2/, ...) and collect every
# park address, then save the addresses with a running ID.
base_url = 'https://vicparks.com.au/find-a-park/page/'
page_number = 1
addresses = []

while True:
    url = f'{base_url}{page_number}/'

    # Timeout so that a single stalled request cannot hang the whole crawl.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    info_divs = soup.find_all('div', class_='park-info-holder')

    # A page without any park entries is treated as the end of the listing.
    if not info_divs:
        break

    for div in info_divs:
        address_span = div.find('span', class_='address park-info')
        if not address_span:
            continue
        # The address text is read from the link inside the span. Entries
        # without a link are skipped instead of raising AttributeError on None.
        address_link = address_span.find('a')
        if not address_link:
            continue
        address = address_link.get_text(strip=True)
        if address:
            addresses.append(address)

    page_number += 1

# One row per address; IDs are 1-based and follow scrape order.
with open('../data/landing/park.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['ID', 'Address'])
    writer.writerows(enumerate(addresses, start=1))



## Hospital and health services

In [64]:
import requests
from bs4 import BeautifulSoup
import csv

# Collect hospital / health-service addresses from the paginated VAHI listing
# and save them to CSV.
base_url = 'https://vahi.vic.gov.au/hospital-and-health-services'
# NOTE(review): this crawl starts at page 26 rather than at the beginning of
# the listing, and writes to the same CSV path as the page-1 crawl later in the
# notebook. This looks like a leftover experiment; confirm before relying on it.
page_number = 26
data = []

while True:
    url = f"{base_url}?page={page_number}"

    # Timeout so that a single stalled request cannot hang the whole crawl.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    address_divs = soup.find_all('div', class_='views-field views-field-field-address')

    # No address fields on the page: nothing more to collect.
    if not address_divs:
        break

    for div in address_divs:
        address = div.get_text(strip=True)
        data.append(address)

    # Keep going only while the page offers a link whose text is exactly "Next".
    # `string=` is the current name of the filter formerly spelled `text=`.
    next_page_link = soup.find('a', string='Next')
    if next_page_link:
        page_number += 1
    else:
        break

with open('../data/landing/hospital_health_services_addresses.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Address'])
    # Each address becomes a single-column row.
    writer.writerows([address] for address in data)



In [65]:
import requests
from bs4 import BeautifulSoup
import csv

# Base URL of the paginated hospital / health-service listing
base_url = 'https://vahi.vic.gov.au/hospital-and-health-services'
# NOTE(review): the "views-field" class names suggest a Drupal Views listing,
# whose pager is normally zero-based (?page=0 is the first page). Confirm that
# starting at page 1 does not skip the first page of results.
page_number = 1
data = []

while True:
    url = f"{base_url}?page={page_number}"

    # Timeout so that a single stalled request cannot hang the whole crawl.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    address_divs = soup.find_all('div', class_='views-field views-field-field-address')

    # A page without any address fields is treated as the end of the listing.
    if not address_divs:
        break

    for div in address_divs:
        address = div.get_text(strip=True)
        data.append(address)

    page_number += 1

# Save as CSV: a header row, then one single-column row per address.
with open('../data/landing/hospital_health_services_addresses.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Address'])
    writer.writerows([address] for address in data)



## Shopping mall

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Scrape the Victorian shopping centres from Wikipedia, grouped by the <h3>
# region headings that follow the "Victoria" <h2>, then print and save them.
url = 'https://en.wikipedia.org/wiki/List_of_shopping_centres_in_Australia#Victoria'

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.text, 'html.parser')

victoria_heading = soup.find('h2', {'id': 'Victoria'})
if victoria_heading is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("Could not find an <h2> with id 'Victoria' on the page")

# Maps region name -> list of shopping-centre strings, in page order.
shopping_centres = {}

current_region = None
for tag in victoria_heading.find_all_next():
    # The next <h2> marks the start of the following section, so stop there.
    if tag.name == 'h2':
        break

    if tag.name == 'h3':
        # Each <h3> opens a new region; later lists are filed under it.
        current_region = tag.get_text(strip=True)
        shopping_centres[current_region] = []

    elif tag.name == 'ul' and current_region:
        # Lists that appear before the first <h3> are ignored.
        shopping_centres[current_region].extend(
            li.get_text(strip=True) for li in tag.find_all('li')
        )

for region, centres in shopping_centres.items():
    print(region)
    for centre in centres:
        print(f"  - {centre}")

with open('../data/landing/victoria_shopping_centres.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Region', 'Shopping Centre'])

    # One row per (region, centre) pair.
    for region, centres in shopping_centres.items():
        for centre in centres:
            writer.writerow([region, centre])

Melbourne CBD
  - Collins Place,Collins Street
  - Emporium Melbourne,Bourke Street
  - Block Arcade,Collins Street
  - The District Docklands,Docklands
  - Galleria,Elizabeth Street
  - Melbourne Central,Lonsdale Street
  - Melbourne GPO,Bourke Street
  - QV Village,Lonsdale Street
  - Royal Arcade,Bourke Street
  - St Collins Lane,Collins Street
  - Southbank Promenade,Southbank
  - Kmart Centre,Bourke Street
Melbourne suburbs
  - Armada Dandenong Plaza,Dandenong
  - Bayside Shopping Centre,Frankston
  - Box Hill Central,Box Hill
  - Broadmeadows Central,Broadmeadows
  - Caulfield Plaza Shopping Centre,Caulfield
  - Chadstone Shopping Centre,Malvern East
  - Chirnside Park Shopping Centre,Chirnside Park
  - DFO Essendon,Essendon Fields
  - DFO Moorabbin,Cheltenham
  - DFO South Wharf,South Wharf
  - DFO Unihill,Bundoora
  - Eastland Shopping Centre,Ringwood
  - Essendon Fields Shopping Centre,Essendon Fields
  - Forest Hill Chase Shopping Centre,Forest Hill
  - Greensborough Plaza,Gr

## Train station

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the webpage content
url = 'https://en.wikipedia.org/wiki/List_of_Metro_Trains_Melbourne_railway_stations'
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the first "wikitable" that follows the "List of current stations" heading
heading = soup.find('h2', id='List_of_current_stations')
if heading is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("Could not find an <h2> with id 'List_of_current_stations' on the page")
table = heading.find_next('table', class_='wikitable')
if table is None:
    raise ValueError("Could not find a 'wikitable' table after the stations heading")

# Extract Stations and Suburbs
def extract_station_suburb(table):
    """Collect a station name and a link title from every data row of a table.

    The first ``<tr>`` is skipped as the header row, and rows without any
    ``<td>`` contribute nothing. For each remaining row:

    * the station is the stripped text of the first ``<a>`` inside the row's
      ``<th>``, or ``None`` when the row has no ``<th>`` or no link in it;
    * the "suburb" is the ``title`` attribute of the first ``<a>`` of the
      earliest ``<td>`` whose first link has a non-empty title, or ``None``
      when no cell qualifies.

    NOTE(review): the sample output printed under this cell shows values such
    as 'Aircraft railway station' and 'Alamein line' in the Suburb column, so
    this heuristic appears to pick up station/line links rather than suburbs.
    Confirm which table column actually holds the suburb.

    Args:
        table: Parsed table element offering BeautifulSoup's ``find_all`` /
            ``find`` interface.

    Returns:
        A ``(stations, suburbs)`` tuple of two parallel lists.
    """
    station_names = []
    suburb_names = []

    # Slice off the first <tr>, which is the header row.
    for tr in table.find_all('tr')[1:]:
        data_cells = tr.find_all('td')
        if not data_cells:
            continue

        # Station name comes from the link in the row's <th>, when present.
        header_cell = tr.find('th')
        name_link = header_cell.find('a') if header_cell else None
        station_names.append(name_link.text.strip() if name_link else None)

        # Only the first link of each cell is inspected; the first non-empty
        # title wins and the remaining cells are not looked at.
        first_links = (td.find('a') for td in data_cells)
        titles = (link.get('title', None) for link in first_links if link)
        suburb_names.append(next((title for title in titles if title), None))

    return station_names, suburb_names

# Run the extraction on the table located earlier in this cell.
stations, suburbs = extract_station_suburb(table)

# Combine the two parallel lists into a two-column DataFrame and write it to
# the landing folder without the index column.
df = pd.DataFrame(data={'Station': stations, 'Suburb': suburbs})
df.to_csv(path_or_buf='../data/landing/stations_and_suburbs.csv', index=False)

# Show the first rows as a quick sanity check.
print(df.head())


      Station                             Suburb
0    Aircraft           Aircraft railway station
1     Alamein                       Alamein line
2      Albion  Albion railway station, Melbourne
3  Alphington         Alphington railway station
4      Altona             Altona railway station
