In [33]:
import requests, sys, logging, csv
from io import BytesIO, StringIO
import pandas as pd


# ================================ set logger =================================
logger=logging.getLogger('vic_scraper')
logger.setLevel(logging.INFO)
handler=logging.StreamHandler()
formatter=logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s]: %(message)s',datefmt=r'%Y-%m-%d %H:%M:%S')
# add formatter to handler
handler.setFormatter(formatter)
# add handler to logger
logger.addHandler(handler)
logger.info('Victoria water strider start crawling...')
# =============================================================================
# define worker function
def get_file_info(url):

    headers={'win32':{},
            'linux':{
                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
                "accept":"application/json, text/javascript, */*; q=0.01",
                "content-type": "application/x-www-form-urlencoded; charset=UTF-8"
            }}

    data={'win32':{},
        'linux':{
            "option": "com_waterregister_reports",
            "task": "report.view",
            "type": "BR04",
            "inputRegion[]": ["Northern","Southern","Western","all"],  # manully combined individual item into a list
            "inputYear[]": "2023"
        }}


    platform=sys.platform

    res=requests.post(url,headers=headers[platform],data=data[platform])
    res.raise_for_status()
    res=res.json()
    file_name=res['csv_name']
    file_path=res['csv_file']
    logger.info(f"file info retrieved. filename: {file_name}")

    return file_name, file_path


def get_csv_response(file_name):
    url=f"https://waterregister.vic.gov.au"
    headers={'win32':{},
            'linux':{
                "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
                "accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            }}

    params={'win32':{},
        'linux':{
            "option": "com_waterregister_reports",
            "task": "download",
            "file": file_name
        }}

    platform=sys.platform
    res=requests.get(url,headers=headers[platform],params=params[platform])
    res.raise_for_status()
    logger.info(f'Response for {file_name} has been successfully retrieved')
    return res


def get_byte_data(response):
    # initialize byteIO
    f=BytesIO()
    # write to byteIO
    for chunk in response.iter_content(chunk_size=1024):
        f.write(chunk)
    # reset seeker
    f.seek(0)
    logger.info('Done writing http response to memory')
    return f

[2023-04-28 14:47:10 - vic_scraper - INFO]: Victoria water strider start crawling...
[2023-04-28 14:47:10 - vic_scraper - INFO]: Victoria water strider start crawling...
[2023-04-28 14:47:10 - vic_scraper - INFO]: Victoria water strider start crawling...
[2023-04-28 14:47:10 - vic_scraper - INFO]: Victoria water strider start crawling...
[2023-04-28 14:47:10 - vic_scraper - INFO]: Victoria water strider start crawling...


In [None]:

url=r'https://www.waterregister.vic.gov.au'
file_name, _ =get_file_info(url=url)
response=get_csv_response(file_name)

In [55]:

df

Unnamed: 0,Status,Approval Date,Seller Water Authority,Seller Owner Type,Seller Trading Zone Source,Buyer Water Authority,Buyer Owner Type,Buyer Trade Zone Source,Volume Traded (ML),Price Per ML ($),Deal Date,Deal Type,Application Source,Region,Commercial Category,Environmental Category,Trade Region
0,Approved,10/03/2023,Southern Rural Water,Private,41 Macalister,Southern Rural Water,Private,41 Macalister,30.0,0.00,,,Water Corporation,Southern,Non Commercial,Non Environmental,South
1,Approved,15/07/2022,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,31.3,0.00,,,Water Corporation,Northern,Non Commercial,Non Environmental,Within North VIC
2,Approved,14/11/2022,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,26.5,0.00,,,Water Corporation,Northern,Non Commercial,Non Environmental,Within North VIC
3,Approved,02/12/2022,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,50.0,0.00,,,Water Corporation,Northern,Non Commercial,Non Environmental,Within North VIC
4,Approved,09/12/2022,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,Lower Murray Water,Private,7 VIC Murray - Barmah to SA,44.2,0.00,,,Water Corporation,Northern,Non Commercial,Non Environmental,Within North VIC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7498,Approved,27/04/2023,Goulburn-Murray Water,Private,6 VIC Murray - Dart to Barmah,Goulburn-Murray Water,Private,6 VIC Murray - Dart to Barmah,104.9,376.91,23/12/2019,Private lease arrangement (not LTT),Broker portal,Northern,Commercial,Non Environmental,Within North VIC
7499,Approved,27/04/2023,Goulburn-Murray Water,Private,1A Greater Goulburn,Goulburn-Murray Water,Private,1A Greater Goulburn,250.0,290.86,08/04/2020,Private lease arrangement (not LTT),Broker portal,Northern,Commercial,Non Environmental,Within North VIC
7500,Refused,27/04/2023,Goulburn-Murray Water,Private,6 VIC Murray - Dart to Barmah,Goulburn-Murray Water,Private,7 VIC Murray - Barmah to SA,450.0,0.00,27/04/2023,Related party trade,Broker portal,Northern,Non Commercial,Non Environmental,Within North VIC
7501,Approved,27/04/2023,Goulburn-Murray Water,Private,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,Private,1A Greater Goulburn,450.0,0.00,27/04/2023,Related party trade,Broker portal,Northern,Non Commercial,Non Environmental,Within North VIC
