In [67]:
import os
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Pool, cpu_count, Manager, Value
from queue import Queue

import pandas as pd
from selenium import webdriver
from selenium.webdriver import FirefoxOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Configure pandas so DataFrames are displayed without column, width or
# cell-content limits and without wrapping wide frames.
for _option, _setting in (
    ('display.max_columns', None),
    ('display.expand_frame_repr', False),
    ('display.width', None),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _setting)

# Progress counter shared by every worker thread. The script entry point resets
# it to 0 before a run; the lock makes the increment in scrape() atomic.
counter = 0
_counter_lock = threading.Lock()


def scrape(accr_code):
    """Scrape search results and transfer history for one accreditation code.

    Opens a headless Firefox session, searches the public STC register for
    ``accr_code`` and, for every row of the search-results table, clicks the
    link in the row's eighth cell and reads the transfer-history table shown
    in the certificate-details dialog.

    Args:
        accr_code: Accreditation code typed into the register's search field.

    Returns:
        A ``(search_results_data, transfer_history_data)`` tuple of row lists.
        Each search row is the list of its cell texts. Each transfer row is
        its cell texts followed by the text of the search row's eighth cell
        (certificate serial number) and second cell (accreditation code), or
        by two ``None`` values when the history table only contains the
        "No data available in table" placeholder.

    Errors are printed rather than raised; whatever was collected before the
    failure is still returned. As a side effect the module-level ``counter``
    is incremented and a progress line is printed, which reads the
    module-level ``accreditation_Code_List`` for the total.
    """
    global counter

    register_url = "https://www.rec-registry.gov.au/rec-registry/app/public/stc-register"
    search_rows_xpath = '//*[@id="search-ranges-results"]/tbody/tr'
    transfer_rows_xpath = '//*[@id="transfer-history-results"]/tbody/tr'
    dialog_xpath = '//*[@id="certificate-details-dialog"]'

    driver = None
    search_results_data = []
    transfer_history_data = []
    try:
        opts = FirefoxOptions()
        opts.add_argument("--headless")
        driver = webdriver.Firefox(options=opts)
        wait = WebDriverWait(driver, 10)

        driver.get(register_url)
        search_box = wait.until(
            EC.presence_of_element_located((By.ID, 'accreditationCode'))
        )
        search_box.send_keys(accr_code, Keys.ENTER)
        # Fixed pause kept from the original flow; presumably it gives the
        # results table time to refresh, since the wait below only checks that
        # some row is present -- TODO confirm and replace with an explicit wait.
        time.sleep(10)
        wait.until(EC.presence_of_element_located((By.XPATH, search_rows_xpath)))
        s_rows = driver.find_elements(By.XPATH, search_rows_xpath)

        for s_row in s_rows:
            s_cols = s_row.find_elements(By.TAG_NAME, 'td')
            s_cols_data = [ele.text for ele in s_cols]
            search_results_data.append(s_cols_data)

            # Reuse the texts already read instead of asking the browser again
            # for every transfer row.
            certificate_number = s_cols_data[7]
            accreditation_code = s_cols_data[1]

            s_cols[7].find_element(By.TAG_NAME, 'a').click()
            wait.until(EC.presence_of_element_located((By.XPATH, transfer_rows_xpath)))
            t_rows = driver.find_elements(By.XPATH, transfer_rows_xpath)

            for t_row in t_rows:
                t_cols_data = [
                    ele.text for ele in t_row.find_elements(By.TAG_NAME, 'td')
                ]
                if t_cols_data == ['No data available in table']:
                    # Placeholder row: no transfer to attribute to a certificate.
                    t_cols_data += [None, None]
                else:
                    t_cols_data += [certificate_number, accreditation_code]
                transfer_history_data.append(t_cols_data)

            # Close the dialog before moving on to the next search row.
            dialog = wait.until(
                EC.presence_of_element_located((By.XPATH, dialog_xpath))
            )
            dialog.send_keys(Keys.ESCAPE)
    except Exception as e:
        print(f"An error occurred in scrape: {e}")
    finally:
        if driver:
            driver.quit()

    # scrape() runs on several threads at once; an unguarded ``counter += 1``
    # can lose updates, so increment and snapshot the value under the lock.
    with _counter_lock:
        counter += 1
        processed = counter
    print(f"Processed {processed} out of {len(accreditation_Code_List)}")
    return search_results_data, transfer_history_data

def scrape_all(accreditation_Code_List):
    """Scrape every accreditation code on a thread pool and merge the rows.

    Args:
        accreditation_Code_List: Iterable of accreditation codes; each one is
            passed to ``scrape``.

    Returns:
        A ``(search_results_all, transfer_history_all)`` tuple holding the
        search rows and transfer-history rows of all codes, concatenated in
        the order the codes were given.
    """
    with ThreadPoolExecutor() as pool:
        per_code_results = pool.map(scrape, accreditation_Code_List)

    # Leaving the ``with`` block waits for all workers, so the results below
    # are already complete when they are merged.
    search_results_all, transfer_history_all = [], []
    for found_rows, history_rows in per_code_results:
        search_results_all += found_rows
        transfer_history_all += history_rows
    return search_results_all, transfer_history_all

if __name__ == '__main__':

    accreditation_Code_List = ['PVD2393168','PVD2393190','PVD2393171','PVD2393172']

    def get_headers():
        """Read the column headers of the search and transfer-history tables.

        Searches the register for the first code in
        ``accreditation_Code_List``, reads the search-results header cells,
        then opens the certificate dialog of the first result row to read the
        transfer-history header cells.

        Returns:
            A ``(s_headers, t_headers)`` tuple of header-text lists.
            ``t_headers`` additionally ends with "Certificate Number" and
            "accreditation_Code", matching the two values ``scrape`` appends
            to each transfer row. Either list is empty when it could not be
            read; errors are printed rather than raised.
        """
        driver = None
        # Start empty so the return statement is always valid, even when the
        # page fails to load or the search yields no rows.
        s_headers = []
        t_headers = []
        try:
            opts = FirefoxOptions()
            opts.add_argument("--headless")
            driver = webdriver.Firefox(options=opts)
            driver.get("https://www.rec-registry.gov.au/rec-registry/app/public/stc-register")
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'accreditationCode'))).send_keys(accreditation_Code_List[0], Keys.ENTER)
            time.sleep(10)
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="search-ranges-results"]/tbody/tr')))

            s_headers = [header.text for header in driver.find_elements(By.XPATH, '//*[@id="search-ranges-results"]/thead/tr/th')]
            s_rows = driver.find_elements(By.XPATH, '//*[@id="search-ranges-results"]/tbody/tr')

            # The transfer-history headers are read from the dialog, so opening
            # the first row is enough; clicking further rows while the dialog
            # is still open is unnecessary.
            if s_rows:
                s_cols = s_rows[0].find_elements(By.TAG_NAME, 'td')
                link = s_cols[7].find_element(By.TAG_NAME, 'a')
                link.click()
                WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="transfer-history-results"]/tbody/tr')))
                t_headers = [header.text for header in driver.find_elements(By.XPATH, '//*[@id="transfer-history-results"]/thead/tr/th')]
                t_headers.append("Certificate Number")
                t_headers.append("accreditation_Code")
        except Exception as e:
            print(f"An error occurred in get_headers: {e}")

        finally:
            if driver:
                driver.quit()
        return s_headers, t_headers

    s_headers, t_headers = get_headers()

    # Progress counter read and incremented by scrape(); reset before each run.
    counter = 0
    search_results_data, transfer_history_data = scrape_all(accreditation_Code_List)

    # Fall back to default column labels when header discovery failed, rather
    # than raising on a mismatch between an empty header list and real rows.
    search_results_data = pd.DataFrame(search_results_data, columns=s_headers or None)
    transfer_history_data = pd.DataFrame(transfer_history_data, columns=t_headers or None)

Processed 1 out of 4
Processed 2 out of 4
Processed 3 out of 4
Processed 4 out of 4


In [68]:
search_results_data

Unnamed: 0,Current owner,Accreditation code,Fuel source,Generation year,Creation year,Generation state,Status,Certificate serial number,Certificate quantity
0,"AGL HP1 Pty Limited, AGL HP2 Pty Limited and AGL HP3 Pty Limited",PVD2393168,S.G.U. - solar (deemed),2015,2016,WA,Invalid due to surrender,1-105,105
1,Solargain PV Pty Ltd,PVD2393190,S.G.U. - solar (deemed),2015,2016,ACT,Invalid due to audit,1-64,64
2,Home Comfort and Sustainability Services Pty Ltd T/A Australian Solar Power Consultants,PVD2393171,S.G.U. - solar (deemed),2015,2016,QLD,Invalid due to audit,1-126,126
3,Home Comfort and Sustainability Services Pty Ltd T/A Australian Solar Power Consultants,PVD2393172,S.G.U. - solar (deemed),2015,2016,QLD,Invalid due to audit,1-417,417


In [69]:
transfer_history_data

Unnamed: 0,Transferred date,Seller,Buyer,Certificate Number,accreditation_Code
0,13/7/2016 00:05,Ashley Noon T/A Green Wiring,Emerging Energy Solutions Group Pty Ltd,1-105,PVD2393168
1,13/7/2016 00:41,Emerging Energy Solutions Group Pty Ltd,"AGL HP1 Pty Limited, AGL HP2 Pty Limited and AGL HP3 Pty Limited",1-105,PVD2393168
2,No data available in table,,,,
3,No data available in table,,,,
4,No data available in table,,,,
