In [309]:
from selenium import webdriver
from selenium.webdriver import FirefoxOptions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import os
import time
import pandas as pd

In [310]:

# Define the CER scraper class, which scrapes the REC Registry for an individual accreditation code
class CER_Scraper:
    """Headless-Firefox scraper for the REC Registry page at ``self.URL``.

    Typical use: set ``accreditation_Code``, call ``search()`` and then
    ``scraper()``, read ``search_results_data`` / ``transfer_history_data``
    (lists of rows, each row a list of cell texts) and finally call
    ``close_driver()``.

    Every public method catches its own exceptions and prints an error
    message instead of raising, so a failure in one step does not stop the
    caller.
    """

    # Seconds to pause after submitting a search so the results can render.
    SEARCH_WAIT_SECONDS = 6
    # Zero-based positions, within a search-results row, of the cells that
    # are copied onto every transfer-history row.
    ACCREDITATION_CODE_COL = 1
    SERIAL_NUMBER_COL = 7

    def __init__(self):
        """Start a headless Firefox session and initialise empty result buffers."""
        self.accreditation_Code = None
        self.search_results_data = []
        self.transfer_history_data = []
        self.opts = FirefoxOptions()
        self.opts.add_argument("--headless")
        self.driver = webdriver.Firefox(options=self.opts)
        self.s_headers = []
        self.t_headers = []
        self.URL = "https://www.rec-registry.gov.au/rec-registry/app/public/stc-register"
        # Parent of the current working directory, with a trailing slash.
        self.base_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))+'/'

    def search(self):
        """Load ``self.URL`` and submit ``self.accreditation_Code`` in the search field.

        Errors are printed, not raised.
        """
        try:
            # Load the webpage
            self.driver.get(self.URL)

            # Find the search field
            code_field = self.driver.find_element(By.ID, 'accreditationCode')  # adjust the locator if it's not accurate

            # Type the accreditation code and press enter
            code_field.send_keys(self.accreditation_Code)
            code_field.send_keys(Keys.ENTER)

            # Fixed pause to give the results time to load
            time.sleep(self.SEARCH_WAIT_SECONDS)
            print('Search successful')

        except Exception as error:
            print('Error: search failed',error)

    def get_headers(self):
        """Placeholder: currently only prints a success message."""
        try:
            print('get headers successful')

        except Exception as error:
            print('Error: get headers failed',error)

    def scraper(self):
        """Scrape the search-results table and each row's transfer history.

        For every usable row of the search-results table the cell texts are
        appended to ``self.search_results_data``. The link in the
        certificate-serial-number cell is then clicked to open the transfer
        history popup, whose rows are appended to
        ``self.transfer_history_data`` with two extra trailing values: the
        row's certificate serial number and its accreditation code.
        ``self.s_headers`` and ``self.t_headers`` receive the matching column
        names.

        Rows that do not have enough cells to contain the serial-number
        column are skipped instead of aborting the whole scrape.

        Errors are printed, not raised.
        """
        try:
            # Extract table headers of search results table
            self.s_headers = [
                header.text
                for header in self.driver.find_elements(By.XPATH, '//*[@id="search-ranges-results"]/thead/tr/th')
            ]
            # Extract table rows of search results table
            result_rows = self.driver.find_elements(By.XPATH, '//*[@id="search-ranges-results"]/tbody/tr')

            for result_row in result_rows:
                cells = result_row.find_elements(By.TAG_NAME, 'td')
                if len(cells) <= self.SERIAL_NUMBER_COL:
                    # Not a data row (too few cells): nothing to record and
                    # no transfer-history link to follow.
                    continue

                # Get the search results table data
                cell_texts = [cell.text for cell in cells]
                self.search_results_data.append(cell_texts)

                # Read once per result row; these are the same for every
                # transfer-history row that belongs to it.
                serial_number = cell_texts[self.SERIAL_NUMBER_COL]
                accreditation_code = cell_texts[self.ACCREDITATION_CODE_COL]

                # Click the link in the initial table to open the transfer history popup
                # NOTE(review): there is no wait after the click; confirm the
                # popup table is populated synchronously.
                cells[self.SERIAL_NUMBER_COL].find_element(By.TAG_NAME, 'a').click()

                # Get the transfer history table headers plus the two added columns
                self.t_headers = [
                    header.text
                    for header in self.driver.find_elements(By.XPATH, '//*[@id="transfer-history-results"]/thead/tr/th')
                ]
                self.t_headers.append('associated_certificate_serial_number')
                self.t_headers.append('accreditationCode')

                transfer_rows = self.driver.find_elements(By.XPATH, '//*[@id="transfer-history-results"]/tbody/tr')
                for transfer_row in transfer_rows:
                    transfer_texts = [cell.text for cell in transfer_row.find_elements(By.TAG_NAME, 'td')]
                    # Tag the transfer with the certificate range and accreditation code it belongs to
                    transfer_texts.append(serial_number)
                    transfer_texts.append(accreditation_code)
                    self.transfer_history_data.append(transfer_texts)

                # Close the transfer history popup
                dialog = self.driver.find_elements(By.XPATH, '//*[@id="certificate-details-dialog"]')[0]
                dialog.send_keys(Keys.ESCAPE)

                print('scrape successful',self.accreditation_Code)

        except Exception as error:
            print('Error: scrape failed',error)

    def close_driver(self):
        """End the WebDriver session.

        Uses ``quit()`` rather than ``close()``: ``close()`` only closes the
        current window, while ``quit()`` ends the session and releases the
        browser and driver process. Errors are printed, not raised.
        """
        try:
            self.driver.quit()
            print('close driver successful')

        except Exception as error:
            print('Error: close driver failed',error)

    def display_data(self):
        """Print the collected search-results rows and transfer-history rows."""
        try:
            print(self.search_results_data)
            print(self.transfer_history_data)
            print('display data successful')

        except Exception as error:
            print('Error: display data failed',error)

In [311]:
# Accreditation codes to look up in the REC Registry, one search per code
accreditation_Code_List = [
    'PVD2393168',
    'PVD2393190',
    'PVD2393171',
    'PVD2393172',
    'PVD2393173',
    'PVD2393200',
    'PVD2393208',
    'PVD2393444',
    'SW2393250',
    'WCMGQL01',
]

# Plain-list accumulators that scrape() fills in place: scraped rows ...
search_results_df, transfer_history_df = [], []
# ... and the column headers that go with them
s_headers_df, t_headers_df = [], []


# Define a function to scrape the REC Registry for a list of accreditation codes
def scrape(search_results_df, transfer_history_df, s_headers_df, t_headers_df, accreditation_Code_List):
    """Scrape the REC Registry for every accreditation code in a list.

    A single ``CER_Scraper`` (one browser session) is reused for all codes.
    The four list arguments are updated IN PLACE, so callers may ignore the
    return value:

    * ``search_results_df`` / ``transfer_history_df`` are extended with the
      rows scraped for each code.
    * ``s_headers_df`` / ``t_headers_df`` are replaced with the headers held
      by the scraper once all codes have been processed.

    The browser session is closed even if the loop is interrupted or raises,
    so no WebDriver is left running.

    Args:
        search_results_df: list that receives the search-results rows.
        transfer_history_df: list that receives the transfer-history rows.
        s_headers_df: list that receives the search-results column headers.
        t_headers_df: list that receives the transfer-history column headers.
        accreditation_Code_List: iterable of accreditation codes to search for.

    Returns:
        The same four list objects, in the order
        ``(search_results_df, transfer_history_df, s_headers_df, t_headers_df)``.
    """
    # One WebDriver session shared by every search
    cer_scraper = CER_Scraper()

    try:
        for ac in accreditation_Code_List:
            # Point the scraper at this code and start from empty per-code buffers
            cer_scraper.accreditation_Code = ac
            cer_scraper.search_results_data = []
            cer_scraper.transfer_history_data = []
            # Run the search
            cer_scraper.search()
            # Scrape the data
            cer_scraper.scraper()
            # Append this code's rows to the caller's lists (in place)
            search_results_df += cer_scraper.search_results_data
            transfer_history_df += cer_scraper.transfer_history_data

        # Replace the caller's header lists with the headers the scraper holds
        s_headers_df.clear()
        s_headers_df.extend(cer_scraper.s_headers)

        t_headers_df.clear()
        t_headers_df.extend(cer_scraper.t_headers)
    finally:
        # Always release the browser, including on error or interruption
        cer_scraper.close_driver()

    return search_results_df, transfer_history_df,s_headers_df,t_headers_df

# Run the scraper; it fills the four lists in place, so its return value is not needed
scrape(
    search_results_df=search_results_df,
    transfer_history_df=transfer_history_df,
    s_headers_df=s_headers_df,
    t_headers_df=t_headers_df,
    accreditation_Code_List=accreditation_Code_List,
)

# Turn the accumulated rows into DataFrames, using the scraped headers as column names
search_results_df = pd.DataFrame(data=search_results_df, columns=s_headers_df)
transfer_history_df = pd.DataFrame(data=transfer_history_df, columns=t_headers_df)

Search successful
scrape successful PVD4069315
scrape successful PVD4069315
Search successful
scrape successful PVD2393168
close driver successful


In [312]:
search_results_df

Unnamed: 0,Current owner,Accreditation code,Fuel source,Generation year,Creation year,Generation state,Status,Certificate serial number,Certificate quantity
0,Red Energy Pty. Limited,PVD4069315,S.G.U. - solar (deemed),2021,2021,WA,Invalid due to surrender,1-70,70
1,Red Energy Pty. Limited,PVD4069315,S.G.U. - solar (deemed),2021,2021,WA,Invalid due to surrender,71-91,21
2,"AGL HP1 Pty Limited, AGL HP2 Pty Limited and A...",PVD2393168,S.G.U. - solar (deemed),2015,2016,WA,Invalid due to surrender,1-105,105


In [313]:
transfer_history_df

Unnamed: 0,Transferred date,Seller,Buyer,associated_certificate_serial_number,accreditationCode
0,5/5/2021 03:50,SIKHAN PTY LTD,RETA (WA) Pty Ltd,1-70,PVD4069315
1,11/5/2021 01:42,RETA (WA) Pty Ltd,Snowy Hydro Limited,1-70,PVD4069315
2,3/2/2022 23:56,Snowy Hydro Limited,Red Energy Pty. Limited,1-70,PVD4069315
3,6/5/2021 05:07,SIKHAN PTY LTD,RETA (WA) Pty Ltd,71-91,PVD4069315
4,11/5/2021 01:42,RETA (WA) Pty Ltd,Snowy Hydro Limited,71-91,PVD4069315
5,3/2/2022 23:56,Snowy Hydro Limited,Red Energy Pty. Limited,71-91,PVD4069315
6,13/7/2016 00:05,Ashley Noon T/A Green Wiring,Emerging Energy Solutions Group Pty Ltd,1-105,PVD2393168
7,13/7/2016 00:41,Emerging Energy Solutions Group Pty Ltd,"AGL HP1 Pty Limited, AGL HP2 Pty Limited and A...",1-105,PVD2393168
