In [1]:
import pandas as pd
from extract import open_driver, close_driver, wait_for_element
from transforms import parse_record, parse_game_date
from selenium.webdriver.common.by import By
from DML import db_connect, close_db_connect
from sql_queries import summary_table_insert
from psycopg2.extras import execute_batch

In [43]:
class Scraper:
    """Base scraper that owns a browser driver and a database connection.

    The driver is obtained from ``extract.open_driver`` and the connection /
    cursor pair from ``DML.db_connect``; this class only keeps track of the
    handles and makes opening/closing them safe to repeat.

    Note on the ``__get_table_*`` helpers: because their names start with two
    underscores, Python mangles them to ``_Scraper__get_table_*``. A subclass
    that writes ``self.__get_table_rows(...)`` will therefore look up its *own*
    mangled name and will not reach (or override) the versions defined here.
    """

    def __init__(self):
        self.driver = None
        self.conn = None
        self.cur = None
        # NOTE(review): looks like a leftover debugging value — kept so that
        # any code reading it keeps working; confirm before removing.
        self.test_attribute = 5

    def open_driver(self):
        """Start a new browser driver and store it on ``self.driver``.

        Any driver that is still open is closed first so that calling this
        method repeatedly does not leave orphaned browser sessions behind.
        """
        self.close_driver()
        self.driver = open_driver()

    def close_driver(self):
        """Close the current driver, if any. Safe to call more than once."""
        if self.driver:
            try:
                close_driver(self.driver)
            finally:
                # Drop the handle even if closing failed, so a stale driver is
                # never closed twice or reused.
                self.driver = None

    def db_connect(self):
        """Open a database connection and store it with its cursor.

        An already-open connection/cursor pair is closed first so it is not
        leaked when the attributes are overwritten.
        """
        self.close_db_connect()
        self.conn, self.cur = db_connect()

    def close_db_connect(self):
        """Close the connection and cursor, if both are set. Safe to repeat."""
        if self.conn and self.cur:
            try:
                close_db_connect(self.conn, self.cur)
            finally:
                self.conn = None
                self.cur = None

    #-----------------
    # SCRAPING HELPERS
    #-----------------
    def __get_table_columns(self, source):
        """Return the table's column names as a list of strings.

        ``source`` is the element to search in; the header cells (class
        ``rt-th``) are looked up inside its ``tableHeaderDiv`` element.
        """
        header_container = wait_for_element(
            source=source,
            search_by=By.CLASS_NAME,
            target="tableHeaderDiv",
            unique_element=True,
        )
        header_cells = wait_for_element(
            source=header_container, search_by=By.CLASS_NAME, target="rt-th"
        )

        return [cell.text for cell in header_cells]

    def __get_table_rows(self, source, headers):
        """Return the row elements (class ``rt-tr``) found in the table body.

        ``headers`` is accepted for signature parity with subclasses that map
        cell values to column names; it is not used here.
        """
        table_body = wait_for_element(
            source=source,
            search_by=By.CLASS_NAME,
            target="rt-tbody",
            unique_element=True,
        )

        return wait_for_element(
            source=table_body, search_by=By.CLASS_NAME, target="rt-tr"
        )

In [44]:
class SummaryScraper(Scraper):
    """Scraper for the summary table rendered inside the page's ``rt-table``.

    Each table row is converted to a flat list of values in a fixed order
    (team, season label, then the fields in ``_FIELDS_AFTER_SEASON``).
    NOTE(review): that order presumably mirrors the columns expected by
    ``summary_table_insert`` — confirm against the SQL.
    """

    # Keys read from the parsed record, in output order, following the team
    # name and the season label.
    # NOTE(review): "game_date", "home_game" and "opponent" are not table
    # headers, so they are presumably produced by ``parse_record`` — confirm.
    _FIELDS_AFTER_SEASON = (
        "game_date",
        "home_game",
        "opponent",
        "W",
        "L",
        "T",
        "OT",
        "P",
        "P%",
        "RW",
        "ROW",
        "S/O Win",
        "GF",
        "GA",
        "PP%",
        "PK%",
        "Net PP%",
        "Net PK%",
        "Shots/GP",
        "GA/GP",
        "FOW%",
    )

    def __init__(self, season="2021/22"):
        """Create the scraper.

        Args:
            season: Season label written as the second value of every row.
                Defaults to the previously hard-coded ``"2021/22"``.
        """
        super().__init__()
        self.season = season

    def scrape_page(self, url=None):
        """Open a driver, load ``url`` and return the table as a nested list.

        Args:
            url: Address of the page to scrape. When omitted, a notebook-level
                variable named ``url`` is used if one exists (this is what the
                method relied on implicitly before the parameter was added).

        Returns:
            A list with one list of values per table row.

        Raises:
            ValueError: if no URL was passed and no global ``url`` is defined.

        The driver is left open after the call; use ``close_driver()`` when
        finished.
        """
        if url is None:
            # Backward compatibility with callers that set a global `url`.
            url = globals().get("url")
        if not url:
            # Fail before launching a browser that would have nothing to load.
            raise ValueError(
                "scrape_page() needs a url: pass it as an argument or define a "
                "notebook-level `url` variable"
            )

        self.open_driver()

        self.driver.get(url)
        root_element = wait_for_element(
            source=self.driver, search_by=By.ID, target="root", unique_element=True
        )
        data_table = wait_for_element(
            source=root_element,
            search_by=By.CLASS_NAME,
            target="rt-table",
            unique_element=True,
        )

        # TODO: pagination is not handled; the "-pageJump" / "-totalPages"
        # elements are not read, so only the table as first rendered is scraped.

        table_headers = self.__get_table_columns(data_table)
        table_data = self.__get_table_rows(data_table, table_headers)

        return table_data

    #-----------------
    # SCRAPING HELPERS
    #-----------------
    def __get_table_columns(self, source):
        """Return the table's column names as a list of strings."""
        header_container = wait_for_element(
            source=source,
            search_by=By.CLASS_NAME,
            target="tableHeaderDiv",
            unique_element=True,
        )
        header_cells = wait_for_element(
            source=header_container, search_by=By.CLASS_NAME, target="rt-th"
        )

        return [cell.text for cell in header_cells]

    def __get_table_rows(self, source, headers):
        """Return the table data as a nested list, one inner list per row.

        Args:
            source: Element containing the ``rt-tbody`` table body.
            headers: Column names, paired positionally with each row's cells.

        Raises:
            KeyError: if the parsed record lacks one of the expected fields.
        """
        table_body = wait_for_element(
            source=source,
            search_by=By.CLASS_NAME,
            target="rt-tbody",
            unique_element=True,
        )
        rows = wait_for_element(
            source=table_body, search_by=By.CLASS_NAME, target="rt-tr"
        )
        table_data = []

        for row in rows:
            cells = wait_for_element(
                source=row, search_by=By.CLASS_NAME, target="rt-td"
            )
            row_values = [cell.text for cell in cells]

            # zip() stops at the shorter sequence, so surplus headers or cells
            # are silently ignored.
            record = parse_record(list(zip(headers, row_values)))

            table_data.append(
                [
                    record["Team"],
                    self.season,
                    *(record[field] for field in self._FIELDS_AFTER_SEASON),
                ]
            )

        return table_data

In [45]:
# Create the scraper instance used by the cells below. Construction alone does
# not start a browser or open a database connection.
scraper = SummaryScraper()

In [46]:
# Smoke-test the browser lifecycle: start a driver through the scraper and
# close it again straight away (no page is loaded in between).
scraper.open_driver()
scraper.close_driver()

In [47]:
# Smoke-test the database lifecycle: open a connection/cursor through the
# scraper and close them again straight away (no query is executed).
scraper.db_connect()
scraper.close_db_connect()