In [None]:
import os
from functools import reduce
from io import StringIO
from time import sleep

import bs4
import pandas as pd
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### USER NOTE: CLEAR CREDENTIALS BEFORE COMMITTING

In [None]:
# Credentials come from the environment so they never have to be typed into
# (and accidentally committed with) the notebook. Each falls back to the empty
# string when its variable is unset.
USERNAME = os.environ.get("MYCHART_USERNAME", "")
PASSWORD = os.environ.get("MYCHART_PASSWORD", "")

In [None]:
# Site configuration consumed by login() and verify_logged_in(): the login URL,
# the DOM ids of the login form controls, and the credentials to submit.
site = {
    "login_url": "https://my.mdanderson.org/MyChart/Authentication/Login?postloginurl=Clinical%2fTestResults",
    "username_id": "Login",
    "password_id": "Password",
    # Reuse the notebook-level credentials so they are set in exactly one place.
    "credentials": {
        "username": USERNAME,
        "password": PASSWORD,
    },
    "login_button_id": "submit",
    # NOTE(review): verify_logged_in() hands this value to By.CSS_SELECTOR. If
    # "menuicon" and "heal" are class names, the selector should presumably be
    # ".menuicon.heal" -- confirm against the page before relying on it.
    "login_confirm_css": "menuicon heal",
}

In [None]:
def verify_logged_in(browser, site):
    """Wait for the post-login marker element and return it.

    Waits up to 10 seconds for an element matching the CSS selector stored
    under ``site["login_confirm_css"]`` to be present in ``browser``.

    Returns the located element. If the wait fails or anything else raises,
    the error is printed and ``None`` is returned.
    """
    try:
        locator = (By.CSS_SELECTOR, site["login_confirm_css"])
        marker_present = EC.presence_of_element_located(locator)
        waiter = WebDriverWait(browser, 10)
        return waiter.until(marker_present)
    except Exception as error:
        message = 'login not confirmed Exception: {}'.format(str(error))
        print(message)
        return None

In [None]:
def scrape(site):
    """Log in, expand the lab list, and collect the lab entry elements.

    Args:
        site: Site configuration mapping; passed straight through to ``login``.

    Returns:
        A ``(browser, lab_entries)`` tuple: the WebDriver returned by
        ``login`` and the list of elements whose class is ``SingleResult``.
    """
    browser = login(site)
    load_labs(browser)
    # find_elements(By.CLASS_NAME, ...) replaces find_elements_by_class_name,
    # which was removed in Selenium 4.3; this form works on older versions too.
    lab_entries = browser.find_elements(By.CLASS_NAME, "SingleResult")
    print(f"labs detected: {len(lab_entries)}")
    return browser, lab_entries
    

def login(site, settle_time=3):
    """Open Chrome, submit the login form described by ``site``, and return the driver.

    Args:
        site: Mapping with ``login_url``, ``username_id``, ``password_id``,
            ``login_button_id`` and a ``credentials`` mapping holding
            ``username`` and ``password``.
        settle_time: Seconds to pause after clicking the login button.

    Returns:
        The Chrome WebDriver. Filling in the form is best-effort: if any step
        raises, the error is printed and the driver is still returned.

    Raises:
        Whatever ``webdriver.Chrome()`` raises when the browser cannot start.
    """
    # Start the driver outside the try block. If Chrome cannot be launched there
    # is no browser to return, so let the real error surface instead of the
    # UnboundLocalError that `return browser` would otherwise produce.
    browser = webdriver.Chrome()
    try:
        browser.get(site["login_url"])
        # find_element(By.ID, ...) replaces find_element_by_id (removed in Selenium 4.3).
        username_box = browser.find_element(By.ID, site["username_id"])
        password_box = browser.find_element(By.ID, site["password_id"])
        username_box.send_keys(site["credentials"]["username"])
        password_box.send_keys(site["credentials"]["password"])
        browser.find_element(By.ID, site["login_button_id"]).click()
        sleep(settle_time)
    except Exception as e:
        print('failed to login {}'.format(str(e)))

    return browser


def load_labs(browser, pause_time=0.5):
    """Keep clicking the "load more" control until none is left.

    Args:
        browser: WebDriver to search for elements with class ``loadmore``.
        pause_time: Seconds to pause after each click before searching again.

    Returns:
        True once no ``loadmore`` element is found; False if a click raised.
    """
    while True:
        # find_elements(By.CLASS_NAME, ...) replaces find_elements_by_class_name,
        # which was removed in Selenium 4.3.
        load_more = browser.find_elements(By.CLASS_NAME, "loadmore")
        if not load_more:
            return True
        try:
            # Click the last matching element.
            load_more[-1].click()
        except Exception:
            print("loadmore done?")
            return False
        sleep(pause_time)


def get_lab_metadata(browser):
    """Placeholder: metadata extraction is not implemented; always returns None."""
    return None




In [None]:
# Log in, expand the results list, and collect every lab entry element.
# `browser` stays open: the following cell drives it to visit each panel.
browser, lab_entries = scrape(site)

In [None]:
# Open each distinct lab panel in its own tab and collect its "Past Results" table.
# panel_dict maps panel name -> transposed results DataFrame, or the string
# "NO TABLE" when no "Past Results" tab could be found on the panel page.
RESULTS_START_DATE = "1/1/15"  # value typed into the date filter before applying it

panel_dict = dict()
home_handle = browser.current_window_handle
for entry in lab_entries:
    # find_element(By..., ...) replaces the find_element_by_* helpers removed in Selenium 4.3.
    entry_clickable = entry.find_element(By.CLASS_NAME, "ResultName").find_element(By.TAG_NAME, "a")
    entry_name = entry_clickable.find_element(By.TAG_NAME, "span").text
    if entry_name in panel_dict:
        # A panel with this name was already visited; skip repeats.
        continue
    print(entry_name)
    lab_url = entry_clickable.get_attribute("href")

    # Identify the new tab by set difference rather than by position, so the
    # switch does not depend on the order in which handles are reported.
    known_handles = set(browser.window_handles)
    browser.execute_script("window.open('');")
    new_handle = next(h for h in browser.window_handles if h not in known_handles)
    browser.switch_to.window(new_handle)
    try:
        browser.get(lab_url)
        tabs = browser.find_elements(By.CLASS_NAME, "membertab")
        try:
            results_tab = [
                tab for tab in tabs
                if tab.find_element(By.TAG_NAME, "span").text == "Past Results"
            ].pop()
        except Exception:
            # No matching tab (or a tab without a <span>): record the gap and move on.
            print(f"{entry_name}: NO TABLE")
            panel_dict[entry_name] = "NO TABLE"
        else:
            results_tab.click()
            start_date = browser.find_element(By.CLASS_NAME, "date")
            start_date.clear()
            start_date.send_keys(RESULTS_START_DATE)
            browser.find_element(By.CLASS_NAME, "otherbutton").click()
            # NOTE(review): page_source is read immediately after the click; if the
            # table is refreshed asynchronously an explicit wait may be needed -- confirm.
            soup = bs4.BeautifulSoup(browser.page_source, 'html.parser')
            table = list(soup.select(".tableWrapper")[0].children)[0]
            # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
            panel_table = pd.read_html(StringIO(str(table)))[0]
            panel_dict[entry_name] = panel_table.set_index(panel_table.columns[0]).T
    finally:
        # Always close the panel tab and return to the results list, even when
        # parsing fails, so a failure does not leave the driver on a stray tab.
        browser.close()
        browser.switch_to.window(home_handle)

In [None]:
# Keep only the entries that hold a parsed table; any other value is dropped.
df_list = [panel for panel in panel_dict.values() if isinstance(panel, pd.DataFrame)]

In [None]:
# Number of distinct panel names recorded, whether or not a table was parsed for them.
len(panel_dict)

In [None]:
# Number of panels for which a DataFrame was collected.
len(df_list)

In [None]:
# Outer-join all panel tables on their index so rows sharing an index label line
# up and each panel contributes its own columns. reduce() without an initial
# value raises TypeError on an empty list, so fall back to an empty frame when
# no panel produced a table.
df = (
    reduce(
        lambda merged, panel: pd.merge(merged, panel, how="outer", left_index=True, right_index=True),
        df_list,
    )
    if df_list
    else pd.DataFrame()
)

In [None]:
# Dimensions of the merged table as (rows, columns).
df.shape

In [None]:
# Write the merged table to a CSV file; the relative path resolves against the
# current working directory.
df.to_csv("test_output.csv")