In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
import time
import datetime


import pandas as pd
import uuid
import os


ENCODING = "utf-8"

LINK_LIST_PATH = "data/link_list.txt"

def save_link(url, date, page):
    """
    Append one collected link to the .txt file at LINK_LIST_PATH.

    The record is written as a single tab-separated line holding the url,
    the date and the page (converted to a string), terminated by a newline.
    """
    with open(LINK_LIST_PATH, mode="a", encoding=ENCODING) as link_file:
        fields = (url, date, str(page))
        record = "\t".join(fields)
        link_file.write(record + "\n")


def download_links_from_index():
    """
    Return the URLs that are already recorded in the file at LINK_LIST_PATH.

    Despite its name, this function does not touch the network: it only
    prepares and reads the local link list.

    * If the file does not exist, it is created (together with its parent
      directory, when the path has one) containing just the tab-separated
      header row "url", "datetime", "page", and an empty list is returned.
    * Otherwise the file is read as tab-separated data and the values of
      its "url" column are returned; a file with no data rows yields an
      empty list.

    Returns:
        list: the previously saved URLs, possibly empty.
    """
    if not os.path.exists(LINK_LIST_PATH):
        # open(..., "w") does not create missing directories, so make sure
        # the parent exists first; dirname is "" for a bare file name.
        parent_dir = os.path.dirname(LINK_LIST_PATH)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(LINK_LIST_PATH, "w", encoding=ENCODING) as f:
            f.write("\t".join(["url", "datetime", "page"]) + "\n")
        return []

    # Some links may already have been saved by an earlier run.
    data = pd.read_csv(LINK_LIST_PATH, sep="\t")
    if data.empty:
        return []

    return data["url"].to_list()



def keep_pressing_button():
    """
    Click the "DAHA FAZLA" button repeatedly until it can no longer be clicked.

    Each round waits up to 5 seconds for the button to become clickable,
    clicks it, then sleeps 5 seconds so newly loaded items can appear.
    The loop ends when the button's computed CSS display is 'none', or when
    waiting for / clicking the button raises an error (for example the wait
    timing out because the button is gone).

    Uses the module-level ``driver``.
    """
    while True:
        try:
            # Wait for the "DAHA FAZLA" button to be clickable
            print("clicking DAHA FAZLA...")
            button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[@class='btn more-btn']"))
            )

            # A button that is still in the DOM but hidden means there is
            # nothing more to load.
            display = driver.execute_script(
                "return window.getComputedStyle(arguments[0]).display;", button
            )
            if display == 'none':
                break

            # Click "DAHA FAZLA" button
            button.click()

            # Add a delay for loading the new items
            time.sleep(5)
        except Exception:
            # No more "DAHA FAZLA" buttons, or another error occurred.
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate so the run can be stopped by the user
            # instead of being mistaken for "no more items".
            break

def get_url_and_date():
    """
    Read the report links and dates currently present on the page.

    Uses the module-level ``driver``. Returns a tuple ``(links, datet)``:
    ``links`` holds the ``href`` attribute of every
    ``<a class='download-report-btn'>`` element and ``datet`` holds the
    ``innerText`` of every ``<span class='date'>`` element, each in the
    order the elements were found.
    """
    link_xpath = "//a[@class='download-report-btn']"
    date_xpath = "//span[@class='date']"

    # Locate both element groups first, then read their values.
    anchors = driver.find_elements(By.XPATH, link_xpath)
    date_spans = driver.find_elements(By.XPATH, date_xpath)

    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href'))

    datet = []
    for span in date_spans:
        datet.append(span.get_attribute("innerText"))

    return links, datet


def change_month():
    """
    Move the start/end date filters of the page via the calendar widget.

    Uses the module-level ``driver``. Steps, in order:

    1. Click the start date input, click the calendar's ``prev`` header cell
       (presumably stepping back one month), then click the first clickable
       ``td`` with class ``day`` whose text contains '1'.
    2. Click the end date input and click the ``day`` cell whose text
       contains '28' when the calendar header text contains "Şubat"
       (Turkish for February), otherwise the cell containing '30'.

    NOTE(review): the end day is fixed at 28 or 30, so a 31st (or 29 February)
    appears to fall outside the selected range - confirm against the site.
    NOTE(review): ``contains(text(), '1')`` is a substring match (it also
    matches 10-19, 21, 31); this relies on day 1 being the first match.
    """
    day_xpath = "//td[@class='day' and contains(text(), '{}')]"

    # --- start date: open calendar, go back, pick the first day ---
    start_input = driver.find_element(By.XPATH, "//input[@id='startDate']")
    start_input.click()

    driver.find_element(By.XPATH, "//th[@class='prev']").click()

    first_day = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable((By.XPATH, day_xpath.format('1')))
    )
    first_day.click()

    # --- end date: open calendar, pick the month's closing day ---
    end_input = driver.find_element(By.XPATH, "//input[@id='endDate']")
    end_input.click()

    month_header = driver.find_element(By.XPATH, "//th[@class='datepicker-switch']")
    header_text = month_header.get_attribute("innerText")
    closing_day = '28' if "Şubat" in header_text else '30'

    last_day = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable((By.XPATH, day_xpath.format(closing_day)))
    )
    last_day.click()


def get_monthly_links(final_date):
    """
    Collect the links of the currently displayed date range and move on.

    Loads every item by pressing "DAHA FAZLA" until it cannot be pressed,
    reads all links and dates, and zero-pads each dot-separated date part to
    two characters (printing every normalised date). If none of the dates
    equals ``final_date``, every link not already in the module-level
    ``downloaded_url_list`` is printed and saved with its date and the
    module-level ``page``. Afterwards the date filters are shifted with
    ``change_month()`` and the module-level ``submit`` button is clicked.

    NOTE(review): when ``final_date`` is found, nothing from this range is
    saved at all - confirm that this is intended.

    Returns:
        bool: True when ``final_date`` is among the dates of this range.
    """
    # presses "DAHA FAZLA" until it can't
    keep_pressing_button()
    print("end of month")

    urls, url_dates = get_url_and_date()

    normalized_dates = []
    for raw_date in url_dates:
        padded = ".".join(piece.zfill(2) for piece in raw_date.split("."))
        print(padded)
        normalized_dates.append(padded)

    reached_date = final_date in normalized_dates

    if not reached_date:
        for link, link_date in zip(urls, normalized_dates):
            if link in downloaded_url_list:
                continue

            print("\t", link, flush=True)
            save_link(link, link_date, page)

    change_month()

    submit.click()

    return reached_date


#////////////////////////////////////////////////////////////#


# Load the URLs saved by earlier runs (creates the link list file if missing).
# get_monthly_links() reads this module-level list to skip known links.
downloaded_url_list = download_links_from_index()

# Set up the WebDriver and open the reports page.
# `driver` is a module-level name used by all the scraping helpers above.
driver = webdriver.Chrome()
driver.get('https://yatirim.akbank.com/tr-tr/raporlar/Sayfalar/Raporlar.aspx')


# Choose the report type in the dropdown and locate the "Uygula" (apply) button.
select = Select(driver.find_element(By.XPATH, "//select[@class='select2-hidden-accessible']"))

select.select_by_visible_text('Akbank Günlük Bülten')

# `submit` is also clicked from get_monthly_links() after each date change.
submit = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//button[@class='btn red-btn submit-btn']")))


# Special case for the first range: 1st of the month shown by the calendar
# up to today (no "prev" click here, unlike change_month()).
date_start = driver.find_element(By.XPATH, "//input[@id='startDate']")

date_end = driver.find_element(By.XPATH, "//input[@id='endDate']")

# The calendar only pops up after clicking into the (empty) input field.
date_start.click()

# Set the start day to the 1st of the current month.
# NOTE(review): contains(text(), '1') is a substring match on any <td>
# (it would also match e.g. 10 or 31); this relies on day 1 being the first
# clickable match - confirm against the calendar markup.
calender_start = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//td[contains(text(), '1')]")))

calender_start.click()

# The calendar only pops up after clicking into the (empty) input field.
date_end.click()

# Set the end date to today (the cell with class 'today day').
calender_today = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//td[@class='today day']")))

# Select today's cell as the end date.
calender_today.click()

# Click "Uygula" to apply the date range.
submit.click()

# End signal: set once get_monthly_links() reports the final date was seen.
terminate = False

# `page` counts the processed date ranges; get_monthly_links() reads it as a
# module-level name and stores it with every saved link.
page = 0
# Process one date range per iteration until the final date is reached.
# The date string is compared against zero-padded, dot-separated dates
# (looks like DD.MM.YYYY - confirm).
# NOTE(review): if that exact date never appears on the site, this loop has
# no other exit condition.
while not terminate:
    page = page+1
    if get_monthly_links("04.01.2021"):
        terminate = True


# Close the WebDriver
#driver.quit()

clicking DAHA FAZLA...
clicking DAHA FAZLA...
clicking DAHA FAZLA...
clicking DAHA FAZLA...
end of month
25.05.2023
24.05.2023
23.05.2023
22.05.2023
18.05.2023
17.05.2023
16.05.2023
15.05.2023
12.05.2023
11.05.2023
10.05.2023
09.05.2023
08.05.2023
05.05.2023
04.05.2023
03.05.2023
02.05.2023
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8213
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8210
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8208
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8203
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8200
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8198
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/Rapor/Download.aspx?ID=8195
	 https://yatirim.akbank.com/_layouts/15/AkbankYatirimciPortali/