In [1]:
from pathlib import Path

In [None]:
# Load the search query from ../search.txt (relative to the working directory).
# The encoding is given explicitly so non-ASCII (e.g. Cyrillic) queries do not
# depend on the platform's default locale encoding, and surrounding whitespace
# (typically the trailing newline an editor adds) is removed so it does not end
# up in the search box or in the log output.
search_term = Path('../search.txt').read_text(encoding='utf-8').strip()

In [None]:
from datetime import datetime
from playwright.async_api import async_playwright
from dotenv import load_dotenv
import os
import random
import asyncio
import time
from tqdm.notebook import tqdm

# Load variables from a .env file into the process environment, then read the
# three required settings. A missing variable raises KeyError.
# NOTE(review): load_dotenv() does not override variables that are already set
# unless override=True is passed; on Windows USERNAME is normally predefined by
# the OS, so the .env value may be ignored there -- confirm.
load_dotenv()
DOMAIN, USERNAME, PASSWORD = (
    os.environ[key] for key in ('DOMAIN', 'USERNAME', 'PASSWORD')
)

# When True, the script prints progress messages as it runs.
DETAILED_LOGGING = True

class DailyDownloadLimitReached(Exception):
    """Signal that the daily download limit has been reached."""

async def random_wait(min_seconds=0.2, max_seconds=1.4):
    """Suspend the calling coroutine for a randomly chosen duration.

    The delay is drawn with ``random.uniform(min_seconds, max_seconds)`` and
    awaited via ``asyncio.sleep``, so other tasks keep running meanwhile.

    Args:
        min_seconds: One bound of the delay range, in seconds.
        max_seconds: The other bound of the delay range, in seconds.
    """
    await asyncio.sleep(random.uniform(min_seconds, max_seconds))

async def login(page):
    """Open the tracker page and submit the login form.

    Navigates to ``https://{DOMAIN}/forum/tracker.php``, fills the username and
    password inputs from the module-level USERNAME / PASSWORD values, and
    clicks the submit button, pausing for a random interval after each step.

    Args:
        page: The browser page object to drive (must provide ``goto``,
            ``fill`` and ``click`` coroutines).
    """
    await page.goto(f"https://{DOMAIN}/forum/tracker.php")
    # Longer pause after navigation than between the form steps.
    await random_wait(min_seconds=3, max_seconds=4)

    form_fields = (
        ('input[name="login_username"]', USERNAME),
        ('input[name="login_password"]', PASSWORD),
    )
    for selector, value in form_fields:
        await page.fill(selector, value)
        await random_wait()

    await page.click('input[type="submit"][name="login"][value="Вход"]')
    await random_wait()

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=False)
    page = await browser.new_page()
    await login(page)
    await asyncio.sleep(5)
    
    search_input = await page.query_selector('#title-search')
    assert search_input
    await search_input.fill(search_term)
    await random_wait()
    if DETAILED_LOGGING:
        print(f"Search input filled with '{search_term}'")
        
    # Select the "Size" option from the dropdown using the Russian word "Размер"
    size_option = await page.query_selector('select[name="o"] option:has-text("Размер")')
    assert size_option, "Size option not found"
    await size_option.evaluate('option => option.selected = true')
    
    # Trigger change event on the select element
    await page.evaluate('document.querySelector("select[name=\'o\']").dispatchEvent(new Event("change"))')
    
    await random_wait()
    if DETAILED_LOGGING:
        print("Size option (Размер) selected for sorting")
        
        
    search_button = await page.query_selector('input#tracker-submit[type="submit"][value="Поиск"]')
    assert search_button
    await search_button.click()
    await random_wait(min_seconds=2, max_seconds=3)
    if DETAILED_LOGGING:
        print("Search button clicked")
            
    # Initialize an empty list to store all links
    all_links = []
    try:
        while True:
            # Get all the links from the current search results page
            links = await page.evaluate('''
                () => {
                    const links = Array.from(document.querySelector('#search-results').querySelectorAll('a'))
                        .filter(link => link.href.includes('/viewtopic.php?t='))
                        .map(link => link.href);
                    return links;
                }
            ''')
            
            all_links.extend(links)
            
            if DETAILED_LOGGING:
                print(f"Found {len(links)} links on current page")
                if links:
                    print(f"First link: {links[0]}")
            
            # Check if there's a "Next" button
            next_button = await page.query_selector('a.pg:has-text("След.")')
            
            if not next_button:
                break
            
            # Click the "Next" button
            await next_button.click()
            await random_wait(min_seconds=2, max_seconds=3)
            
            if DETAILED_LOGGING:
                print("Moved to next page")
        
        # Join all the collected links into a single string, each on a new line
        links_text = '\n'.join(all_links)
        
        if DETAILED_LOGGING:
            print(f"Total links extracted: {len(all_links)}")
            print("All links extracted and joined")
        # Get the current date and time with seconds
        current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")
        
        # Create the output directory if it doesn't exist
        os.makedirs("output/links", exist_ok=True)
        
        # Write the links to a file
        output_file = f"../output/links/{current_datetime}.txt"
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(links_text)
        
        if DETAILED_LOGGING:
            print(f"Links written to {output_file}")
    finally:
        time.sleep(3)
        await browser.close()
  