<a href="https://colab.research.google.com/github/Anshuman23018/hello/blob/main/automation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install selenium

In [10]:
import os
import re
import shutil
import time

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Function to download file from URL
def download_file(url, filename, timeout=60):
    """Download ``url`` with an HTTP GET and save the body to ``filename``.

    Args:
        url: Address to fetch.
        filename: Path of the local file to create or overwrite.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would wait indefinitely on a stalled server.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (4xx/5xx). Nothing is written to ``filename`` in that case.
        requests.RequestException: On connection problems or timeouts.
    """
    # stream=True lets large files be written chunk by chunk instead of being
    # held in memory as a single bytes object.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTML error page as the "download".
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=65536):
                file.write(chunk)

# Function to upload each file in a folder, submit the form, and get the job links
def upload_and_submit(driver, wait, folder_path, upload_input):
    """Upload every file in ``folder_path`` and collect the resulting job links.

    For each regular file in the folder (in sorted name order) the file path
    is typed into the file input, the form is submitted via the
    ``btn_submit`` button, and the ``href`` of the ``joblink`` element is
    recorded.

    Args:
        driver: Selenium WebDriver already showing the upload page.
        wait: ``WebDriverWait`` bound to ``driver``.
        folder_path: Directory whose files are uploaded one by one.
        upload_input: The ``<input type="file">`` element to use for the
            first upload; it is looked up again after every submission.

    Returns:
        A list of ``(file_name, joblink_href)`` tuples, one per uploaded file.
    """
    job_links = []  # (file_name, job link) tuples, in upload order

    # A file input is given the path as typed text, so make it absolute
    # rather than relying on the browser's working directory.
    folder_path = os.path.abspath(folder_path)

    # sorted() makes the upload order deterministic; os.listdir() gives no
    # ordering guarantee.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Skip sub-directories (e.g. notebook checkpoint folders); only
        # regular files can be uploaded.
        if not os.path.isfile(file_path):
            continue

        print("Uploading file:", file_path)  # Print the complete file path

        upload_input.send_keys(file_path)

        # Dismiss any overlay or popup. This is best-effort: a missing overlay
        # (timeout) or a failed click is not an error, but only Selenium
        # errors are ignored so Ctrl-C and real bugs still surface.
        try:
            overlay = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "modal-overlay")))
            driver.execute_script("arguments[0].click();", overlay)
        except WebDriverException:
            pass

        # Dismiss any other overlay if present (same best-effort policy)
        try:
            other_overlay = driver.find_element(By.CLASS_NAME, "other-modal-overlay-class")
            driver.execute_script("arguments[0].click();", other_overlay)
        except WebDriverException:
            pass

        # Submit the form
        submit_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@id='btn_submit']")))
        submit_button.click()

        # Wait for file generation process to complete
        time.sleep(10)  # Adjust this as needed

        # Find the joblink element and extract the link
        joblink_element = wait.until(EC.presence_of_element_located((By.ID, "joblink")))
        joblink_href = joblink_element.get_attribute("href")

        # Print or store the link
        print("Job link for", file_name, ":", joblink_href)

        # Store the filename and job link in the list as a tuple
        job_links.append((file_name, joblink_href))

        # Click anywhere on the screen outside the popup to dismiss it
        driver.find_element(By.XPATH, "//body").click()

        # Look the file input up again for the next iteration; the previous
        # reference is not reused after the form has been submitted.
        upload_input = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@type='file']")))

    return job_links

def _list_files(directory):
    """Return the set of paths of the regular files directly in ``directory``."""
    return {
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    }


def _wait_for_new_download(directory, known_files, timeout=120, poll_interval=1):
    """Wait for a finished download to appear in ``directory``.

    Args:
        directory: Folder the browser saves downloads into.
        known_files: Paths that were already present before the download was
            triggered (see ``_list_files``); these are never returned.
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        Path of the newest new regular file once no partial download remains.

    Raises:
        TimeoutError: If no finished new file shows up within ``timeout``.
    """
    deadline = time.time() + timeout
    while True:
        new_files = _list_files(directory) - known_files
        # Chrome writes in-progress downloads with a ".crdownload" suffix and
        # renames them when complete; hidden/temporary files are ignored too.
        in_progress = [path for path in new_files if path.endswith(".crdownload")]
        finished = [
            path for path in new_files
            if not os.path.basename(path).startswith(".")
            and not path.endswith((".crdownload", ".tmp"))
        ]
        if finished and not in_progress:
            return max(finished, key=os.path.getctime)
        if time.time() >= deadline:
            raise TimeoutError(f"No completed download appeared in {directory} within {timeout} seconds")
        time.sleep(poll_interval)


# Set up Chrome WebDriver with headless option
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# Initialize WebDriver
driver = webdriver.Chrome(options=chrome_options)  # You can use other browsers too
wait = WebDriverWait(driver, 20)

try:
    # Open the website. This is inside the try block so the browser is closed
    # even when the page fails to load.
    driver.get("http://sts.bioe.uic.edu/castp/calculation.html")

    # Find the file upload input element
    upload_input = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@type='file']")))

    # Upload files from a folder
    folder_path = "/content/upload"  # Adjust path accordingly
    job_links = upload_and_submit(driver, wait, folder_path, upload_input)

    # Add a delay before starting the download process. The message is built
    # from the same value that is slept so the two cannot drift apart.
    pre_download_delay = 120
    print(f"Waiting for {pre_download_delay} seconds before starting the download process...")
    time.sleep(pre_download_delay)

    download_dir = "/content/download"  # Adjust path accordingly
    os.makedirs(download_dir, exist_ok=True)

    # Directory the browser saves downloads into
    default_download_dir = "/content"  # Adjust path accordingly

    for filename, joblink in job_links:
        print("Processing job link for", filename, ":", joblink)

        # Extract the job ID from the job link up front, so a missing or
        # unexpected link is reported instead of crashing after the download.
        job_id_match = re.search(r'j_(\w+)', joblink or "")
        if job_id_match is None:
            print("Skipping", filename, ": no job ID found in job link")
            continue
        job_id = job_id_match.group(1)

        driver.get(joblink)

        # Wait for 60 seconds before clicking the download button
        time.sleep(60)

        download_button = wait.until(EC.presence_of_element_located((By.XPATH, "//button[@id='downloadbtn']")))

        # Scroll to the download button
        driver.execute_script("arguments[0].scrollIntoView();", download_button)

        # Remember what is already in the download directory so that only a
        # file created by this click can be picked up afterwards.
        files_before_download = _list_files(default_download_dir)

        # Click the download button
        download_button.click()

        # Wait for the download to complete. Polling for a new regular file
        # avoids picking a directory or a half-written file.
        downloaded_file = _wait_for_new_download(default_download_dir, files_before_download)

        # Move the downloaded file to the desired directory with the new filename and .zip extension
        target_file_path = os.path.join(download_dir, f"{os.path.splitext(filename)[0]}_{job_id}.zip")
        shutil.move(downloaded_file, target_file_path)
        print("File downloaded and saved as:", target_file_path)

finally:
    # Close the browser
    driver.quit()


Uploading file: /content/upload/5x5f.pdb
Job link for 5x5f.pdb : http://sts.bioe.uic.edu/castp/index.html?j_662641f93101d
Uploading file: /content/upload/6rr7.pdb
Job link for 6rr7.pdb : http://sts.bioe.uic.edu/castp/index.html?j_662642039e1f0
Uploading file: /content/upload/6vxx.pdb
Job link for 6vxx.pdb : http://sts.bioe.uic.edu/castp/index.html?j_6626420e340d1
Waiting for 60 seconds before starting the download process...
Processing job link for 5x5f.pdb : http://sts.bioe.uic.edu/castp/index.html?j_662641f93101d
File downloaded and saved as: /content/download/5x5f_662641f93101d.zip
Processing job link for 6rr7.pdb : http://sts.bioe.uic.edu/castp/index.html?j_662642039e1f0
File downloaded and saved as: /content/download/6rr7_662642039e1f0.zip
Processing job link for 6vxx.pdb : http://sts.bioe.uic.edu/castp/index.html?j_6626420e340d1
File downloaded and saved as: /content/download/6vxx_6626420e340d1.zip
