# **Downloading Interaction Data from miRWalk**

# Importing Libraries and Configurations

In [1]:
import os
import sys
import time

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Get the project root (two levels above)
sys.path.append(os.path.abspath(os.path.join('..', '..')))

from config import (
    BRCA_PROCESSED_FILES_DIRS,
    MIRWALK_BASE_URL,
    MIRWALK_DEFAULT_FILE_PATH,
    MIRWALK_EXTERNAL_DATA_DIR,
    MIRWALK_SPECIES_SELECTION,
)

# Functions

In [2]:
def create_driver(download_directory):
    # Configure the Selenium ChromeDriver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-gpu')
    options.add_argument('--disable-extensions')

    # Define the download directory
    options.add_experimental_option(
        'prefs', {'download.default_directory': download_directory}
    )

    # Initiate the Chrome browser
    driver = webdriver.Chrome(options=options)

    return driver

def quit_driver(driver):
    # Close the Chrome browser
    driver.quit()

def export_mirna_targets(driver, mirna):
    # Access miRWalk website
    driver.get(MIRWALK_BASE_URL)

    # Find and fill the species selector
    species_input = Select(driver.find_element(By.NAME, 'species'))
    species_input.select_by_visible_text(MIRWALK_SPECIES_SELECTION)

    # Find and fill the microRNA identifier
    mirna_input = driver.find_element(By.NAME, 'mirna')
    mirna_input.send_keys(mirna)

    # Find and click in the search button
    search_btn = driver.find_element(By.XPATH, '//button[text()="search"]')
    search_btn.click()

    # Find and click in the export CSV button
    export_link = driver.find_element(By.LINK_TEXT, 'Export CSV')
    export_link.click()

    # Wait until the download is complete
    while not os.path.exists(MIRWALK_DEFAULT_FILE_PATH):
        time.sleep(5)

    # Change the name of the downloaded file
    file_name = f'{mirna}.csv'
    mirna_file_path = os.path.join(MIRWALK_EXTERNAL_DATA_DIR, file_name)
    os.rename(MIRWALK_DEFAULT_FILE_PATH, mirna_file_path)

# Loading Data

In [3]:
# Basal-like
df_basal_mirs = pd.read_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['basal'], 'basal-like-expressed-mir.csv')
)
basal_mirs = set(df_basal_mirs['miRNA_region_id'].tolist())

# HER2-enriched
df_her2_mirs = pd.read_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['her2'], 'her2-enriched-expressed-mir.csv')
)
her2_mirs = set(df_her2_mirs['miRNA_region_id'].tolist())

# Luminal A
df_luma_mirs = pd.read_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['lum_a'], 'luminal-a-expressed-mir.csv')
)
luma_mirs = set(df_luma_mirs['miRNA_region_id'].tolist())

# Luminal B
df_lumb_mirs = pd.read_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['lum_b'], 'luminal-b-expressed-mir.csv')
)
lumb_mirs = set(df_lumb_mirs['miRNA_region_id'].tolist())

# Normal tissue
df_normal_mirs = pd.read_csv(
    os.path.join(BRCA_PROCESSED_FILES_DIRS['normal'], 'normal-tissue-expressed-mir.csv')
)
normal_mirs = set(df_normal_mirs['miRNA_region_id'].tolist())

# All expressed microRNAs
expressed_mirs = basal_mirs.union(her2_mirs)
expressed_mirs = expressed_mirs.union(luma_mirs)
expressed_mirs = expressed_mirs.union(lumb_mirs)
expressed_mirs = expressed_mirs.union(normal_mirs)

In [4]:
len(expressed_mirs)

717

# Downloading Data

In [5]:
# Create the ChromeDriver
driver = create_driver(str(MIRWALK_EXTERNAL_DATA_DIR))

for mirna in expressed_mirs:
    mirna_file_path = export_mirna_targets(driver, mirna)

# Close the ChromeDriver
quit_driver(driver)

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"link text","selector":"Export CSV"}
  (Session info: chrome=128.0.6613.84); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
#0 0x562fa6f662da <unknown>
#1 0x562fa6c34200 <unknown>
#2 0x562fa6c83aa2 <unknown>
#3 0x562fa6c83d41 <unknown>
#4 0x562fa6cc9964 <unknown>
#5 0x562fa6ca88fd <unknown>
#6 0x562fa6cc6dfd <unknown>
#7 0x562fa6ca8673 <unknown>
#8 0x562fa6c76473 <unknown>
#9 0x562fa6c7747e <unknown>
#10 0x562fa6f2d0db <unknown>
#11 0x562fa6f31071 <unknown>
#12 0x562fa6f199d5 <unknown>
#13 0x562fa6f31bf2 <unknown>
#14 0x562fa6efeb6f <unknown>
#15 0x562fa6f55248 <unknown>
#16 0x562fa6f55417 <unknown>
#17 0x562fa6f650cc <unknown>
#18 0x7fb6f05be609 start_thread
