In [1]:
!pip install selenium webdriver-manager pandas openpyxl lxml

Collecting selenium
  Downloading selenium-4.39.0-py3-none-any.whl.metadata (7.5 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting urllib3<3.0,>=2.5.0 (from urllib3[socks]<3.0,>=2.5.0->selenium)
  Downloading urllib3-2.6.2-py3-none-any.whl.metadata (6.6 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.32.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Using cached trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting certifi>=2025.10.5 (from selenium)
  Using cached certifi-2025.11.12-py3-none-any.whl.metadata (2.5 kB)
Collecting attrs>=23.2.0 (from trio<1.0,>=0.31.0->selenium)
  Using cached attrs-25.4.0-py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Using cached outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket<1.0,>=0.12.2->selenium)
  Us

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
httpcore 1.0.2 requires h11<0.15,>=0.13, but you have h11 0.16.0 which is incompatible.


In [2]:
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import pandas as pd
import time
import os

In [8]:
import shutil

In [39]:
HS_CODES = ["28255000"]*84
MONTHS   = ["January", "February","March","April","May","June","July","August","September","October","November","December"]*7
YEARS    = ["2018"]*12+["2019"]*12+["2020"]*12+["2021"]*12+["2022"]*12+["2023"]*12+["2024"]*12

In [40]:
if not (len(HS_CODES) == len(MONTHS) == len(YEARS)):
    raise ValueError("HS_CODES, MONTHS, YEARS must have same length")

URL = "https://tradestat.commerce.gov.in/meidb/commodity_wise_all_countries_import"

In [41]:
OUTPUT_DIR = os.path.abspath("meidb_lithium_imports_monthwise")
# OUTPUT_DIR = os.path.abspath("meidb_imports_monthwise")
os.makedirs(OUTPUT_DIR, exist_ok=True)

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option(
    "prefs",
    {
        "download.default_directory": OUTPUT_DIR,
        "download.prompt_for_download": False,
        "directory_upgrade": True,
        "safebrowsing.enabled": True
    }
)

In [42]:
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options
)
wait = WebDriverWait(driver, 30)
driver.get(URL)

In [43]:
def wait_for_excel(timeout=45):
    end = time.time() + timeout
    while time.time() < end:
        files = [f for f in os.listdir(OUTPUT_DIR)
                 if f.endswith(".xls") or f.endswith(".xlsx")]
        if files:
            return files[0]
        time.sleep(1)
    raise TimeoutError("Excel download did not complete")

In [44]:
for hs, month, year in zip(HS_CODES, MONTHS, YEARS):

    # HS Code
    hs_input = wait.until(EC.presence_of_element_located((By.NAME, "cwacimHSCODE")))
    hs_input.clear()
    hs_input.send_keys(hs)

    # Month
    Select(driver.find_element(By.NAME, "cwacimMonth")).select_by_visible_text(month)

    # Year
    Select(driver.find_element(By.NAME, "cwacimYear")).select_by_visible_text(year)

    # Values in
    Select(driver.find_element(By.NAME, "cwacimReportVal")) \
        .select_by_visible_text("US $ Million")

    # Year Type
    Select(driver.find_element(By.NAME, "cwacimReportYear")) \
        .select_by_visible_text("Financial Year")

    # Submit
    driver.find_element(By.XPATH, "//button[normalize-space()='Submit']").click()

    try:
        wait.until(
            EC.any_of(
                EC.presence_of_element_located((By.XPATH, "//button[normalize-space()='Excel']")),
                EC.presence_of_element_located((By.XPATH, "//*[contains(text(),'No Record')]"))
            )
        )
    except TimeoutException:
        print(f"Skipped (no response): HS={hs}, {month}-{year}")
        continue

    if "no record found" in driver.page_source.lower():
        print(f"No data: HS={hs}, {month}-{year}")
        continue
    try:
    # CLICK EXCEL (MANDATORY)
        driver.find_element(By.XPATH, "//button[normalize-space()='Excel']").click()

        downloaded = wait_for_excel()
        new_name = f"MEIDB_Import_HS{hs}_{month}_{year}.xlsx"

        shutil.move(
            os.path.join(OUTPUT_DIR, downloaded),
            os.path.join(OUTPUT_DIR, new_name)
        )

        print(f"Saved: {new_name}")
        time.sleep(2)
    except Exception as e:
        print(f"Error Occured {e}")

# driver.quit()

Saved: MEIDB_Import_HS28255000_January_2018.xlsx
Error Occured Message: stale element reference: stale element not found
  (Session info: chrome=143.0.7499.170); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#staleelementreferenceexception
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x3612d3
	0x361314
	0x14e6dd
	0x1608e6
	0x15f9cc
	0x155dbf
	0x154280
	0x1576c1
	0x157758
	0x19b122
	0x18f14f
	0x1bb4ec
	0x18ec34
	0x1bb6a4
	0x1ddb13
	0x1bb2e6
	0x18d321
	0x18e1d4
	0x5b5314
	0x5b08cb
	0x5cd1aa
	0x37b1d8
	0x3831dd
	0x3695d8
	0x369799
	0x353b28
	0x76835d49
	0x77a9d5db
	0x77a9d561
	0

Error Occured Message: stale element reference: stale element not found
  (Session info: chrome=143.0.7499.170); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#staleelementreferenceexception
Stacktrace:
Symbols not available. Dumping unresolved backtr