In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
BASE_URL = "https://www.economia-sniim.gob.mx/Nuevo/Consultas/MercadosNacionales/PreciosDeMercado/Agricolas/ResultadosConsultaFechaFrutasYHortalizas.aspx"

PARAMS = {
    "fechaInicio": "01/01/2025",
    "fechaFinal": "10/01/2026",
    "ProductoId": "133",
    "OrigenId": "-1",
    "Origen": "Todos",
    "DestinoId": "-1",
    "Destino": "Todos",
    "PreciosPorId": "2",
    "RegistrosPorPagina": "500",
}

HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Content-Type": "application/x-www-form-urlencoded",
    "Referer": BASE_URL,
}

In [4]:
session = requests.Session()
session.headers.update(HEADERS)

In [6]:
def extract_hidden_fields(soup):
    return {
        "__VIEWSTATE": soup.find("input", {"id": "__VIEWSTATE"})["value"],
        "__EVENTVALIDATION": soup.find("input", {"id": "__EVENTVALIDATION"})["value"],
    }
def parse_table(soup):
    table = soup.find("table", id="tblResultados")
    rows = []

    for tr in table.find_all("tr"):
        tds = tr.find_all("td")

        # Skip headers and category rows like "Frutas"
        if len(tds) != 8:
            continue
        if "encabACT2" in tds[0].get("class", []):
            continue

        rows.append([td.get_text(strip=True) for td in tds])

    return rows

In [7]:
response = session.post(BASE_URL, params=PARAMS)
soup = BeautifulSoup(response.text, "lxml")

In [9]:
all_rows = []
page = 1

while True:
    print(f"Scraping page {page}...")

    all_rows.extend(parse_table(soup))

    # Check pagination text "Página X de Y"
    pag_text = soup.find("span", id="lblPaginacion").get_text(strip=True)
    current, total = [
        int(x) for x in pag_text.replace("Página", "").split("de")
    ]

    if current >= total:
        break

    hidden = extract_hidden_fields(soup)

    payload = {
        **hidden,
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
        "ibtnSiguiente.x": "1",
        "ibtnSiguiente.y": "1",
    }

    response = session.post(BASE_URL, params=PARAMS, data=payload)
    soup = BeautifulSoup(response.text, "lxml")
    page += 1

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...


In [10]:
# --- STEP 2: Build DataFrame ---
columns = [
    "Fecha",
    "Presentación",
    "Origen",
    "Destino",
    "Precio Min",
    "Precio Max",
    "Precio Frec",
    "Observaciones",
]

df = pd.DataFrame(all_rows, columns=columns)

df.head()
print(f"\nTotal rows scraped: {len(df)}")


Total rows scraped: 17906


In [11]:
df.head()

Unnamed: 0,Fecha,Presentación,Origen,Destino,Precio Min,Precio Max,Precio Frec,Observaciones
0,Fecha,Presentación,Origen,Destino,Precio Mín,Precio Max,Precio Frec,Obs.
1,02/01/2025,Kilogramo,Jalisco,Aguascalientes: Centro Comercial Agropecuario ...,42.00,46.00,44.00,
2,02/01/2025,Kilogramo,Michoacán,Aguascalientes: Centro Comercial Agropecuario ...,58.00,62.00,60.00,
3,03/01/2025,Kilogramo,Jalisco,Aguascalientes: Centro Comercial Agropecuario ...,42.00,46.00,44.00,
4,03/01/2025,Kilogramo,Michoacán,Aguascalientes: Centro Comercial Agropecuario ...,58.00,62.00,60.00,


In [12]:
df.shape

(17906, 8)