In [18]:
# ecobici_scraper.py

import re
from pathlib import Path
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Month mapping from Spanish to numeric (01-12)
MONTHS = {
    'enero': '01', 'febrero': '02', 'marzo': '03', 'abril': '04',
    'mayo': '05', 'junio': '06', 'julio': '07', 'agosto': '08',
    'septiembre': '09', 'setiembre': '09', 'octubre': '10',
    'noviembre': '11', 'diciembre': '12',
    'ene': '01', 'feb': '02', 'mar': '03', 'abr': '04',
    'may': '05', 'jun': '06', 'jul': '07', 'ago': '08',
    'sep': '09', 'oct': '10', 'nov': '11', 'dic': '12'
}

# File-name layouts, tried in order; each captures (year, month token).
# The second layout takes letters only and has no upper length bound, so
# "septiembre" (10 letters) is captured whole and suffixes such as "_v2"
# never end up inside the month token.
_DATE_PATTERNS = (
    r'(\d{4})[-_](\d{2})',
    r'(\d{4})[-_]([^\W\d_]{3,})',
    r'ecobici[_-](\d{4})[_-](\w+)',
    r'datos[_]?abiertos[_-]?(\d{4})[_-]?(\w+)',
)


def _resolve_month(token):
    """Return the month number (1-12) named by ``token``, or None.

    Only the leading run of digits or of letters is considered, so trailing
    noise captured by a ``\\w+`` group (e.g. ``enero_v2``) is ignored.
    """
    lead = re.match(r'\d+|[^\W\d_]+', token)
    if lead is None:
        return None
    text = lead.group()
    code = MONTHS.get(text, text)
    if not code.isdecimal():
        return None
    number = int(code)
    return number if 1 <= number <= 12 else None


def extract_date(filename):
    """Extract the (year, month) encoded in a data file name.

    The name is lower-cased and checked against each known layout in turn.
    A match is accepted only when its month token resolves to a month number
    between 1 and 12; otherwise the remaining matches and layouts are tried.

    Args:
        filename: File name to inspect; matching is case-insensitive.

    Returns:
        ``(year, month)`` as ints when a month could be resolved,
        ``(year, None)`` when a layout matched but no month could be resolved
        (the year of the first such match is reported), and
        ``(None, None)`` when no layout matched at all.
    """
    filename = filename.lower()
    fallback_year = None

    for pattern in _DATE_PATTERNS:
        for match in re.finditer(pattern, filename):
            year_text, month_token = match.groups()
            month = _resolve_month(month_token)
            if month is not None:
                return int(year_text), month
            if fallback_year is None:
                # Remember the year so callers still get (year, None).
                fallback_year = int(year_text)

    return fallback_year, None

def get_csv_links(base_url='https://ecobici.cdmx.gob.mx/datos-abiertos/',
                  link_root='https://datos.cdmx.gob.mx',
                  timeout=30):
    """Collect the CSV download links published on the open-data page.

    Args:
        base_url: Page whose ``<a href>`` elements are scanned.
        link_root: URL that non-absolute hrefs are resolved against.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        List of absolute URLs, in page order, for every anchor whose href
        contains ``.csv`` (case-insensitive). Duplicates are kept.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(base_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    links = []
    for anchor in soup.find_all('a', href=True):
        href = anchor['href'].strip()
        if '.csv' not in href.lower():
            continue
        # urljoin leaves absolute URLs untouched and correctly handles
        # hrefs such as "file.csv", "/path/file.csv" and "//host/file.csv".
        # NOTE(review): relative hrefs are resolved against link_root, not
        # against base_url (a different host) — confirm which host actually
        # serves the files.
        links.append(urljoin(link_root, href))
    return links

def build_dataframe(links):
    """Build the catalog table from a list of CSV URLs.

    The last path segment of each URL is passed to ``extract_date``; URLs for
    which either the year or the month could not be determined are skipped.

    Args:
        links: Iterable of URL strings.

    Returns:
        DataFrame with columns ``year``, ``month`` and ``url`` (one row per
        dated URL, in input order). The columns are present even when no URL
        could be dated, so callers can sort or select on them safely.
    """
    rows = []
    for url in links:
        filename = url.split('/')[-1]
        year, month = extract_date(filename)
        if year and month:
            rows.append({'year': year, 'month': month, 'url': url})
    # Explicit columns: an empty `rows` would otherwise yield a frame with no
    # columns, and a later sort_values(['year', 'month']) would raise KeyError.
    return pd.DataFrame(rows, columns=['year', 'month', 'url'])

# Entry point for manual execution
if __name__ == "__main__":
    links = get_csv_links()
    df = build_dataframe(links)

    # Newest files first: year descending, then month descending.
    df_sorted = df.sort_values(['year', 'month'], ascending=False)

    # Save to CSV. to_csv does not create missing directories, so make sure
    # the target folder exists before writing.
    output_path = "../opt/catalog.csv"
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    df_sorted.to_csv(output_path, index=False)
    print(f"Catalog saved to {output_path} with {len(df_sorted)} entries.")


Catalog saved to ../opt/catalog.csv with 180 entries.


In [16]:
# Display the catalog DataFrame as this cell's output.
# NOTE(review): the captured output has an "Unnamed: 0" column, which
# build_dataframe does not create, and this cell's execution count (16) is
# lower than that of the cell defining `df` (18) — presumably `df` came from
# an earlier kernel state or a re-read CSV; confirm with Restart & Run All.
df

Unnamed: 0,year,month,url
0,2025,1,https://datos.cdmx.gob.mx/wp-content/uploads/2025/02/2025-01.csv
1,2025,2,https://datos.cdmx.gob.mx/wp-content/uploads/2025/03/2025-02.csv
2,2024,1,https://datos.cdmx.gob.mx/wp-content/uploads/2024/02/ecobici_2024_enero.csv
3,2024,2,https://datos.cdmx.gob.mx/wp-content/uploads/2024/03/2024-02.csv
4,2024,3,https://datos.cdmx.gob.mx/wp-content/uploads/2024/04/datos_abiertos_2024_03-1-1.csv
5,2024,4,https://datos.cdmx.gob.mx/wp-content/uploads/2024/05/datos_abiertos_2024_04.csv
6,2024,5,https://datos.cdmx.gob.mx/wp-content/uploads/2024/06/2024-05-1.csv
7,2024,6,https://datos.cdmx.gob.mx/wp-content/uploads/2024/07/2024-06.csv
8,2024,7,https://datos.cdmx.gob.mx/wp-content/uploads/2024/08/datos_abiertos_2024_07.csv
9,2024,8,https://datos.cdmx.gob.mx/wp-content/uploads/2024/09/2024-08.csv


In [1]:
import requests
import csv

# URL of the JSON feed
url = 'https://gbfs.mex.lyftbikes.com/gbfs/en/station_information.json'

# Fetch the data. The timeout keeps a stalled connection from hanging the
# cell, and raise_for_status surfaces HTTP errors before JSON parsing.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Extract the station records
stations = data['data']['stations']

# Fields to keep for each station
campos = ['station_id', 'name', 'short_name', 'lat', 'lon', 'capacity', 'is_charging', 'has_kiosk']

# Write the CSV
with open('estaciones_ecobici.csv', 'w', newline='', encoding='utf-8') as archivo_csv:
    writer = csv.DictWriter(archivo_csv, fieldnames=campos)
    writer.writeheader()
    for estacion in stations:
        # Keep only the selected fields; a missing field becomes an empty string
        fila = {k: estacion.get(k, '') for k in campos}
        writer.writerow(fila)

print("Archivo CSV generado sin usar pandas.")


Archivo CSV generado sin usar pandas.
