## Web scarping usando Python
### Recuperando datos de acciones de la bolsa de valores
usando requests y BeautifulSoup en www.groww.com

Funcion para capturar los datos de las acciones de la bolsa de valores de la pagina web www.groww.com

In [None]:

import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import time
import random
from datetime import datetime, timedelta
import schedule
def captura_datos():
    if datetime.today().weekday() == 5 or datetime.datetime.today().weekday() == 6:
        print("Fin de semana. No se ejecutará la captura de datos.")
        return  
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.1729.4'}
    urls = [
        'https://groww.in/us-stocks/nke',
        'https://groww.in/us-stocks/ko',
        'https://groww.in/us-stocks/msft',  
        'https://groww.in/us-stocks/axp', 
        'https://groww.in/us-stocks/amgn', 
        'https://groww.in/us-stocks/aapl', 
        'https://groww.in/us-stocks/ba', 
        'https://groww.in/us-stocks/csco', 
        'https://groww.in/us-stocks/gs', 
        'https://groww.in/us-stocks/ibm', 
        'https://groww.in/us-stocks/intc', 
        'https://groww.in/us-stocks/jpm', 
        'https://groww.in/us-stocks/mcd',
        'https://groww.in/us-stocks/crm', 
        'https://groww.in/us-stocks/vz', 
        'https://groww.in/us-stocks/v', 
        'https://groww.in/us-stocks/wmt',  
        'https://groww.in/us-stocks/dis' 
        
        ]
    datos_crudos = []

    for url in urls:
        print(f"Obteniendo datos de: {url}")

        try:
        
            respuesta = requests.get(url, headers=headers)

        
            if respuesta.status_code != 200:
                print(f"Fallo el fetch {url} (Status: {respuesta.status_code})")
                continue

            soup = BeautifulSoup(respuesta.text, 'html.parser')

            
            empresa = soup.find('h1', {'class': 'usph14Head displaySmall'})
            nombre_empresa = empresa.get_text(strip=True) if empresa else "N/A"

            precio = soup.find('span', {'class': 'uht141Pri contentPrimary displayBase'})
            precio_accion = precio.get_text(strip=True) if precio else "N/A"

            cambio = soup.find('div', {'class': ['uht141Day bodyBaseHeavy contentNegative','uht141Day bodyBaseHeavy contentPositive']})
            cambio_accion = cambio.get_text(strip=True) if cambio else "N/A"

            tabla_volumen = soup.find('table', {'class': 'tb10Table borderPrimary width100 usp100NoBorder usp100Table'})
            if tabla_volumen:
                filas = tabla_volumen.find_all('tr')  
                if len(filas) > 1:  
                    celdas = filas[1].find_all('td')  
                    if len(celdas) > 2:  
                        volumen_accion = celdas[2].get_text(strip=True)  
                    else:
                        volumen_accion = "N/A"
                else:
                    volumen_accion = "N/A"
            else:
                volumen_accion = "N/A"
            
            datos_crudos.append([nombre_empresa, precio_accion, cambio_accion, volumen_accion])

            print(f"✔ Fetch exitoso para: {nombre_empresa}")

        except Exception as e:
            print(f"Error del fetch {url}: {str(e)}")

        
        time.sleep(random.uniform(2, 5))


    df = pd.DataFrame(datos_crudos, columns=['Empresa', 'Precio', 'Cambio', 'Volumen'])
    df['Precio'] = df['Precio'].str.replace('$', '', regex=False).astype(float)
    def extraer_cambio(cadena):
    
        m = re.match(r'([+-]?\d+\.?\d*)\(([\d\.]+)%\)', cadena)
        if m:
            cambio_val = m.group(1)   
            cambio_pct = m.group(2)   
            return pd.Series([float(cambio_val), cambio_pct])
        else:
            return pd.Series([None, None])
    df[['Cambio_Valor', 'Cambio_Porcentaje']] = df['Cambio'].fillna('').apply(extraer_cambio)
    df.drop(columns=['Cambio'], inplace=True)
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    df.to_csv(f'Acciones_{timestamp}.csv', index=False, encoding='utf-8-sig')
def captura_datos_oro():
    if datetime.today().weekday() == 5 or datetime.datetime.today().weekday() == 6:
        print("Fin de semana. No se ejecutará la captura de datos.")
        return  
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.1729.4'}
    urls = [
        'https://es.investing.com/currencies/xau-usd'
            
        ]
    datos_oro = []

    for url in urls:
        print(f"Obteniendo datos de: {url}")

        try:
        
            respuesta = requests.get(url, headers=headers)

        
            if respuesta.status_code != 200:
                print(f"Fallo el fetch {url} (Status: {respuesta.status_code})")
                continue

            soup = BeautifulSoup(respuesta.text, 'html.parser')

            def extraer_valor(data_test):
                elemento = soup.find('dd', {'data-test': data_test})
                if elemento:
                    span = elemento.find('span', {'class': 'key-info_dd-numeric__ZQFIs'})
                    if span:
                        spans = span.find_all('span')
                        return spans[1].get_text(strip=True) if len(spans) > 1 else "N/A"
                return "N/A"
        
            ultimo_cierre = extraer_valor("prevClose")
            apertura = extraer_valor("open")
            valor_compra = extraer_valor("bid")
            datos_oro.append([ultimo_cierre, valor_compra, apertura])

            print(f"✔ Fetch exitoso para oro")

        except Exception as e:
            print(f"Error del fetch {url}: {str(e)}")
    df = pd.DataFrame(datos_oro, columns=[ "Último Cierre", "Apertura", "Valor de Compra"])

    # Mostrar el DataFrame
    print("\n Datos extraídos:")
    print(df)
    fecha_actual = datetime.now().strftime("%Y-%m-%d")
    df.to_csv(f"oro_{fecha_actual}.csv", index=False, encoding="utf-8")
schedule.every().day.at("15:30").do(captura_datos) 
schedule.every().day.at('15:33').do(captura_datos_oro) 
schedule.every().day.at("17:57").do(captura_datos)  
schedule.every().day.at("21:27").do(captura_datos)   
schedule.every().day.at("21:57").do(captura_datos)  
print("Scheduler iniciado. Esperando próximos horarios de captura...")

while True:
    schedule.run_pending()
    proximo_evento = schedule.next_run()
    tiempo_espera = (proximo_evento - datetime.now()).total_seconds()
        
    if proximo_evento:
        tiempo_espera = (proximo_evento - datetime.now()).total_seconds()
    else:
        ahora = datetime.now()
        primer_evento_mañana = ahora.replace(hour=15, minute=30, second=0, microsecond=0) + timedelta(days=1) 
        tiempo_espera = (primer_evento_mañana - ahora).total_seconds()

    print(f"Esperando {tiempo_espera:.0f} segundos hasta el próximo evento.")
    time.sleep(max(tiempo_espera-60,18000))

Funcion para capturar datos de oro

In [3]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import time
import random
from datetime import datetime, timedelta
def captura_datos_oro():
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.1729.4'}
    urls = [
        'https://es.investing.com/currencies/xau-usd'
            
        ]
    datos_oro = []

    for url in urls:
        print(f"Obteniendo datos de: {url}")

        try:
        
            respuesta = requests.get(url, headers=headers)

        
            if respuesta.status_code != 200:
                print(f"Fallo el fetch {url} (Status: {respuesta.status_code})")
                continue

            soup = BeautifulSoup(respuesta.text, 'html.parser')

            def extraer_valor(data_test):
                elemento = soup.find('dd', {'data-test': data_test})
                if elemento:
                    span = elemento.find('span', {'class': 'key-info_dd-numeric__ZQFIs'})
                    if span:
                        spans = span.find_all('span')
                        return spans[1].get_text(strip=True) if len(spans) > 1 else "N/A"
                return "N/A"
        
            ultimo_cierre = extraer_valor("prevClose")
            apertura = extraer_valor("open")
            valor_compra = extraer_valor("bid")
            datos_oro.append([ultimo_cierre, valor_compra, apertura])

            print(f"✔ Fetch exitoso para oro")

        except Exception as e:
            print(f"Error del fetch {url}: {str(e)}")
    df = pd.DataFrame(datos_oro, columns=[ "Último Cierre", "Apertura", "Valor de Compra"])

    # Mostrar el DataFrame
    print("\n Datos extraídos:")
    print(df)
    fecha_actual = datetime.now().strftime("%Y-%m-%d")
    df.to_csv(f"oro_{fecha_actual}.csv", index=False, encoding="utf-8")
captura_datos_oro()

Obteniendo datos de: https://es.investing.com/currencies/xau-usd
✔ Fetch exitoso para oro

 Datos extraídos:
  Último Cierre  Apertura Valor de Compra
0      3.019,28  3.052,99        3.012,28


Programacion del horario de captura de datos

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import time
import random
from datetime import datetime, timedelta
import schedule
def captura_datos():
    if datetime.today().weekday() == 5 or datetime.today().weekday() == 6:
        print("Fin de semana. No se ejecutará la captura de datos.")
        return  
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.1729.4'}
    urls = [
        'https://groww.in/us-stocks/nke',
        'https://groww.in/us-stocks/ko',
        'https://groww.in/us-stocks/msft',  
        'https://groww.in/us-stocks/axp', 
        'https://groww.in/us-stocks/amgn', 
        'https://groww.in/us-stocks/aapl', 
        'https://groww.in/us-stocks/ba', 
        'https://groww.in/us-stocks/csco', 
        'https://groww.in/us-stocks/gs', 
        'https://groww.in/us-stocks/ibm', 
        'https://groww.in/us-stocks/intc', 
        'https://groww.in/us-stocks/jpm', 
        'https://groww.in/us-stocks/mcd',
        'https://groww.in/us-stocks/crm', 
        'https://groww.in/us-stocks/vz', 
        'https://groww.in/us-stocks/v', 
        'https://groww.in/us-stocks/wmt',  
        'https://groww.in/us-stocks/dis' 
        
        ]
    datos_crudos = []

    for url in urls:
        print(f"Obteniendo datos de: {url}")

        try:
        
            respuesta = requests.get(url, headers=headers)

        
            if respuesta.status_code != 200:
                print(f"Fallo el fetch {url} (Status: {respuesta.status_code})")
                continue

            soup = BeautifulSoup(respuesta.text, 'html.parser')

            
            empresa = soup.find('h1', {'class': 'usph14Head displaySmall'})
            nombre_empresa = empresa.get_text(strip=True) if empresa else "N/A"

            precio = soup.find('span', {'class': 'uht141Pri contentPrimary displayBase'})
            precio_accion = precio.get_text(strip=True) if precio else "N/A"

            cambio = soup.find('div', {'class': ['uht141Day bodyBaseHeavy contentNegative','uht141Day bodyBaseHeavy contentPositive']})
            cambio_accion = cambio.get_text(strip=True) if cambio else "N/A"

            tabla_volumen = soup.find('table', {'class': 'tb10Table borderPrimary width100 usp100NoBorder usp100Table'})
            if tabla_volumen:
                filas = tabla_volumen.find_all('tr')  
                if len(filas) > 1:  
                    celdas = filas[1].find_all('td')  
                    if len(celdas) > 2:  
                        volumen_accion = celdas[2].get_text(strip=True)  
                    else:
                        volumen_accion = "N/A"
                else:
                    volumen_accion = "N/A"
            else:
                volumen_accion = "N/A"
            
            datos_crudos.append([nombre_empresa, precio_accion, cambio_accion, volumen_accion])

            print(f"✔ Fetch exitoso para: {nombre_empresa}")

        except Exception as e:
            print(f"Error del fetch {url}: {str(e)}")

        
        time.sleep(random.uniform(2, 5))


    df = pd.DataFrame(datos_crudos, columns=['Empresa', 'Precio', 'Cambio', 'Volumen'])
    df['Precio'] = df['Precio'].str.replace('$', '', regex=False).astype(float)
    def extraer_cambio(cadena):
    
        m = re.match(r'([+-]?\d+\.?\d*)\(([\d\.]+)%\)', cadena)
        if m:
            cambio_val = m.group(1)   
            cambio_pct = m.group(2)   
            return pd.Series([float(cambio_val), cambio_pct])
        else:
            return pd.Series([None, None])
    df[['Cambio_Valor', 'Cambio_Porcentaje']] = df['Cambio'].fillna('').apply(extraer_cambio)
    df.drop(columns=['Cambio'], inplace=True)
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    df.to_csv(f'Acciones_{timestamp}.csv', index=False, encoding='utf-8-sig')
captura_datos() 

Obteniendo datos de: https://groww.in/us-stocks/nke
✔ Fetch exitoso para: Nike Inc
Obteniendo datos de: https://groww.in/us-stocks/ko
✔ Fetch exitoso para: Coca-Cola Company The
Obteniendo datos de: https://groww.in/us-stocks/msft
✔ Fetch exitoso para: Microsoft Corporation
Obteniendo datos de: https://groww.in/us-stocks/axp
✔ Fetch exitoso para: American Express Co
Obteniendo datos de: https://groww.in/us-stocks/amgn
✔ Fetch exitoso para: Amgen Inc
Obteniendo datos de: https://groww.in/us-stocks/aapl
✔ Fetch exitoso para: Apple Inc
Obteniendo datos de: https://groww.in/us-stocks/ba
✔ Fetch exitoso para: Boeing Company The
Obteniendo datos de: https://groww.in/us-stocks/csco
✔ Fetch exitoso para: Cisco Systems Inc
Obteniendo datos de: https://groww.in/us-stocks/gs
✔ Fetch exitoso para: Goldman Sachs Group Inc The
Obteniendo datos de: https://groww.in/us-stocks/ibm
✔ Fetch exitoso para: International Business Machines Corp
Obteniendo datos de: https://groww.in/us-stocks/intc
✔ Fetch exi