In [None]:
from datetime import datetime, timedelta
import os, time
import pandas as pd
import numpy as np
import yfinance as yf

# Input CSV with the index constituents; the script further down reads its
# "ticker_market" and "country" columns.
path_csv = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_componentes_indices.csv"
# Directory that receives one CSV per country plus the summary CSV.
out_dir  = "/Users/marcomendieta/Documents/TFM/Data_stock/StockData_csv"

# Requested download window, expressed as "YYYY-MM-DD" strings.
start_date = "2020-01-01"
# Yesterday according to the local (naive) clock.
# NOTE(review): yfinance documents `end` as exclusive, so the last row returned
# would be the day *before* this date -- confirm that is the intent.
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

# Retry policy passed to fetch_one for every ticker: number of attempts and
# the pause, in seconds, between two attempts.
max_retries_per_ticker = 3
sleep_between_retries_sec = 2

def sanitize_country(name):
    """Turn a country label into a token that is safe inside a file name.

    The label is stripped and lower-cased, spaces become ``_`` and ``/``
    becomes ``-``; every remaining character that is not alphanumeric,
    ``_`` or ``-`` is dropped.

    Args:
        name: Country label. Anything that is not a ``str`` is treated as
            missing.

    Returns:
        The sanitized token, or ``"unknown"`` when ``name`` is not a string
        or when no character survives the clean-up (blank or
        punctuation-only labels), so the caller never builds a file name
        with an empty country part.
    """
    if not isinstance(name, str):
        return "unknown"
    safe = name.strip().lower().replace(" ", "_").replace("/", "-")
    cleaned = "".join(ch for ch in safe if ch.isalnum() or ch in ("_", "-"))
    # An empty token would collapse every such label onto "stockdata_.csv".
    return cleaned or "unknown"

# Column layout of every frame returned by fetch_one (also for empty results).
_PRICE_COLUMNS = ["ticker", "Close", "High", "Low", "Open", "Volume", "FECHA"]


def _normalize_prices(raw, ticker):
    """Reshape a raw ``yf.download`` frame into the flat per-ticker layout."""
    df = raw
    if isinstance(df.columns, pd.MultiIndex):
        # Some yfinance releases return two-level (field, ticker) columns even
        # for a single symbol; keep only the field level so "Close", "Open",
        # ... are plain labels that select, de-duplicate and concatenate
        # cleanly.
        # NOTE(review): assumes the field name is level 0 (the default column
        # grouping) -- confirm against the installed yfinance version.
        df = df.copy()
        df.columns = df.columns.get_level_values(0)
    df = df.reset_index().rename(columns={"Date": "FECHA"})
    # Guarantee the full layout even if the provider omitted a field.
    for col in ("Open", "High", "Low", "Close", "Volume", "FECHA"):
        if col not in df.columns:
            df[col] = np.nan
    df["ticker"] = ticker
    df["FECHA"] = pd.to_datetime(df["FECHA"]).dt.strftime("%Y-%m-%d")
    df = df[_PRICE_COLUMNS]
    return df.drop_duplicates(subset=["ticker", "FECHA"], keep="last").sort_values(["ticker", "FECHA"])


def fetch_one(ticker, start_date, end_date, max_retries=3, sleep_s=2):
    """Download the price history of one ticker, retrying on failure.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start of the window, passed as ``start``.
        end_date: End of the window, passed as ``end``.
        max_retries: Maximum number of download attempts. With ``0`` no
            download is attempted at all.
        sleep_s: Seconds to wait between two attempts.

    Returns:
        A DataFrame with the columns ``ticker, Close, High, Low, Open,
        Volume, FECHA`` (``FECHA`` formatted as ``YYYY-MM-DD``), one row per
        date, sorted by date. The frame is empty, with the same columns,
        when every attempt failed or returned no rows. This function is
        best-effort and does not raise for download or parsing errors.
    """
    import logging  # local import keeps this block self-contained

    last_err = None
    for attempt in range(1, max_retries + 1):
        try:
            raw = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False, progress=False)
            if raw is not None and not raw.empty:
                return _normalize_prices(raw, ticker)
            # yfinance can report a failure (e.g. a timeout) by printing
            # "Failed download" and returning an empty frame instead of
            # raising, so an empty result is retried like an exception.
        except Exception as exc:  # not bare: Ctrl-C / SystemExit must propagate
            last_err = exc
        if attempt < max_retries:
            time.sleep(sleep_s)

    if last_err is not None:
        logging.getLogger(__name__).warning(
            "fetch_one(%s): giving up after %d attempt(s): %r", ticker, max_retries, last_err
        )
    return pd.DataFrame(columns=list(_PRICE_COLUMNS))

# Load the ticker universe and normalise the two columns used below.
tickers_df = pd.read_csv(path_csv)
tickers_df = tickers_df.dropna(subset=["ticker_market", "country"]).copy()
tickers_df["ticker_market"] = tickers_df["ticker_market"].astype(str).str.strip()
tickers_df["country"] = tickers_df["country"].astype(str).str.strip().str.lower()

countries = sorted(tickers_df["country"].unique().tolist())

os.makedirs(out_dir, exist_ok=True)
resumen = []  # one summary record per country: name, row count, output file

for country in countries:
    subset = tickers_df[tickers_df["country"] == country]
    tickers = sorted(subset["ticker_market"].unique().tolist())
    # Download every ticker of the country; tickers that yield no rows are dropped.
    frames = [fetch_one(t, start_date, end_date, max_retries_per_ticker, sleep_between_retries_sec) for t in tickers]
    frames = [f for f in frames if not f.empty]
    if frames:
        out = pd.concat(frames, ignore_index=True)
        out = out[["ticker", "Close", "High", "Low", "Open", "Volume", "FECHA"]]
        country_safe = sanitize_country(country)
        out_file = os.path.join(out_dir, f"stockdata_{country_safe}.csv")
        out.to_csv(out_file, index=False)
        resumen.append({"country": country, "rows": len(out), "file": out_file})
    else:
        # Nothing was downloaded for this country: record it without a file.
        resumen.append({"country": country, "rows": 0, "file": ""})

# Explicit columns keep the summary well-formed (and sortable) even when the
# input CSV contained no usable rows and `resumen` is therefore empty.
resumen_df = pd.DataFrame(resumen, columns=["country", "rows", "file"]).sort_values("country")
resumen_csv = os.path.join(out_dir, "stockdata_resumen_por_pais.csv")
resumen_df.to_csv(resumen_csv, index=False)
resumen_df



1 Failed download:
['DPW.DE']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['TEN.MI']: Timeout('Failed to perform, curl: (28) Operation timed out after 19675 milliseconds with 0 bytes received. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.')
