In [None]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
from pathlib import Path
from pandas.api.types import DatetimeTZDtype

# ---- Download window -------------------------------------------------------
# The window opens on a fixed date and closes on yesterday's calendar date.
start_date = '2020-01-01'
_yesterday = datetime.today() - timedelta(days=1)
end_date = _yesterday.strftime('%Y-%m-%d')

# ---- Input files and output folder -----------------------------------------
# NOTE(review): TICKERS_PATH and DF_TICKERS_PATH point at the same file.
TICKERS_PATH = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_indices.csv"
DF_TICKERS_PATH = "/Users/marcomendieta/Documents/TFM/Data_stock/Tickets_indices.csv"
INDEX_PATH = "/Users/marcomendieta/Documents/TFM/Data_stock/stockindex.csv"
out_dir = Path("/Users/marcomendieta/Documents/TFM/Data_stock/StockData_csv")
# Create the output folder (and any missing parents); no error if it exists.
out_dir.mkdir(parents=True, exist_ok=True)

# ---- Output switches -------------------------------------------------------
INCLUDE_ADJ_CLOSE = False  # add the "Adj Close" column to the daily output
INCLUDE_METADATA = False   # add market / country / index columns to the output

# ---- Load the ticker list and the index table ------------------------------
df_tickers = pd.read_csv(DF_TICKERS_PATH, low_memory=False)
index_df = pd.read_csv(INDEX_PATH)

# Missing suffixes become empty strings; both text columns are whitespace-trimmed.
_suffix_raw = index_df['Sufijo'].fillna("")
index_df['Sufijo'] = _suffix_raw.astype(str).str.strip()
_country_raw = index_df['País'].astype(str)
index_df['País'] = _country_raw.str.strip()

# Pick the column of df_tickers that holds the ticker symbols, preferring
# 'ticker_market' over 'ticker'. The chosen column is cleaned in place
# (missing -> "", cast to str, whitespace stripped) and its name is kept in
# ALL_TICKERS_COL. If neither column exists, a ValueError is raised.
for _candidate in ('ticker_market', 'ticker'):
    if _candidate not in df_tickers.columns:
        continue
    _cleaned = df_tickers[_candidate].fillna("").astype(str)
    df_tickers[_candidate] = _cleaned.str.strip()
    ALL_TICKERS_COL = _candidate
    break
else:
    raise ValueError("No se encontró columna de ticker en DF_TICKERS_PATH. Usa 'ticker' o 'ticker_market'.")

# Membership table: one row per (ticker, year), plus optional descriptive columns.
memb_cols = ['ticker', 'year', 'market', 'name', 'country', 'ticket', 'index']
# Read only the header first so that usecols never names a column the file lacks.
cols_presentes = pd.read_csv(TICKERS_PATH, nrows=0).columns.tolist()
usecols = [c for c in memb_cols if c in cols_presentes]
memb = pd.read_csv(TICKERS_PATH, usecols=usecols, low_memory=False)

# Drop incomplete rows BEFORE casting the ticker to str: astype(str) turns a
# missing value into the literal text "nan", which dropna can no longer detect.
memb = memb.dropna(subset=['ticker', 'year']).copy()
memb['ticker'] = memb['ticker'].astype(str).str.strip()
# A ticker that is blank after stripping carries no information either.
memb = memb[memb['ticker'] != ""].copy()
memb['year'] = memb['year'].astype(int)

# Keep only the membership years touched by the [start_date, end_date] window.
start_y, end_y = pd.to_datetime(start_date).year, pd.to_datetime(end_date).year
memb = memb[(memb['year'] >= start_y) & (memb['year'] <= end_y)].copy()

# Report (but do not fail on) repeated (ticker, year) pairs.
dups_mask = memb.duplicated(subset=['ticker', 'year'], keep=False)
num_dups = int(dups_mask.sum())
if num_dups > 0:
    print(f"⚠️ Aviso: {num_dups} filas con duplicado de (ticker, year) en membresías. Se resolverán por agregación.")

# ticker -> sorted list of distinct membership years ('year' is already int here).
years_by_ticker = memb.groupby('ticker')['year'].apply(lambda s: sorted(set(s))).to_dict()

def first_nonnull(s):
    """Return the first element of ``s`` that is not missing.

    Elements are tested in iteration order with ``pd.notna``. When every
    element is missing, or ``s`` is empty, ``pd.NA`` is returned.
    """
    return next((value for value in s if pd.notna(value)), pd.NA)

# Collapse the membership table to one row per (ticker, year): every column
# other than the key takes its first non-missing value within the group.
_value_cols = [c for c in memb.columns if c not in ('ticker', 'year')]
agr = dict.fromkeys(_value_cols, first_nonnull)

_memb_sorted = memb.sort_values(['ticker', 'year'])
memb_meta = _memb_sorted.groupby(['ticker', 'year'], as_index=False).agg(agr)

# (ticker, year) -> {column: value} lookup of the aggregated metadata.
_meta_indexed = memb_meta.set_index(['ticker', 'year'])
meta_by_ticker_year = _meta_indexed.to_dict(orient='index')

# Column order of the daily output; "Adj Close" sits just before "Volume"
# when it is enabled.
OUT_COLS_DAILY = ["Ticker", "Year", "date", "Open", "High", "Low", "Close", "Volume"]
if INCLUDE_ADJ_CLOSE:
    OUT_COLS_DAILY.insert(OUT_COLS_DAILY.index("Volume"), "Adj Close")

# Optional metadata columns are appended at the end.
# NOTE(review): the membership test is against memb_cols (the requested
# names), not against the columns actually present in the file.
if INCLUDE_METADATA:
    OUT_COLS_DAILY += [
        extra
        for extra in ("market", "country", "index")
        if extra in memb_cols and extra not in OUT_COLS_DAILY
    ]

def normalize_to_yahoo_by_year(t: str, year: int) -> str:
    """Return the symbol to request for ticker ``t`` in membership year ``year``.

    Surrounding whitespace and any leading ``$`` are removed first. Symbols
    ending in ``.AS`` (matched case-insensitively) are then remapped:

    * ``RDS-A...`` -> ``RDSA.AS`` for years up to 2021, ``SHELL.AS`` afterwards
    * ``RDS-B...`` -> ``RDSB.AS`` for years up to 2021, ``SHELL.AS`` afterwards
    * ``RELX.AS``  -> ``REN.AS``

    Any other symbol is returned cleaned but otherwise unchanged, with its
    original letter case.

    Args:
        t: Ticker symbol as it appears in the membership data.
        year: Membership year the download is for.

    Returns:
        The cleaned, possibly remapped, symbol.
    """
    # Strip whitespace before the "$" so that " $ABC" is cleaned as well.
    t0 = t.strip().lstrip("$").strip()
    tU = t0.upper()
    # Compare the suffix on the upper-cased symbol, like the prefix tests below,
    # so lower- or mixed-case input is remapped too.
    if tU.endswith(".AS"):
        if tU.startswith("RDS-A"):
            return "RDSA.AS" if year <= 2021 else "SHELL.AS"
        if tU.startswith("RDS-B"):
            return "RDSB.AS" if year <= 2021 else "SHELL.AS"
        if tU == "RELX.AS":
            return "REN.AS"
    return t0

# Build one daily-price CSV per row of index_df. For each row, the tickers
# whose symbol ends with the row's suffix and that have membership years are
# downloaded year by year, and the result is written to
# out_dir / "stockprice_daily_<País>.csv" (spaces in País replaced by "_").
# NOTE(review): start_date / end_date only bound the membership years; the
# per-year request below is not clipped to end_date — confirm that is intended.
for _, row in index_df.iterrows():
    sufijo = row['Sufijo']
    pais = row['País']
    out_path = out_dir / f"stockprice_daily_{pais.replace(' ', '_')}.csv"

    # NOTE(review): an empty suffix selects every ticker, because
    # str.endswith("") is always True — confirm that is intended.
    tickers_all = df_tickers[df_tickers[ALL_TICKERS_COL].str.endswith(sufijo)][ALL_TICKERS_COL].tolist()
    # De-duplicate while keeping order: the ticker list can repeat a symbol
    # (e.g. once per membership year), and each repeat would download the same
    # data again only to be removed by drop_duplicates further down.
    tickers = list(dict.fromkeys(t for t in tickers_all if t in years_by_ticker))

    all_rows = []
    if not tickers:
        # Still emit a header-only file so every index row has an output.
        pd.DataFrame(columns=OUT_COLS_DAILY).to_csv(out_path, index=False)
        continue

    for t in tickers:
        t_original = t.lstrip('$').strip()
        for y in years_by_ticker.get(t_original, []):
            t_dl = normalize_to_yahoo_by_year(t_original, y)
            try:
                # One request per membership year: Jan 1 of y to Jan 1 of y + 1.
                hist = yf.Ticker(t_dl).history(
                    start=f"{y}-01-01", end=f"{y + 1}-01-01", auto_adjust=False
                )
                if hist is None or hist.empty:
                    continue

                hist = hist.reset_index()
                # After reset_index the timestamp column is named "Date" or "Datetime".
                date_col = "Date" if "Date" in hist.columns else ("Datetime" if "Datetime" in hist.columns else None)
                if date_col is None:
                    continue
                hist.rename(columns={date_col: "date"}, inplace=True)

                if isinstance(hist["date"].dtype, DatetimeTZDtype):
                    # Drop the timezone but keep the local wall-clock time.
                    # tz_convert(None) would first shift to UTC, which moves
                    # bars stamped at local midnight east of UTC onto the
                    # previous calendar day.
                    hist["date"] = hist["date"].dt.tz_localize(None)
                hist["date"] = pd.to_datetime(hist["date"], errors="coerce")
                hist = hist.dropna(subset=["date"])
                if hist.empty:
                    continue

                part_cols = ["Open", "High", "Low", "Close", "Volume"]
                if INCLUDE_ADJ_CLOSE and "Adj Close" in hist.columns:
                    part_cols.insert(4, "Adj Close")
                part = hist[["date"] + [c for c in part_cols if c in hist.columns]].copy()
                # Rows are labelled with the cleaned membership symbol, not the
                # (possibly remapped) symbol that was downloaded.
                part.insert(0, "Ticker", t_original)
                part.insert(1, "Year", y)

                if INCLUDE_METADATA:
                    meta = meta_by_ticker_year.get((t_original, y), {})
                    for extra_col in ["market", "country", "index"]:
                        if extra_col in memb_cols:
                            part[extra_col] = meta.get(extra_col)

                keep = [c for c in OUT_COLS_DAILY if c in part.columns]
                part = part[keep].sort_values(["Ticker", "Year", "date"])
                all_rows.append(part)
            except Exception as exc:
                # Best effort: one failing ticker/year must not abort the run,
                # but the failure is reported instead of being silently dropped.
                print(f"⚠️ Aviso: {t_dl} ({y}) omitido por error: {exc!r}")
                continue

    stockprice = pd.concat(all_rows, ignore_index=True) if all_rows else pd.DataFrame(columns=OUT_COLS_DAILY)
    if not stockprice.empty:
        # The same (Ticker, date) can arrive more than once; keep the last copy.
        stockprice.drop_duplicates(subset=["Ticker", "date"], keep="last", inplace=True)
        stockprice.sort_values(by=["Ticker", "Year", "date"], inplace=True)
    stockprice.to_csv(out_path, index=False)

print("ok")


⚠️ Aviso: 14 filas con duplicado de (ticker, year) en membresías. Se resolverán por agregación.


$SGRE.MC: possibly delisted; no timezone found
$SGRE.MC: possibly delisted; no timezone found
$SGRE.MC: possibly delisted; no timezone found
$SGRE.MC: possibly delisted; no timezone found
$ORP.PA: possibly delisted; no timezone found
$ORP.PA: possibly delisted; no timezone found
$STM.PA: possibly delisted; no timezone found
$STM.PA: possibly delisted; no timezone found
$STM.PA: possibly delisted; no timezone found
$STM.PA: possibly delisted; no timezone found
$STM.PA: possibly delisted; no timezone found
$STM.PA: possibly delisted; no timezone found
$SEV.PA: possibly delisted; no timezone found
$SEV.PA: possibly delisted; no timezone found
$FTI.PA: possibly delisted; no timezone found
$FTI.PA: possibly delisted; no timezone found
$URW.PA: possibly delisted; no price data found  (1d 2020-01-01 -> 2021-01-01) (Yahoo error = "Data doesn't exist for startDate = 1577833200, endDate = 1609455600")
$URW.PA: possibly delisted; no price data found  (1d 2021-01-01 -> 2022-01-01) (Yahoo error = "

ok
