# %% [markdown]
# Generate Full Swedish Stock Company CSV (Only Valid Tickers)

Produces `valid_swedish_company_data.csv` and `errors_log.csv`.
1. Fetches the list of *all* Swedish tickers via **investpy**
2. Resumes from an existing `valid_swedish_company_data.csv`, skipping tickers already recorded
3. Retries any tickers listed in the prior `errors_log.csv`
4. Checks each ticker for at least one day of data via **yfinance**
5. Sleeps between calls to avoid hitting rate limits
----


In [None]:
# 1) Imports and globals
import investpy
import pandas as pd
import yfinance as yf
import time
import os

# Checkpoint file: one row per ticker that returned price data; rewritten
# after every successful ticker so an interrupted run can resume.
OUTPUT_CSV = "valid_swedish_company_data.csv"
# Tickers that failed in the latest run, with the error message; these are
# retried first on the next run.
ERROR_LOG_CSV = "errors_log.csv"
# Pause after every processed ticker, in seconds (passed to time.sleep).
SLEEP_SEC = 1.0   # adjust if you hit rate-limits

In [None]:
# ## 2) Fetch **all** Swedish tickers via investpy

# %%
# Pull the full Swedish listing, then derive the two columns used downstream:
# the upper-cased symbol with the ".ST" suffix, and a copy of the company name.
_raw_listing = investpy.get_stocks(country="Sweden")
stocks_df = _raw_listing
stocks_df["YahooTicker"] = stocks_df["symbol"].str.upper() + ".ST"
stocks_df["CompanyName"] = stocks_df["name"]

tickers_list = stocks_df["YahooTicker"].to_list()
print(f"Found {len(tickers_list)} Swedish tickers via investpy.")

In [None]:
# ## 3) Load prior progress: recorded successes and previously failed tickers

# %%
def _read_csv_or_none(path):
    """Return the CSV at *path* as a DataFrame, or None if it is missing or empty.

    A zero-byte file makes ``pd.read_csv`` raise ``EmptyDataError``; it is
    treated like a missing file so the run can still start instead of
    crashing before any ticker is processed.
    """
    if not os.path.exists(path):
        return None
    try:
        return pd.read_csv(path)
    except pd.errors.EmptyDataError:
        return None


# — load successes
prev_ok = _read_csv_or_none(OUTPUT_CSV)
if prev_ok is not None:
    df_out = prev_ok
    # dropna() first: astype(str) would otherwise turn blank cells into the
    # literal string "nan" and treat it as a recorded ticker.
    done = set(df_out['YahooTicker'].dropna().astype(str))
    print(f"Resuming – {len(done)} valid tickers already recorded.")
else:
    df_out = pd.DataFrame(columns=[
        "YahooTicker", "CompanyName", "Exchange"
    ])
    done = set()
    print("Starting fresh – no existing valid tickers CSV found.")

# — load previous errors to retry
prev_err = _read_csv_or_none(ERROR_LOG_CSV)
if prev_err is not None:
    # Blank ticker cells are skipped so a bogus "nan" ticker is never retried
    # (it would fail, be logged again, and never leave the error log).
    retry_tickers = prev_err['YahooTicker'].dropna().astype(str).tolist()
    print(f"Will retry {len(retry_tickers)} previously failed tickers.")
else:
    retry_tickers = []

In [None]:
# ## 4) Build ordered list: retry failures first, then new tickers

# %%
# dict.fromkeys() de-duplicates while keeping first-seen order, so a ticker
# listed twice (in the error log or by investpy) is processed only once.
# The set gives O(1) membership tests instead of scanning the retry list
# for every ticker.
retry_lookup = set(retry_tickers)
to_retry = [t for t in dict.fromkeys(retry_tickers) if t not in done]
to_new = [
    t for t in dict.fromkeys(tickers_list)
    if t not in done and t not in retry_lookup
]
to_process = to_retry + to_new

print(f"{len(to_retry)} to retry, {len(to_new)} new → total {len(to_process)} tickers to process.")

In [None]:
# ## 5) Loop over `to_process`, validate data, checkpoint, and collect errors

# %%
# Company names from investpy, keyed by Yahoo ticker. Used as the fallback
# when Yahoo returns no name, so CompanyName is only set to the raw ticker
# when neither source knows the company (e.g. a retried ticker that investpy
# no longer lists).
investpy_names = (
    stocks_df.dropna(subset=["YahooTicker", "CompanyName"])
    .drop_duplicates(subset="YahooTicker")
    .set_index("YahooTicker")["CompanyName"]
    .to_dict()
)

errors = []
for ticker in to_process:
    print(f"→ Processing {ticker} …", end="", flush=True)
    try:
        tk = yf.Ticker(ticker)
        # check for at least one day of history
        hist = tk.history(period="1d")
        if hist.empty:
            msg = "no price data (empty history)"
            print(f" ✖ {msg}")
            errors.append({"YahooTicker": ticker, "Error": msg})
            continue  # the `finally` below still sleeps before the next ticker

        # fetch info for metadata
        info = tk.info or {}
        name = (
            info.get("shortName")
            or info.get("longName")
            or investpy_names.get(ticker, ticker)
        )
        # `or ""` also covers the key being present with a None value.
        exchange = info.get("exchange") or ""

        # record valid ticker and checkpoint immediately, so an interrupted
        # run loses at most the ticker currently in flight
        df_out = pd.concat([
            df_out,
            pd.DataFrame([{
                "YahooTicker": ticker,
                "CompanyName": name,
                "Exchange": exchange,
            }]),
        ], ignore_index=True)
        df_out.to_csv(OUTPUT_CSV, index=False)
        done.add(ticker)
        print(" ✔ recorded")
    except Exception as e:
        # Deliberately broad: any failure for one ticker is logged for a
        # later retry rather than aborting the whole pass.
        msg = str(e)
        print(f" ERROR: {msg}")
        errors.append({"YahooTicker": ticker, "Error": msg})
    finally:
        # Throttle after every ticker, whether it succeeded, failed, or was skipped.
        time.sleep(SLEEP_SEC)

print("Data collection pass complete.")

In [None]:
# ## 6) Save updated error log

# %%
if not errors:
    # Clean pass: remove any log left over from an earlier run.
    if os.path.exists(ERROR_LOG_CSV):
        os.remove(ERROR_LOG_CSV)
    print("No errors on this run.  👍")
else:
    # Overwrite the log with this run's failures only.
    error_frame = pd.DataFrame(errors)
    error_frame.to_csv(ERROR_LOG_CSV, index=False)
    print(f"Logged {len(errors)} errors to {ERROR_LOG_CSV}")