In [1]:
import os
from datetime import datetime
import pandas as pd
import yfinance as yf
import csv

In [2]:
import os
import csv
from datetime import datetime
import yfinance as yf
import pandas as pd

def update_stock_csv_by_years(tickers, years, frequency='daily', folder='stocks_datasets'):
    interval_map = {'daily': '1d', 'hourly': '1h'}
    if frequency not in interval_map:
        raise ValueError("Frequency must be 'daily' or 'hourly'")

    os.makedirs(folder, exist_ok=True)

    file_path = os.path.join(folder, "new_data.csv")

    with open(file_path, 'w', newline='') as f:
        writer = csv.writer(f)

        for ticker in tickers:
            all_years_data = []

            # Download year by year
            for year in years:
                interval = interval_map[frequency]
                start_date = f"{year}-01-01"
                end_date = f"{year + 1}-08-23"

                if frequency == 'hourly' and (datetime.today().year - year) > 2:
                    print(f"⚠ Hourly data not available before ~2 years ago for {ticker} in {year}. Switching to daily.")
                    interval = '1d'

                try:
                    print(f"🔄 Fetching {interval} data for {ticker} in {year}...")
                    data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)

                    if not data.empty:
                        data.index = pd.to_datetime(data.index)
                        data.index.name = "Date"

                        # ✅ Flatten MultiIndex columns if present
                        if isinstance(data.columns, pd.MultiIndex):
                            data.columns = [c[0] for c in data.columns]

                        all_years_data.append(data)
                    else:
                        print(f"⚠ No data for {ticker} in {year}")
                except Exception as e:
                    print(f"❌ Error with {ticker} in {year}: {e}")

            if all_years_data:
                new_data = pd.concat(all_years_data)
                new_data.sort_index(inplace=True)

                # Keep only required columns
                cols_order = ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
                cols_order_existing = [col for col in cols_order if col in new_data.columns]
                df_to_save = new_data[cols_order_existing].copy()

                # Rename 'Adj Close' → 'Price'
                df_to_save.rename(columns={'Adj Close': 'Price'}, inplace=True)

                # ✅ Write header block (3 lines) for each ticker
                writer.writerow(['Date'] + df_to_save.columns.tolist())
                writer.writerow(['Ticker'] + [ticker] * len(df_to_save.columns))
                writer.writerow(['Date'] + [''] * len(df_to_save.columns))

                # Data rows
                for idx, row in df_to_save.iterrows():
                    writer.writerow([idx.strftime('%Y-%m-%d')] + list(row.values))

        print(f"✅ Saved all tickers to {file_path}")


In [3]:
update_stock_csv_by_years(
    tickers=["TSLA"],
    years=range(2020, 2025),
    frequency="daily"
)

🔄 Fetching 1d data for TSLA in 2020...


  data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)


🔄 Fetching 1d data for TSLA in 2021...


  data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)


🔄 Fetching 1d data for TSLA in 2022...


  data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)


🔄 Fetching 1d data for TSLA in 2023...


  data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)


🔄 Fetching 1d data for TSLA in 2024...


  data = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)


✅ Saved all tickers to stocks_datasets\new_data.csv
