In [5]:
import yfinance as yf
import pandas as pd
import time

### Define Parameters

In [1]:
# Project time frame
start_date = "2015-01-01"
end_date = "2025-05-01"

# Benchmark index
benchmark_symbol = "^GSPC"

# Top 10 symbols from each sector (replace with your actual tickers if needed)
sectors = {
    "Choosen": ['AAPL', 'MSFT', 'AMZN', 'NFLX', 'MRNA', 'JNJ', 'ILMN', 'NEE', 'TSLA', 'FSLR'],
    "Benchmark": [benchmark_symbol]
}

# Flatten list of all tickers
all_symbols = [symbol for group in sectors.values() for symbol in group]

### Download Historical Stock Data with Sector Labels

In [7]:
def download_by_sector_batch(sectors_dict, start, end, interval=" 1d"):
    all_data = []

    for sector, symbols in sectors_dict.items():
        print(f"Downloading sector: {sector}")
        try:
            df = yf.download(symbols, start=start, end=end, interval=interval, group_by='ticker', progress=False)
            for symbol in symbols:
                if (symbol,) in df.columns:
                    # MultiIndex fallback (rare)
                    sub_df = df[symbol]
                else:
                    # Normal format
                    sub_df = df[symbol].copy()
                sub_df = sub_df.reset_index()
                sub_df["Symbol"] = symbol
                sub_df["Sector"] = sector
                all_data.append(sub_df)
        except Exception as e:
            print(f"Failed to download sector {sector}: {e}")
        time.sleep(5)
        
        
    combined = pd.concat(all_data)
    return combined.reset_index(drop=True)

stock_df = download_by_sector_batch(sectors, start_date, end_date)
stock_df.head()


Downloading sector: Choosen


  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:
  if (symbol,) in df.columns:


Downloading sector: Benchmark


Price,Date,Open,High,Low,Close,Volume,Symbol,Sector
0,2015-01-02,24.746232,24.75734,23.848711,24.288586,212818400.0,AAPL,Choosen
1,2015-01-05,24.057537,24.137514,23.417722,23.604334,257142000.0,AAPL,Choosen
2,2015-01-06,23.66876,23.866481,23.244437,23.606556,263188400.0,AAPL,Choosen
3,2015-01-07,23.815381,24.037539,23.704302,23.937569,160423600.0,AAPL,Choosen
4,2015-01-08,24.266369,24.915071,24.148623,24.857309,237458000.0,AAPL,Choosen


### Save to CSV

In [8]:
# Save the downloaded dataset
csv_path = "stock_sector_data.csv"
stock_df.to_csv(csv_path, index=False)
print(f"Saved dataset to {csv_path}")


Saved dataset to stock_sector_data.csv
