# Get Stock Data

In [None]:
import yfinance as yf
import pandas as pd

# Nasdaq-100 tickers to download (one yfinance request pair per ticker).
nasdaq_companies = [
    "ADBE", "AMD", "ABNB", "GOOGL", "GOOG", "AMZN", "AEP", "AMGN", "ADI", "ANSS",
    "AAPL", "AMAT", "APP", "ARM", "ASML", "AZN", "TEAM", "ADSK", "ADP", "AXON",
    "BKR", "BIIB", "BKNG", "AVGO", "CDNS", "CDW", "CHTR", "CTAS", "CSCO", "CCEP",
    "CTSH", "CMCSA", "CEG", "CPRT", "CSGP", "COST", "CRWD", "CSX", "DDOG", "DXCM",
    "FANG", "DASH", "EA", "EXC", "FAST", "FTNT", "GEHC", "GILD", "GFS", "HON",
    "IDXX", "INTC", "INTU", "ISRG", "KDP", "KLAC", "KHC", "LRCX", "LIN", "LULU",
    "MAR", "MRVL", "MELI", "META", "MCHP", "MU", "MSFT", "MSTR", "MDLZ", "MDB",
    "MNST", "NFLX", "NVDA", "NXPI", "ORLY", "ODFL", "ON", "PCAR", "PLTR", "PANW",
    "PAYX", "PYPL", "PDD", "PEP", "QCOM", "REGN", "ROP", "ROST", "SBUX", "SNPS",
    "TTWO", "TMUS", "TSLA", "TXN", "TTD", "VRSK", "VRTX", "WBD", "WDAY", "XEL", "ZS",
]

# Date range passed to yfinance for every ticker.
# NOTE(review): yfinance documents `end` as exclusive - confirm 2025-02-28
# itself is not expected in the output.
start_date = "2022-01-01"
end_date = "2025-02-28"

# Price columns kept from the yfinance history frame.
price_columns = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]

# One combined (prices + factors) frame per ticker that returned data.
all_data = []
# Tickers for which yfinance returned no rows; reported after the loop.
skipped_tickers = []

for ticker in nasdaq_companies:
    stock = yf.Ticker(ticker)

    # auto_adjust=False keeps the raw OHLC prices alongside "Adj Close".
    hist_data = stock.history(start=start_date, end=end_date, auto_adjust=False)
    if hist_data.empty:
        # Nothing to store for this ticker; skip it before making the extra
        # `stock.info` request and before adding an empty frame to the merge.
        skipped_tickers.append(ticker)
        continue
    hist_data = hist_data[price_columns].rename(columns={"Adj Close": "Adj_Close"})

    # Financial ratios and company classification.
    # NOTE(review): each value is a single number taken from `stock.info` and
    # copied onto every historical row below, so it does not vary by date -
    # confirm this is acceptable for downstream use (possible look-ahead bias).
    financials = stock.info
    factors = {
        "PE": financials.get("trailingPE", None),
        "PB": financials.get("priceToBook", None),
        "PS": financials.get("priceToSalesTrailing12Months", None),
        "ROE": financials.get("returnOnEquity", None),
        "PM": financials.get("profitMargins", None),
        "IN": financials.get("ebitdaMargins", None),
        "Market_Cap": financials.get("marketCap", None),
        "IndClass_Sector": financials.get("sector", "Unknown"),
        "IndClass_Industry": financials.get("industry", "Unknown"),
    }

    # Broadcast each factor to all dates and tag the rows with their ticker.
    combined_data = hist_data.assign(**factors, Ticker=ticker)
    all_data.append(combined_data)

if skipped_tickers:
    print(f"No price data returned for: {', '.join(skipped_tickers)}")

if not all_data:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise RuntimeError("No price data was downloaded for any ticker.")

# Merge all per-ticker frames into a single DataFrame (Date stays the index).
final_df = pd.concat(all_data)

# Save to CSV file, keeping the Date index as the first column.
final_df.to_csv("stock_data.csv", index=True)

# Display sample data
print(final_df.head())

$CTAS: possibly delisted; no price data found  (1d 2022-01-01 -> 2025-02-28)


                                 Open        High         Low       Close  \
Date                                                                        
2022-01-03 00:00:00-05:00  566.650024  575.000000  553.270020  564.369995   
2022-01-04 00:00:00-05:00  563.900024  564.299988  541.309998  554.000000   
2022-01-05 00:00:00-05:00  535.000000  538.750000  514.000000  514.429993   
2022-01-06 00:00:00-05:00  510.000000  526.309998  508.200012  514.119995   
2022-01-07 00:00:00-05:00  513.659973  520.419983  504.000000  510.700012   

                            Adj_close     Volume        PE        PB  \
Date                                                                   
2022-01-03 00:00:00-05:00  564.369995  2862700.0  35.62813  13.77939   
2022-01-04 00:00:00-05:00  554.000000  4344800.0  35.62813  13.77939   
2022-01-05 00:00:00-05:00  514.429993  7394800.0  35.62813  13.77939   
2022-01-06 00:00:00-05:00  514.119995  3913600.0  35.62813  13.77939   
2022-01-07 00:00:00-05:00  5

# Get Index Data

In [None]:
import yfinance as yf
import pandas as pd

# Display name -> Yahoo Finance ticker for each market index to download.
indices = {
    "S&P 500": "^GSPC",
    "NASDAQ 100": "^NDX",
    "Russell 1000": "^RUI",
    "Russell 3000": "^RUA",
    "Wilshire 5000": "^W5000"
}

# Date range passed to yfinance for every index.
# NOTE(review): yfinance documents `end` as exclusive - confirm 2025-02-28
# itself is not expected in the output. Change end_date to today's date if needed.
start_date = "2022-01-01"
end_date = "2025-02-28"

# Price columns kept from the yfinance history frame, and the final column layout.
price_columns = ["Open", "High", "Low", "Close", "Adj Close", "Volume"]
output_columns = ["Date", "Open", "High", "Low", "Close", "Adj_Close", "Volume", "Ticker", "Index"]

# One formatted frame per index that returned data.
all_data = []

# Loop through each index to fetch data
for index_name, ticker in indices.items():
    print(f"Fetching data for {index_name} ({ticker})...")

    # auto_adjust=False keeps the raw OHLC prices alongside "Adj Close".
    stock = yf.Ticker(ticker)
    raw_history = stock.history(start=start_date, end=end_date, auto_adjust=False)
    if raw_history.empty:
        # Nothing to format for this index; keep empty frames out of the merge.
        print(f"No data returned for {index_name} ({ticker}); skipping.")
        continue

    # The renamed column must be spelled exactly as in `output_columns`
    # ("Adj_Close"); the previous "Adj_close" spelling made the column
    # selection below fail with a KeyError.
    data = (
        raw_history[price_columns]
        .rename(columns={"Adj Close": "Adj_Close"})
        .reset_index()  # move Date from the index into a regular column
        .assign(Ticker=ticker, Index=index_name)
    )

    # Select only the necessary columns, in the required order.
    data = data[output_columns]

    all_data.append(data)

if not all_data:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise RuntimeError("No index data was downloaded.")

# Merge all index data into a single DataFrame with a fresh 0..n-1 index.
final_df = pd.concat(all_data, ignore_index=True)

# Save the data to a CSV file (Date is already a column, so no index is written).
final_df.to_csv("index_data.csv", index=False)

# Display the first few rows of the data
print(final_df.head())


Fetching data for S&P 500 (^GSPC)...
Fetching data for NASDAQ 100 (^NDX)...
Fetching data for Russell 1000 (^RUI)...
Fetching data for Russell 3000 (^RUA)...
Fetching data for Wilshire 5000 (^W5000)...
                       Date         Open         High          Low  \
0 2022-01-03 00:00:00-05:00  4778.140137  4796.640137  4758.169922   
1 2022-01-04 00:00:00-05:00  4804.509766  4818.620117  4774.270020   
2 2022-01-05 00:00:00-05:00  4787.990234  4797.700195  4699.439941   
3 2022-01-06 00:00:00-05:00  4693.390137  4725.009766  4671.259766   
4 2022-01-07 00:00:00-05:00  4697.660156  4707.950195  4662.740234   

         Close    Adj_close      Volume Ticker    Index  
0  4796.560059  4796.560059  3831020000  ^GSPC  S&P 500  
1  4793.540039  4793.540039  4683170000  ^GSPC  S&P 500  
2  4700.580078  4700.580078  4887960000  ^GSPC  S&P 500  
3  4696.049805  4696.049805  4295280000  ^GSPC  S&P 500  
4  4677.029785  4677.029785  4181510000  ^GSPC  S&P 500  
