In [None]:
import yfinance as yf  # For stock data
import pandas as pd     
import numpy as np      
import ta               # Technical Analysis indicators
import matplotlib.pyplot as plt  
from tqdm import tqdm



In [None]:
import requests
import io

# NasdaqTrader.com publishes a daily pipe-delimited list of NASDAQ-listed symbols.
# Fetched over HTTPS: requests has no FTP transport, so the URL below is the one
# that is actually downloaded.
url = "https://www.nasdaqtrader.com/dynamic/SymDir/nasdaqlisted.txt"

# Download the data. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() stops an HTTP error page from being parsed
# as if it were the symbol list.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Split into non-empty lines, then drop the final one (the footer row).
# Blank lines are removed first because a body that ends with a newline leaves an
# empty last element after splitting; slicing [:-1] on that list would discard the
# empty string and keep the footer, which then shows up as a bogus symbol.
data = [line for line in response.content.decode('utf-8').splitlines() if line.strip()]
data = data[:-1]

# Read into pandas. Only empty fields are treated as missing, so a symbol that is
# literally spelled "NA" stays a string instead of being converted to NaN.
nasdaq_data = pd.read_csv(
    io.StringIO('\n'.join(data)),
    sep='|',
    keep_default_na=False,
    na_values=[''],
)

# Get the tickers
nasdaq_tickers = nasdaq_data['Symbol'].to_list()

print(f"NASDAQ tickers found: {len(nasdaq_tickers)}")



In [None]:
# Get NYSE tickers too
# NOTE(review): otherlisted.txt appears to cover every non-NASDAQ exchange, not
# only NYSE — confirm whether the "NYSE" label is accurate enough for this project.
# The timeout prevents an indefinite hang; raise_for_status() stops an HTTP error
# page from being parsed as the symbol list.
response_nyse = requests.get(
    "https://www.nasdaqtrader.com/dynamic/SymDir/otherlisted.txt",
    timeout=30,
)
response_nyse.raise_for_status()

# Keep non-empty lines and drop the last one (the footer row). Blank lines go
# first: when the body ends with a newline, a plain split leaves an empty final
# element, and slicing [:-1] would remove that instead of the footer.
data_nyse = [
    line for line in response_nyse.content.decode('utf-8').splitlines() if line.strip()
]
data_nyse = data_nyse[:-1]

# Only empty fields count as missing, so a symbol literally spelled "NA" is kept
# as a string rather than parsed to NaN.
nyse_data = pd.read_csv(
    io.StringIO('\n'.join(data_nyse)),
    sep='|',
    keep_default_na=False,
    na_values=[''],
)
nyse_tickers = nyse_data['ACT Symbol'].to_list()

print(f"NYSE tickers found: {len(nyse_tickers)}")


In [None]:
# Combine both lists and remove duplicates.
# dict.fromkeys de-duplicates like a set but keeps first-seen order, so the
# combined list comes out in the same order on every run. A plain set() has an
# iteration order for strings that can change between kernel sessions, which
# makes the later loops and the saved CSV non-reproducible.
all_tickers = list(dict.fromkeys(nasdaq_tickers + nyse_tickers))

print(f"Total unique live tickers: {len(all_tickers)}")


In [None]:
# Step 1: keep only entries that are real strings (drops NaN, numbers, etc.).
clean_tickers = list(filter(lambda symbol: isinstance(symbol, str), all_tickers))

# Step 2: discard every symbol that contains a dot, i.e. one carrying a suffix.
filtered_tickers = [symbol for symbol in clean_tickers if symbol.find('.') == -1]

print(f"Tickers after removing suffixes: {len(filtered_tickers)}")


In [None]:
# Minimum last-close price (in dollars) a stock needs in order to be kept.
MIN_PRICE = 1

# Tickers that pass the price screen, plus those skipped because no price
# could be read for them.
final_tickers = []
skipped_tickers = []

print("Checking prices to remove penny stocks...")

for ticker in tqdm(filtered_tickers):
    try:
        # Latest close from a one-day history request; an empty result makes
        # .iloc[-1] raise, which is handled below.
        price = yf.Ticker(ticker).history(period="1d")['Close'].iloc[-1]
    except Exception:
        # Best-effort screen: a ticker with missing data or a failed request is
        # skipped. Catching Exception (not a bare except) keeps KeyboardInterrupt
        # working, so this long loop can still be stopped from the notebook.
        skipped_tickers.append(ticker)
        continue

    if price >= MIN_PRICE:  # Keep only stocks priced $1 or more
        final_tickers.append(ticker)

print(f"Tickers after removing penny stocks: {len(final_tickers)}")
print(f"Tickers skipped (missing data or error): {len(skipped_tickers)}")
print(final_tickers[:10])


In [None]:
# Persist the screened ticker list as a single-column CSV, without the index.
# Note: to_csv still writes a header line first; for an unnamed Series that
# line is the default column label, 0.
tickers_csv_path = "/Users/saamsani/Desktop/CMPT /stock_prediction_project/data/final_tickers.csv"
ticker_series = pd.Series(final_tickers)
ticker_series.to_csv(tickers_csv_path, index=False)

print("Saved final tickers to final_tickers.csv.")

In [None]:
import os

# Load tickers back from your CSV.
# Every value is read as text and only pandas' default NA parsing is disabled,
# so a ticker literally spelled "NA" is not turned into NaN (which would end up
# as a "nan.csv" download attempt below).
tickers_csv = "/Users/saamsani/Desktop/CMPT /stock_prediction_project/data/final_tickers.csv"
final_tickers = pd.read_csv(
    tickers_csv, header=None, dtype=str, keep_default_na=False
)[0].tolist()

# A Series saved with to_csv(index=False) begins with a header line holding its
# default column label "0". Reading with header=None loads that line as data, so
# drop it when present. Files written without a header are left untouched.
if final_tickers and final_tickers[0] == "0":
    final_tickers = final_tickers[1:]
print(f"Tickers loaded: {len(final_tickers)}")

# Make sure the save folder exists
data_folder = "/Users/saamsani/Desktop/CMPT /stock_prediction_project/data/stock_data"
os.makedirs(data_folder, exist_ok=True)

print("Pulling 90 days of price data and saving for each ticker...")

# Loop through each ticker and download/save data
for ticker in tqdm(final_tickers):
    try:
        save_path = os.path.join(data_folder, f"{ticker}.csv")

        # If this ticker's CSV already exists, skip it. This makes the cell
        # resumable after an interruption.
        if os.path.exists(save_path):
            continue

        # Download 90 days of data. progress=False silences yfinance's own
        # per-download progress bar so it does not flood the tqdm output.
        df = yf.download(ticker, period="90d", progress=False)

        # Only save if there are at least 30 days of data
        if len(df) >= 30:
            df['Ticker'] = ticker
            df.to_csv(save_path)
    except Exception as e:
        # Report and move on: one failing ticker should not abort the whole run.
        print(f"Failed to get data for {ticker}: {e}")
