# Working Code

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import os
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
import time
from datetime import date

# Set the number of cores to use
NUM_CORES = multiprocessing.cpu_count() 
# Load stock info
stock_info = pd.read_csv('stock_info.csv')[:10]

def mkdir(directory_path):
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)

def get_stock_data(ticker, period='5d'):
    try:  
        time.sleep(0.1)
        data = yf.download(ticker, period=period, interval='1d')  
        if 'Date' not in data.columns:
            data['Date'] = data.index
        return ticker, data  
    except Exception as e:
        print(f"Error downloading data for {ticker}: {e}")
        return ticker, None

def calculate_price_change(data):
    if len(data) < 2:
        return None
    current_price = data['Close'].iloc[-1]
    previous_close = data['Close'].iloc[-2]
    price_change = (current_price - previous_close) / previous_close * 100
    return round(price_change, 3)

def calculate_rsi(data, period=14):
    delta = data['Close'].diff(1)
    up = delta.copy()
    down = delta.copy()
    up[up < 0] = 0
    down[down > 0] = 0
    avg_gain = up.ewm(com=period-1, adjust=False).mean()
    avg_loss = down.ewm(com=period-1, adjust=False).mean().abs()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi.iloc[-1]

def calculate_macd(data):  
    exp1 = data['Close'].ewm(span=12, adjust=False).mean()
    exp2 = data['Close'].ewm(span=26, adjust=False).mean()
    macd = exp1 - exp2
    signal = macd.ewm(span=9, adjust=False).mean()
    return macd.iloc[-1], signal.iloc[-1], macd.iloc[-1] - signal.iloc[-1]

def get_company_name(ticker):
    company_name = stock_info[stock_info['Ticker'] == ticker]['Name'].values
    return company_name[0] if len(company_name) > 0 else "Unknown"

def process_ticker(ticker, period='5d'):
    ticker, data = get_stock_data(ticker, period)
    if data is None or data.empty:
        return None, ticker, None

    if 'Date' not in data.columns: 
        date_index = data.index[-1] 
        data['Date'] = pd.Series(date_index, index=data.index)

    price_change = calculate_price_change(data) 
    rsi_score = calculate_rsi(data)
    macd, macd_signal, macd_hist = calculate_macd(data)
    
    if price_change is None or rsi_score is None or macd is None:
        return None, ticker, None

    current_date = data['Date'].iloc[-1]   
    previous_price = data['Close'].iloc[-2]
    company_name = get_company_name(ticker)

    result = [ticker, company_name, price_change, rsi_score, macd, macd_signal, macd_hist]
    return result, ticker, price_change

    result = [ticker, company_name, price_change, rsi_score, macd, macd_signal, macd_hist]
    return result, ticker, price_change

def process_chunk(chunk):
    return [process_ticker(ticker) for ticker in chunk]

def evaluate():
    all_tickers = stock_info['Ticker'].to_list()
    chunk_size = max(1, len(all_tickers) // NUM_CORES)
    chunks = [all_tickers[i:i + chunk_size] for i in range(0, len(all_tickers), chunk_size)]
    
    with ProcessPoolExecutor(max_workers=NUM_CORES) as executor:
        results = list(executor.map(process_chunk, chunks))
    
    processed_tickers = []
    
    for chunk_result in results:
        for result, ticker, price_change in chunk_result:
            if ticker and price_change is not None:
                processed_tickers.append(result)
    
    # Save all processed tickers with their metrics
    columns = ["Ticker", "Company", "Price Change (%)", "RSI Score", "MACD", "MACD Signal", "MACD Hist"]    
    processed_df = pd.DataFrame(processed_tickers, columns=columns)

    return processed_df

def main():
    today = date.today()  
    mkdir(f"{str(today)}")
    output_file = f'{str(today)}/processed_tickers.csv'
    
    df = evaluate()    
    df.to_csv(output_file, index=False)

if __name__ == '__main__':
    multiprocessing.freeze_support()
    main()


In [4]:
import yfinance as yf
import pandas as pd


In [5]:
today = pd.Timestamp('today').date()
stock_info = pd.read_csv('src/stock_info.csv')
processed_tickers = pd.read_csv(f'{str(today)}/processed_tickers.csv')

In [6]:
processed_tickers.head(3)

Unnamed: 0,Date,Ticker,Company Name,Previous Close,Current Close,Price Change (%),Open,Close,Adjusted Close,Price,P/E Ratio,Dividend Yield,Earnings Per Share,Book Value Per Share,Return on Equity,Debt to Equity,Beta
0,2024-07-05,A,Agilent Technologies,126.129997,126.370003,0.19,126.400002,126.370003,126.370003,,,0.0075,4.23,21.311,0.20709,43.869,1.049
1,2024-07-05,AA,Alcoa Inc.,42.380001,42.439999,0.142,42.650002,42.439999,42.439999,,,0.0094,-3.76,22.243,-0.14336,46.982,2.456
2,2024-07-05,AAN,Aaron's Inc,9.99,9.98,-0.1,9.97,9.98,9.98,,,0.0501,-0.79,21.895,-0.03515,105.026,1.425


In [7]:
import yfinance as yf
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
import multiprocessing

def get_stock_data(ticker):
    try:
        stock = yf.Ticker(ticker)
        info = stock.info

        ticker_symbol = info['symbol'] if 'symbol' in info else np.nan
        company_name = info['longName'] if 'longName' in info else np.nan
        open_price = info['open'] if 'open' in info else np.nan
        close_price = info['previousClose'] if 'previousClose' in info else np.nan
        current_price = info['currentPrice'] if 'currentPrice' in info else np.nan
        price_change_pct = round((current_price - close_price) / close_price * 100, 3) if close_price is not None and current_price is not None else np.nan
        day_high = info['dayHigh'] if 'dayHigh' in info else np.nan
        day_low = info['dayLow'] if 'dayLow' in info else np.nan
        fifty_two_week_high = info['fiftyTwoWeekHigh'] if 'fiftyTwoWeekHigh' in info else np.nan
        fifty_two_week_low = info['fiftyTwoWeekLow'] if 'fiftyTwoWeekLow' in info else np.nan
        beta = info['beta'] if 'beta' in info else np.nan
        pe_ratio = info['trailingPE'] if 'trailingPE' in info else np.nan
        fwd_pe_ratio = info['forwardPE'] if 'forwardPE' in info else np.nan
        dividend_yield = info['dividendYield'] if 'dividendYield' in info else np.nan
        earnings_growth = info['earningsGrowth'] if 'earningsGrowth' in info else np.nan
        revenue_growth = info['revenueGrowth'] if 'revenueGrowth' in info else np.nan
        profit_margins = info['profitMargins'] if 'profitMargins' in info else np.nan
        roa = info['returnOnAssets'] if 'returnOnAssets' in info else np.nan
        roe = info['returnOnEquity'] if 'returnOnEquity' in info else np.nan
        debt_to_equity = info['debtToEquity'] if 'debtToEquity' in info else np.nan

        return [ticker_symbol, company_name, open_price, close_price, current_price, price_change_pct, day_high, day_low,
                fifty_two_week_high, fifty_two_week_low, beta, pe_ratio, fwd_pe_ratio, dividend_yield,
                earnings_growth, revenue_growth, profit_margins, roa, roe, debt_to_equity]
    except Exception as e:
        print(f"Error getting data for {ticker}: {e}")
        return [np.nan, "Unknown", np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]

def get_stock_info(tickers):
    num_processes = 10
    with ProcessPoolExecutor(max_workers=num_processes) as executor:
        results = list(executor.map(get_stock_data, tickers))

    columns = ['Ticker Symbol', 'Company Name', 'Open Price', 'Close Price', 'Current Price', 'Price Change (%)', 'Day High', 'Day Low',
               '52 Week High', '52 Week Low', 'Beta', 'P/E Ratio', 'Forward P/E Ratio', 'Dividend Yield',
               'Earnings Growth', 'Revenue Growth', 'Profit Margins', 'Return on Assets', 'Return on Equity', 'Debt to Equity']
    df = pd.DataFrame(results, columns=columns)
    return df

In [8]:
tickers = ['TSLA', 'AAPL', 'AMZN']
stock_info = get_stock_info(tickers)
print(stock_info)

Process SpawnProcess-1:
Traceback (most recent call last):
  File "/Users/josephlu/anaconda3/envs/xai/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/josephlu/anaconda3/envs/xai/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/josephlu/anaconda3/envs/xai/lib/python3.10/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/Users/josephlu/anaconda3/envs/xai/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'get_stock_data' on <module '__main__' (built-in)>
Process SpawnProcess-2:
Traceback (most recent call last):
  File "/Users/josephlu/anaconda3/envs/xai/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Users/josephlu/anaconda3/envs/xai/lib/python3.10/multiprocessing/process.py", lin

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.