In [3]:
import hashlib
import json
import logging
import os
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor

import openai
import pandas as pd
import requests
import yfinance as yf
from stocksymbol import StockSymbol

# Configuration
# Credentials are read from the environment (NEWS_API_KEY / OPENAI_API_KEY) so real
# keys never have to be typed into the notebook. The original placeholder string is
# kept as the fallback, so both names are always defined.
news_api_key = os.environ.get("NEWS_API_KEY", "API KEY")
openai_api_key = os.environ.get("OPENAI_API_KEY", "API KEY")

# Initialize OpenAI
openai.api_key = openai_api_key

# Set up logging: INFO and above is written to stock_selection.log
logging.basicConfig(filename='stock_selection.log', level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

# Value Investing Thresholds
# Set your thresholds according to what you are looking for in a stock.
# process_stock keeps a ticker only when its P/E, P/B and debt-to-equity are
# positive and below these ceilings, and its ROE is above the `roe` floor.
price_to_earnings = 10
price_to_book = 1.5
debt_to_equity = 1
roe = 0.12

In [4]:


def fetch_stock_data(ticker):
    """Collect a small snapshot of one ticker through yfinance.

    Args:
        ticker: Symbol string handed to ``yf.Ticker``; it is upper-cased for
            the ``"symbol"`` entry of the result.

    Returns:
        dict with the keys ``symbol``, ``current_price``, ``market_cap``,
        ``pe_ratio``, ``beta``, ``history``, ``summary``, ``sector`` and
        ``industry``. Every value except ``symbol`` and ``history`` is looked
        up with ``info.get`` and is therefore ``None`` when the info mapping
        lacks the field. ``history`` is whatever
        ``Ticker.history(period="5d")`` returns.
    """
    handle = yf.Ticker(ticker)

    # Same access order as before: the info mapping first, then the history.
    details = handle.info
    recent_history = handle.history(period="5d")

    snapshot = {"symbol": ticker.upper()}
    for out_key, info_key in (
        ("current_price", "currentPrice"),
        ("market_cap", "marketCap"),
        ("pe_ratio", "trailingPE"),
        ("beta", "beta"),
    ):
        snapshot[out_key] = details.get(info_key)

    snapshot["history"] = recent_history

    for out_key, info_key in (
        ("summary", "longBusinessSummary"),
        ("sector", "sector"),
        ("industry", "industry"),
    ):
        snapshot[out_key] = details.get(info_key)

    return snapshot


In [5]:
# Quick smoke test of fetch_stock_data on a single well-known ticker.
data = fetch_stock_data("AAPL")

print("Current Price:", data["current_price"])
print("Market Cap:", data["market_cap"])
print("P/E Ratio:", data["pe_ratio"])
# `summary` comes from info.get("longBusinessSummary") and is None when the info
# mapping lacks that field; fall back to "" so the 200-character preview slice
# cannot raise TypeError.
print("Summary:", (data["summary"] or "")[:200], "...")


Current Price: 210.14
Market Cap: 3156744077312
P/E Ratio: 33.355553
Summary: Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal  ...


In [None]:
CACHE_FILE = "openai_cache.json"

def load_cache():
    """Load the on-disk response cache.

    Returns:
        dict: The parsed contents of ``CACHE_FILE``. An empty dict is returned
        when the file does not exist, cannot be parsed as JSON, or does not
        contain a JSON object. In the last two cases a warning is logged and
        the cache is treated as empty rather than crashing the caller.
    """
    if not os.path.exists(CACHE_FILE):
        return {}

    try:
        with open(CACHE_FILE, "r") as f:
            cache = json.load(f)
    except ValueError as e:
        # json.JSONDecodeError (and undecodable bytes) are ValueError subclasses;
        # a truncated or corrupt cache only costs cache misses, not a crash.
        logging.warning(f"Ignoring unreadable cache file {CACHE_FILE}: {e}")
        return {}

    if not isinstance(cache, dict):
        # Callers use `key in cache` and `cache[key] = value`, so anything but a
        # JSON object is unusable as a cache.
        logging.warning(f"Ignoring cache file {CACHE_FILE}: expected a JSON object")
        return {}

    return cache

def save_cache(cache):
    """Persist ``cache`` to ``CACHE_FILE`` as JSON without exposing partial writes.

    The JSON is written to a temporary file in the same directory and then
    moved over ``CACHE_FILE`` with ``os.replace``. A reader therefore sees
    either the previous file or the complete new one, and a failed dump (for
    example a non-serialisable value) leaves the previous cache untouched.

    Args:
        cache: JSON-serialisable mapping to store.

    Raises:
        TypeError / ValueError: propagated from ``json.dump``.
        OSError: propagated from the file operations.
    """
    # The temp file must live next to the target so os.replace stays on one filesystem.
    target_dir = os.path.dirname(os.path.abspath(CACHE_FILE))
    fd, tmp_path = tempfile.mkstemp(dir=target_dir, prefix=".openai_cache_", suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as f:
            json.dump(cache, f)
        os.replace(tmp_path, CACHE_FILE)
    except BaseException:
        # Do not leave stray temp files behind when the write or the swap fails.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

def get_cache_key(messages):
    """Return a SHA-256 hex digest that identifies ``messages``.

    The messages are serialised with ``json.dumps(..., sort_keys=True)`` before
    hashing, so dictionaries that differ only in key order map to the same key.
    """
    canonical_bytes = json.dumps(messages, sort_keys=True).encode()
    digest = hashlib.sha256()
    digest.update(canonical_bytes)
    return digest.hexdigest()


In [None]:
def ask_openai(messages, temperature=0.2, max_tokens=250, max_retries=3):
    """Send ``messages`` to the chat completion API, using the on-disk cache.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts. Its
            ``get_cache_key`` hash is the cache key.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion length limit forwarded to the API.
        max_retries: Maximum number of API attempts for retryable errors.

    Returns:
        The stripped text of the first choice, or ``None`` when every attempt
        failed or a non-retryable error occurred.
    """
    cache = load_cache()
    cache_key = get_cache_key(messages)

    if cache_key in cache:
        print("Cache hit 🔥")
        return cache[cache_key]

    print("Cache miss ❄️. Calling OpenAI API...")

    # The error classes live in `openai.error` in the legacy SDK, the same SDK
    # generation that provides `openai.ChatCompletion`. They were previously
    # referenced as bare, undefined names, so any API failure surfaced as a
    # NameError instead of being retried.
    retryable_errors = (
        openai.error.RateLimitError,
        openai.error.APIError,
        openai.error.Timeout,
        openai.error.ServiceUnavailableError,
    )

    for attempt in range(1, max_retries + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )
            content = response['choices'][0]['message']['content'].strip()

            # Save to cache. Re-read the file first: other threads may have
            # stored entries while this request was in flight, and writing back
            # the snapshot taken at the top would discard them. This narrows,
            # but does not eliminate, the window for lost updates.
            cache = load_cache()
            cache[cache_key] = content
            save_cache(cache)

            return content

        except retryable_errors as e:
            if attempt == max_retries:
                # Nothing left to retry, so skip the pointless back-off sleep.
                print(f"OpenAI API error: {e}. No retries left.")
                break
            wait_time = 2 ** attempt  # exponential back-off: 2s, 4s, 8s, ...
            print(f"OpenAI API error: {e}. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)

        except Exception as e:
            # Anything else is treated as non-retryable.
            print(f"Unexpected error: {e}")
            break

    print("Failed to get response from OpenAI API after multiple attempts.")
    return None

In [None]:
# Sentiment analysis on the stock using OpenAI
def sentiment_analysis(ticker):
    """Ask the chat model for a sentiment assessment of ``ticker``.

    Builds a fixed system prompt plus a ticker-specific user prompt, sends them
    through ``ask_openai`` and returns its reply. When ``ask_openai`` returns
    ``None`` the placeholder "No sentiment analysis available" is returned.
    """
    conversation = [
        {"role": "system", "content": "You are a market sentiment analyst. Focus on key factors like news, earnings, and market sentiment."},
        {"role": "user", "content": f"Provide a sentiment analysis for stock {ticker} based on recent news and social media posts. Is the sentiment positive, negative, or neutral? Focus on key drivers (e.g., earnings reports, news events, market sentiment)."},
    ]
    reply = ask_openai(conversation)
    return "No sentiment analysis available" if reply is None else reply


# Analyse earnings calls for the stock using OpenAI
def earnings_call(ticker):
    """Ask the chat model to summarise the latest earnings call for ``ticker``.

    Returns the text produced by ``ask_openai``, or the placeholder
    "No earnings call analysis available" when ``ask_openai`` returns ``None``.
    """
    conversation = [
        {"role": "system", "content": "You are a financial analyst. Provide key insights from the earnings call."},
        {"role": "user", "content": f"Summarize the latest earnings call for stock {ticker}. Highlight key points such as management outlook, risks, opportunities, and financial performance."},
    ]
    reply = ask_openai(conversation)
    return "No earnings call analysis available" if reply is None else reply


# Stock analysis using OpenAI
def stock_insights(ticker):
    """Ask the chat model for a general investment write-up on ``ticker``.

    Returns the text produced by ``ask_openai``, or the placeholder
    "No stock insights available" when ``ask_openai`` returns ``None``.
    """
    conversation = [
        {"role": "system", "content": "You are a financial analyst. Provide a summary of key investment insights."},
        {"role": "user", "content": f"Analyze stock {ticker}. Include its business model, growth prospects, financial performance, and risks. Provide key investment takeaways."},
    ]
    reply = ask_openai(conversation)
    return "No stock insights available" if reply is None else reply


# Value investing analysis using OpenAI
def value_investing(ticker):
    """Ask the chat model for a value-investing assessment of ``ticker``.

    Returns the text produced by ``ask_openai``, or the placeholder
    "No value investing analysis available" when ``ask_openai`` returns ``None``.
    """
    conversation = [
        {"role": "system", "content": "You are a value investor. Compare key financial metrics with the industry and provide an investment recommendation."},
        {"role": "user", "content": f"Evaluate stock {ticker} from a value investor's perspective. Compare key metrics (PE ratio, PB ratio, ROE) to the industry average and provide investment recommendations."},
    ]
    reply = ask_openai(conversation)
    return "No value investing analysis available" if reply is None else reply
    
def full_stock_analysis(ticker):
    """Run all four OpenAI-backed analyses for ``ticker``.

    Returns:
        dict mapping a section title to the text returned by the matching
        helper. The helpers are called in this order: sentiment, earnings call,
        stock insights, value investing.
    """
    return {
        'Sentiment Analysis': sentiment_analysis(ticker),
        'Earnings Call Summary': earnings_call(ticker),
        'Stock Insights': stock_insights(ticker),
        'Value Investing Analysis': value_investing(ticker),
    }



In [None]:
# Function to process each ticker and retrieve the metrics we want to review
def process_stock(ticker):
    """Screen one ticker against the value-investing thresholds.

    Fetches the ticker's data with ``get_financial_data`` and extracts the
    metrics of interest. If the ticker passes the screen, the four OpenAI
    analyses are attached.

    A ticker passes when its P/E, P/B and debt-to-equity ratios are all present,
    positive and below the module-level ``price_to_earnings``, ``price_to_book``
    and ``debt_to_equity`` ceilings, and its ROE is present, positive and above
    the module-level ``roe`` floor.

    Args:
        ticker: Symbol to evaluate.

    Returns:
        dict of metrics and insight texts for a passing ticker. ``None`` when
        no financial data is available, the latest price cannot be read, or
        the ticker fails the screen.
    """
    financial_data = get_financial_data(ticker)

    if financial_data is None:
        return None

    key_metrics, historical_price, income_statement, shares_float, ratios_data = financial_data

    # NOTE(review): [-1] takes the LAST entry of the 'historical' list as the
    # market price; confirm the data source orders that list oldest-first.
    try:
        market_price = historical_price['historical'][-1]['close']
    except KeyError:
        logging.error(f"Error: 'historical' key not found for {ticker}")
        return None
    except (IndexError, TypeError):
        # An empty list or a None payload used to escape this function and
        # abort the whole executor.map loop; treat it like any other ticker
        # without usable price data.
        logging.error(f"Error: no usable historical price data for {ticker}")
        return None

    earnings_per_share = income_statement.get('eps')
    shares_outstanding = shares_float.get('outstandingShares')
    book_value_per_share = key_metrics.get('bookValuePerShareTTM')
    dividend_yield = key_metrics.get('dividendYieldTTM')
    price_to_earnings_ratio = key_metrics.get('peRatioTTM')
    price_to_book_ratio = key_metrics.get('ptbRatioTTM')
    payout_ratio = key_metrics.get('payoutRatioTTM')
    debt_to_equity_ratio = key_metrics.get('debtToEquityTTM')
    roe_ratio = key_metrics.get('roeTTM')
    revenue_per_share = key_metrics.get('revenuePerShareTTM')
    gross_profit_margin_ratio = ratios_data.get('grossProfitMarginTTM')
    price_to_sales_ratio = ratios_data.get('priceToSalesRatioTTM')

    # All four screening metrics must be present before they can be compared.
    screening_metrics = (price_to_earnings_ratio, price_to_book_ratio,
                         debt_to_equity_ratio, roe_ratio)
    if any(metric is None for metric in screening_metrics):
        return None

    passes_screen = (
        price_to_earnings > price_to_earnings_ratio > 0 and
        price_to_book > price_to_book_ratio > 0 and
        debt_to_equity > debt_to_equity_ratio > 0 and
        roe_ratio > roe and roe_ratio > 0
    )
    if not passes_screen:
        return None

    # Only tickers that pass the screen trigger the (slow, billable) OpenAI calls.
    sentiment_insight = sentiment_analysis(ticker)
    earnings_insight = earnings_call(ticker)
    stock_insight = stock_insights(ticker)
    value_insight = value_investing(ticker)

    return {
        'company': ticker,
        'market_price': market_price,
        'earnings_per_share': earnings_per_share,
        'book_value_per_share': book_value_per_share,
        'dividend_yield': dividend_yield,
        'shares_outstanding': shares_outstanding,
        'price_to_earnings_ratio': price_to_earnings_ratio,
        'price_to_book_ratio': price_to_book_ratio,
        'payout_ratio': payout_ratio,
        'debt_to_equity_ratio': debt_to_equity_ratio,
        'roe_ratio': roe_ratio,
        'revenue_per_share': revenue_per_share,
        'gross_profit_margin_ratio': gross_profit_margin_ratio,
        'price_to_sales_ratio': price_to_sales_ratio,
        'sentiment_insight': sentiment_insight,
        'earnings_insight': earnings_insight,
        'stock_insight': stock_insight,
        'value_insight': value_insight
    }

In [None]:
# Main execution to create the final spreadsheet / output
# Prints the number of tickers processed to help with debugging or to determine if the script has timed out
columns = ['company', 'market_price', 'earnings_per_share', 'book_value_per_share', 'dividend_yield',
           'shares_outstanding', 'price_to_earnings_ratio', 'price_to_book_ratio', 'payout_ratio',
           'debt_to_equity_ratio', 'roe_ratio', 'revenue_per_share', 'gross_profit_margin_ratio',
           'price_to_sales_ratio', 'sentiment_insight', 'earnings_insight', 'stock_insight', 'value_insight']

# Destination of the portfolio spreadsheet (same path as before, named once).
output_csv = '/Users/directory.csv'

# Defined up front so the name exists even if the run below is interrupted.
df_portfolio = pd.DataFrame(columns=columns)

# NOTE(review): `symbol_list_us` is not defined in the cells shown here; it must be
# created by an earlier cell before this one is run.
total_tickers = len(symbol_list_us)
tickers_processed = 0
tickers_added = 0
portfolio_rows = []  # one dict per ticker that passed the screen

# ThreadPoolExecutor to speed up the process
with ThreadPoolExecutor() as executor:
    results = executor.map(process_stock, symbol_list_us)

    for result in results:
        tickers_processed += 1
        print(f"Tickers processed: {tickers_processed}/{total_tickers}")
        if result is not None:
            tickers_added += 1
            print(f"Tickers added: {tickers_added}")
            portfolio_rows.append(result)

            # Checkpoint ONLY the new row. The previous code appended the whole
            # accumulated frame on every hit, which duplicated earlier rows, and
            # it appended onto any file left over from a previous run. The first
            # row therefore truncates the file and writes the header.
            is_first_row = tickers_added == 1
            pd.DataFrame([result], columns=columns).to_csv(
                output_csv, index=False,
                mode='w' if is_first_row else 'a', header=is_first_row)

# Build the frame once from the collected rows (avoids quadratic pd.concat in the
# loop) and save the final portfolio to a CSV file.
df_portfolio = pd.DataFrame(portfolio_rows, columns=columns)
df_portfolio.to_csv(output_csv, index=False)