<a href="https://colab.research.google.com/github/mshumer/gpt-investor/blob/main/Claude_Investor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## claude-investor
By Matt Shumer (https://twitter.com/mattshumer_)

GitHub repo: https://github.com/mshumer/gpt-investor

In [None]:
%pip install yfinance requests beautifulsoup4 litellm loguru Jinja2

In [42]:
import os

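# LLM PROVIDER SETTINGS
# Uncomment exactly one provider block below: API_MODEL, API_WEAK_MODEL and
# API_MAX_TOKENS are read by the functions defined later in this notebook.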

# # If you want to use Anthropic's Claude
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
# API_MODEL = "claude-3-opus-20240229"
# API_WEAK_MODEL = "claude-3-haiku-20240307"
# API_MAX_TOKENS = 8192

# # If you want to use OpenAI's GPT
# os.environ["OPENAI_API_KEY"] = "your-api-key"
# API_MODEL = "gpt-4o"
# API_WEAK_MODEL = "gpt-4o"
# API_MAX_TOKENS = 8192

# # If you want to use OpenRouter
# os.environ["OPENROUTER_API_KEY"] = "your-api-key"
# API_MODEL = "openrouter/meta-llama/llama-3-70b"
# API_WEAK_MODEL = "openrouter/meta-llama/llama-3-70b"
# API_MAX_TOKENS = 8192

# # If you want to use Groq
# os.environ["GROQ_API_KEY"] = "your-api-key"
# API_MODEL = "groq/llama3-70b-8192"
# API_WEAK_MODEL = "groq/llama3-70b-8192"
# API_MAX_TOKENS = 8192

# for other providers see https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs

# OTHER SETTINGS
# Number of ticker symbols the LLM is asked to suggest for the chosen industry
# (interpolated into the prompts built by generate_ticker_ideas).
STOCKS_AMOUNT = 5

In [38]:
import requests, ast, json, sys, traceback, re
import yfinance as yf
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from pprint import pprint
from loguru import logger
from litellm import completion, get_max_tokens
from json import JSONDecodeError
from jinja2 import Environment, FileSystemLoader, select_autoescape

# Logging setup: drop the handlers loguru has registered so far and write to a
# log file next to the notebook instead.
# NOTE(review): with no arguments, loguru's remove() is documented to remove
# every handler, including the default stderr one -- confirm against the
# installed loguru version.
logger.remove()
logger.add("Claude_Investor.log")
# Separator line so that successive runs are easy to tell apart in the log file.
logger.info('============================================================')

def call_llm(model, messages):
    """Send a chat conversation to the LLM and return the reply text.

    Both the outgoing messages and the raw response object are written to the
    log so that every exchange can be inspected afterwards.

    Args:
        model: Model identifier handed through to ``completion``.
        messages: List of ``{"role": ..., "content": ...}`` chat messages.

    Returns:
        The ``content`` of the message in the first choice of the response.
    """
    logger.info(messages)

    llm_response = completion(model, messages, max_tokens=API_MAX_TOKENS)
    logger.info(llm_response)

    first_choice = llm_response['choices'][0]
    return first_choice['message']['content']

def get_article_text(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    This is a best-effort helper: a failure is logged and reported through a
    placeholder string instead of an exception, so one unreachable article
    does not abort the analysis of a ticker.

    Args:
        url: Address of the article to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The paragraph texts joined by single spaces, or the string
        ``"Error retrieving article text."`` if fetching or parsing fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        return ' '.join(p.get_text() for p in soup.find_all('p'))
    except Exception as exc:
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate so the run can be stopped.
        logger.warning("Could not retrieve article text from {}: {}", url, exc)
        return "Error retrieving article text."

def get_stock_data(ticker, years):
    """Collect price history, fundamentals and news for one ticker.

    The price window ends today and starts ``years * 365`` days earlier.

    Args:
        ticker: Ticker symbol passed to ``yf.Ticker``.
        years: Length of the price-history window, in years.

    Returns:
        A 4-tuple ``(hist_data, balance_sheet, financials, news)`` taken from
        the ``yf.Ticker`` object.
    """
    today = datetime.now().date()
    window_start = today - timedelta(days=years*365)

    stock = yf.Ticker(ticker)

    # Historical prices first, then balance sheet, financial statements and
    # news articles, read straight off the ticker object.
    price_history = stock.history(start=window_start, end=today)
    return price_history, stock.balance_sheet, stock.financials, stock.news

def get_sentiment_analysis(ticker, news):
    """Ask the weaker LLM to summarise the sentiment of a ticker's news.

    Args:
        ticker: Ticker symbol the articles belong to.
        news: Iterable of article records; each one is indexed with the keys
            ``'link'``, ``'providerPublishTime'`` and ``'title'``.

    Returns:
        The LLM's sentiment summary as text.
    """
    def describe(article):
        # One digest entry per article: publication date, title and body text.
        body = get_article_text(article['link'])
        published = datetime.fromtimestamp(article['providerPublishTime']).strftime("%Y-%m-%d")
        return f"\n\n---\n\nDate: {published}\nTitle: {article['title']}\nText: {body}"

    news_text = "".join(describe(article) for article in news)

    system_prompt = f"You are a sentiment analysis assistant. Analyze the sentiment of the given news articles for {ticker} and provide a summary of the overall sentiment and any notable changes over time. Be measured and discerning. You are a skeptical investor."
    user_prompt = f"News articles for {ticker}:\n{news_text}\n\n----\n\nProvide a summary of the overall sentiment and any notable changes over time."

    return call_llm(API_WEAK_MODEL, [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ])

def get_analyst_ratings(ticker):
    """Return the ``recommendations`` attribute of the ticker's ``yf.Ticker``.

    Args:
        ticker: Ticker symbol to look up.
    """
    return yf.Ticker(ticker).recommendations

def get_industry_analysis(ticker):
    """Ask the weaker LLM for an analysis of the ticker's industry and sector.

    The industry and sector names are read from the ``info`` mapping of the
    ticker's ``yf.Ticker`` object and interpolated into the prompts.

    Args:
        ticker: Ticker symbol whose industry and sector are analysed.

    Returns:
        The LLM's industry analysis as text.
    """
    # TODO update to use search to find recent data!!

    company = yf.Ticker(ticker)
    industry = company.info['industry']
    sector = company.info['sector']

    system_prompt = f"You are an industry analysis assistant. Provide an analysis of the {industry} industry and {sector} sector, including trends, growth prospects, regulatory changes, and competitive landscape. Be measured and discerning. Truly think about the positives and negatives of the stock. Be sure of your analysis. You are a skeptical investor."
    user_prompt = f"Provide an analysis of the {industry} industry and {sector} sector."

    return call_llm(API_WEAK_MODEL, [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ])


def get_final_analysis(ticker, comparisons, sentiment_analysis, analyst_ratings, industry_analysis):
    """Ask the main LLM for a buy/hold/sell recommendation for one ticker.

    Args:
        ticker: Ticker symbol under review.
        comparisons: JSON-serialisable comparative data; embedded in the
            prompt via ``json.dumps(..., indent=2)``.
        sentiment_analysis: Text of the news sentiment summary.
        analyst_ratings: Analyst ratings, formatted into the prompt as-is.
        industry_analysis: Text of the industry/sector analysis.

    Returns:
        The LLM's investment analysis and recommendation as text.
    """
    system_prompt = f"You are a financial analyst providing a final investment recommendation for {ticker} based on the given data and analyses. Be measured and discerning. Truly think about the positives and negatives of the stock. Be sure of your analysis. You are a skeptical investor."
    user_prompt = f"Ticker: {ticker}\n\nComparative Analysis:\n{json.dumps(comparisons, indent=2)}\n\nSentiment Analysis:\n{sentiment_analysis}\n\nAnalyst Ratings:\n{analyst_ratings}\n\nIndustry Analysis:\n{industry_analysis}\n\nBased on the provided data and analyses, please provide a comprehensive investment analysis and recommendation for {ticker}. Consider the company's financial strength, growth prospects, competitive position, and potential risks. Provide a clear and concise recommendation on whether to buy, hold, or sell the stock, along with supporting rationale."

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return call_llm(API_MODEL, conversation)

def _parse_ticker_list(response_text):
    """Extract a list of ticker symbols from an LLM reply.

    Models frequently wrap the requested list in a Markdown code fence or add
    a sentence around it, so the first ``[...]`` span is parsed when one is
    present; otherwise the whole reply is parsed.

    Raises:
        ValueError, SyntaxError: If the text is not a valid Python literal, or
            the literal is not a list/tuple.
    """
    match = re.search(r"\[.*?\]", response_text, re.DOTALL)
    literal = match.group(0) if match else response_text.strip()

    # literal_eval only accepts literals, so the model's output is never
    # executed as code.
    ticker_list = ast.literal_eval(literal)
    if not isinstance(ticker_list, (list, tuple)):
        raise ValueError(f"Expected a list of ticker symbols, got: {response_text!r}")

    return [str(ticker).strip() for ticker in ticker_list]


def generate_ticker_ideas(industry):
    """Ask the weaker LLM for ticker symbols of major companies in an industry.

    The number of symbols requested is ``STOCKS_AMOUNT``.

    Args:
        industry: Name of the industry, interpolated into the prompts.

    Returns:
        A list of ticker symbol strings with surrounding whitespace removed.

    Raises:
        ValueError, SyntaxError: If the reply does not contain a parseable
            Python list.
    """
    messages = [
        {"role": "system", "content": f"You are a financial analyst assistant. Generate a list of {STOCKS_AMOUNT} ticker symbols for major companies in the {industry} industry, as a Python-parseable list."},

        {"role": "user", "content": f"Please provide a list of {STOCKS_AMOUNT} ticker symbols for major companies in the {industry} industry as a Python-parseable list. Only respond with the list, no other text."},
    ]

    response_text = call_llm(API_WEAK_MODEL, messages)

    return _parse_ticker_list(response_text)

def get_current_price(ticker):
    """Return the most recent 1-minute closing price from today's history.

    Args:
        ticker: Ticker symbol to look up.

    Returns:
        The last value of the ``'Close'`` column of the 1-day / 1-minute
        history.

    Raises:
        IndexError: If the history contains no rows.
    """
    stock = yf.Ticker(ticker)
    data = stock.history(period='1d', interval='1m')
    # Use .iloc for positional access: plain `[-1]` on a Series whose index is
    # not integer-based relies on a positional fallback that pandas has
    # deprecated.
    return data['Close'].iloc[-1]

def rank_companies(industry, analyses, prices):
    """Ask the main LLM to rank the analysed companies by attractiveness.

    Args:
        industry: Name of the industry, interpolated into the prompts.
        analyses: Mapping of ticker symbol to its final analysis text.
        prices: Mapping of ticker symbol to current price; tickers missing
            from it are shown with the price ``'N/A'``.

    Returns:
        The LLM's ranking report as text.
    """
    # One section per company: ticker, price (when known) and its analysis.
    sections = []
    for symbol, analysis in analyses.items():
        price = prices.get(symbol, 'N/A')
        sections.append(f"Ticker: {symbol}\nCurrent Price: {price}\nAnalysis:\n{analysis}")
    analysis_text = "\n\n".join(sections)

    system_prompt = f"You are a financial analyst providing a ranking of companies in the {industry} industry based on their investment potential. Be discerning and sharp. Truly think about whether a stock is valuable or not. You are a skeptical investor."
    user_prompt = f"Industry: {industry}\n\nCompany Analyses:\n{analysis_text}\n\nBased on the provided analyses, please rank the companies from most attractive to least attractive for investment. Provide a brief rationale for your ranking. In each rationale, include the current price (if available) and a price target."

    return call_llm(API_MODEL, [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ])

def ranking_to_dict(ranking, max_attempts=3):
    """Convert the free-text ranking report into a dict via the main LLM.

    The model is asked to restate the report as JSON. If the reply cannot be
    parsed, the reply and a correction request are appended to the
    conversation and the model is asked again, up to ``max_attempts`` calls
    in total.

    Args:
        ranking: Text of the ranking report.
        max_attempts: Maximum number of LLM calls before giving up.

    Returns:
        The parsed JSON object, with the keys requested in the prompt
        (``introduction``, ``stocks``, ``conclusion``) if the model complied.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
        json.JSONDecodeError: If the last attempt still is not valid JSON.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # The example object in the prompt must itself be valid JSON (comma after
    # the "stocks" array, no trailing comma), otherwise the model is being
    # shown a malformed target format.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},

        {"role": "user", "content": 'Your task is to convert the following stock ranking report into a JSON object. The JSON object should have the following format:\n\n```\n{\n    "introduction": "Report introduction. If the report doesn\'t contain an introduction, put here an empty string.",\n    "stocks":  [\n        {"rank": 1, "ticker": "AAPL", "name": "Apple Inc.", "current_price": "$200.00", "price_target": "$220.00", "description": "..."},\n        {"rank": 2, "ticker": "...", ...},\n        ...\n    ],\n    "conclusion": "Report conclusion. If the report doesn\'t contain a conclusion, put here an empty string."\n}\n```\n\nPlease respond with JSON code only.\n\nHere is the report you should convert to JSON:\n\n' + ranking},
    ]

    for attempt in range(1, max_attempts + 1):
        response_text = call_llm(API_MODEL, messages)

        # Keep only the outermost {...} span so that Markdown code fences or
        # stray prose around the JSON object do not break parsing.
        start = response_text.find('{')
        end = response_text.rfind('}')
        if 0 <= start < end:
            response_json = response_text[start:end + 1]
        else:
            response_json = response_text.strip()

        try:
            return json.loads(response_json)
        except JSONDecodeError:
            last_attempt = attempt == max_attempts
            logger.warning("Error parsing JSON" + ('.' if last_attempt else ', trying again...'))
            if last_attempt:
                # Out of retries: surface the parse error instead of looping
                # forever or returning an unbound value.
                raise

            # Show the model its own invalid reply and ask for a correction.
            messages.append({
                "role": "assistant",
                "content": response_text,
            })
            messages.append({
                "role": "user",
                "content": "Your response is not valid JSON. Please try again. Output JSON code only.",
            })

def build_report(industry, ranking, final_analyses, prices, sentiment_analyses, analyst_ratings, industry_analyses):
    """Render the Markdown report from the ranking and per-ticker analyses.

    The ranking text is first converted to a dict with ``ranking_to_dict``;
    everything is then passed to the ``Claude_Investor.md.jinja`` template
    together with the current timestamp (as ``now``).

    Args:
        industry: Name of the analysed industry.
        ranking: Text of the ranking report.
        final_analyses: Mapping of ticker to final analysis text.
        prices: Mapping of ticker to current price.
        sentiment_analyses: Mapping of ticker to sentiment summary.
        analyst_ratings: Mapping of ticker to analyst ratings.
        industry_analyses: Mapping of ticker to industry analysis.

    Returns:
        The rendered report as a string.
    """
    ranking_dict = ranking_to_dict(ranking)

    jinja_env = Environment(loader=FileSystemLoader(''))
    report_template = jinja_env.get_template("Claude_Investor.md.jinja")

    context = {
        "industry": industry,
        "ranking_dict": ranking_dict,
        "final_analyses": final_analyses,
        "prices": prices,
        "sentiment_analyses": sentiment_analyses,
        "analyst_ratings": analyst_ratings,
        "industry_analyses": industry_analyses,
        "now": datetime.now(),
    }
    return report_template.render(**context)

def save_report(report, industry, language = 'English'):
    """Write a report to ``reports/<timestamp>.<industry>.<language>.md``.

    The ``reports`` directory is created if it does not exist yet, and path
    separators in ``industry`` are replaced by underscores so that an input
    such as ``"Oil/Gas"`` still produces a single valid file name.

    Args:
        report: Report text to write (UTF-8 encoded).
        industry: Industry name used in the file name.
        language: Language label used in the file name.
    """
    now_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    safe_industry = industry.replace("/", "_").replace("\\", "_")

    # Without this the very first run fails with FileNotFoundError.
    os.makedirs("reports", exist_ok=True)

    with open(f"reports/{now_str}.{safe_industry}.{language}.md", "w", encoding="utf-8") as f:
        f.write(report)

def translate(text, language):
    """Translate a Markdown document section by section with the main LLM.

    The text is cut in front of every line that begins with one or two ``#``
    characters followed by a space, each non-blank section is sent to the LLM
    in its own conversation, and the replies are joined with blank lines.

    Args:
        text: Markdown document in English.
        language: Name of the target language, interpolated into the prompt.

    Returns:
        The translated sections joined by two newlines.
    """
    # Split the text into smaller pieces (a newline is prepended so that a
    # heading on the very first line is matched as well).
    sections = re.split('\n(?=##? )', f"\n{text}")

    # The instructions depend only on the target language, so build them once.
    system_prompt = f"You are a professional translator specializing in finance and the stock market documents translation. Your task is to translate finantial documents from English to {language}. Every time user sends you a new document, you should translate it and send back the translation. Respond with the translation only, nothing else. The documents will be sent to you in Markdown syntax, you should preserve the Markdown syntax. Translate all the teext, but don't translate company names and \"Price target\" term. In the next message, user will send you the first document for translation to {language}."

    # Translate each non-blank section
    translations = [
        call_llm(API_MODEL, [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": section},
        ])
        for section in sections
        if section.strip() != ''
    ]

    return "\n\n".join(translations)

In [None]:
# Main analysis cell: asks for an industry, lets the LLM propose tickers,
# analyses each one and finally asks the LLM to rank them. The dictionaries
# filled here (and `industry` / `ranking`) are reused by the report cells below.

# User input
industry = input("Enter the industry to analyze: ")
years = 1 # fixed at one year; interactive alternative: int(input("Enter the number of years for analysis: "))

# Generate ticker ideas for the industry
tickers = generate_ticker_ideas(industry)
print(f"\nTicker Ideas for {industry} Industry:")
print(", ".join(tickers))

# Perform analysis for each company
# Each dictionary below is keyed by ticker symbol.
sentiment_analyses = {}
analyst_ratings = {}
industry_analyses = {}
final_analyses = {}
prices = {}
for ticker in tickers:
    print(f"\nAnalyzing {ticker}...")

    hist_data, balance_sheet, financials, news = get_stock_data(ticker, years)
    # NOTE(review): main_data is assembled but not referenced again in the
    # cells visible here -- confirm whether it is still needed.
    main_data = {
        'hist_data': hist_data,
        'balance_sheet': balance_sheet,
        'financials': financials,
        'news': news
    }
    sentiment_analysis = get_sentiment_analysis(ticker, news)
    analyst_rating = get_analyst_ratings(ticker)
    industry_analysis = get_industry_analysis(ticker)
    # The comparative-analysis argument is passed as an empty dict.
    final_analysis = get_final_analysis(ticker, {}, sentiment_analysis, analyst_rating, industry_analysis)

    sentiment_analyses[ticker] = sentiment_analysis
    analyst_ratings[ticker] = analyst_rating
    industry_analyses[ticker] = industry_analysis
    final_analyses[ticker] = final_analysis
    prices[ticker] = get_current_price(ticker)

# Rank the companies based on their analyses
ranking = rank_companies(industry, final_analyses, prices)

print(f"\nRanking of Companies in the {industry} Industry:")
print(ranking)

In [14]:
# Render the Markdown report from the results of the analysis cell and write
# it to disk (save_report labels the file 'English' by default).
report = build_report(industry, ranking, final_analyses, prices, sentiment_analyses, analyst_ratings, industry_analyses)
save_report(report, industry)

In [43]:
# Translate the rendered report to Czech with the LLM and save it as a
# separate file labelled 'Czech'.
report_czech = translate(report, 'Czech')
save_report(report_czech, industry, 'Czech')