In [None]:
!pip install polygon-api-client transformers torch bitsandbytes accelerate huggingface_hub

import os
import time
import warnings
from datetime import datetime, timedelta
from getpass import getpass

import torch
from huggingface_hub import login
from polygon import RESTClient
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import BitsAndBytesConfig

# Suppress warnings
warnings.filterwarnings('ignore')

# Set up Polygon API key.
# Secrets are never written into the notebook: a key that is already exported
# in the environment is reused, and only when it is missing is the user
# prompted for it (getpass does not echo the typed value).
if not os.environ.get('POLYGON_API_KEY'):
    os.environ['POLYGON_API_KEY'] = getpass('Polygon API key: ')

# Set up and use Hugging Face API token (same policy: environment first,
# hidden prompt as a fallback).
HF_TOKEN = os.environ.get('HF_TOKEN') or getpass('Hugging Face token: ')
login(token=HF_TOKEN)

# Check if CUDA is available
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

In [2]:
# Polygon REST client, authenticated with the key stored in the
# POLYGON_API_KEY environment variable (None if the variable is unset).
POLYGON_API_KEY = os.environ.get("POLYGON_API_KEY")
client = RESTClient(POLYGON_API_KEY)

# Fetch aggregate data function
def fetch_aggregate_data(tickers, from_date, to_date, retries=3, retry_delay=65, rest_client=None):
    """Fetch daily aggregate bars for each ticker, retrying failed requests.

    Args:
        tickers: Iterable of ticker symbols to request.
        from_date: Start of the requested window, passed through as ``from_``.
        to_date: End of the requested window, passed through as ``to``.
        retries: Maximum number of attempts per ticker (default 3).
        retry_delay: Seconds to sleep between failed attempts. The default is
            just over one minute because, per the original author's note, the
            Polygon free tier only allows 5 API calls per minute.
        rest_client: Object exposing ``get_aggs(...)``. Defaults to the
            module-level ``client``.

    Returns:
        dict mapping each ticker to whatever ``get_aggs`` returned for it.
        Tickers whose every attempt raised are omitted (a message is printed).
    """
    api = client if rest_client is None else rest_client
    all_aggs = {}
    for ticker in tickers:
        for attempt in range(1, retries + 1):
            try:
                all_aggs[ticker] = api.get_aggs(
                    ticker=ticker,
                    multiplier=1,
                    timespan="day",
                    from_=from_date,
                    to=to_date,
                )
                break
            except Exception as e:
                # Best-effort: report the failure and keep going so that one
                # bad ticker does not abort the whole batch.
                print(f"Error fetching data for {ticker}: {e}")
                if attempt < retries:
                    print(f"Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                else:
                    print(f"Failed to fetch data for {ticker} after retries.")
    return all_aggs

# Calculate stock statistics
def calculate_stock_statistics(all_aggs):
    """Summarise close prices and volumes for every ticker.

    Args:
        all_aggs: dict mapping ticker -> sequence of bar objects exposing
            ``close`` and ``volume`` attributes, in chronological order
            (first element is treated as the start of the period).

    Returns:
        dict mapping ticker -> dict with keys ``avg_close``, ``min_close``,
        ``max_close``, ``avg_volume``, ``price_change`` and
        ``price_change_percent``. Tickers with no bars are skipped.

    Raises:
        ZeroDivisionError: if the first close of a ticker is 0.
    """
    stats = {}
    for ticker, aggs in all_aggs.items():
        bars = list(aggs) if aggs else []
        if not bars:
            # An empty (or missing) result would otherwise divide by zero
            # when averaging and fail on closes[0] / closes[-1].
            print(f"No aggregate data for {ticker}; skipping statistics.")
            continue

        closes = [bar.close for bar in bars]
        volumes = [bar.volume for bar in bars]
        first_close, last_close = closes[0], closes[-1]
        change = last_close - first_close

        stats[ticker] = {
            "avg_close": round(sum(closes) / len(closes), 2),
            "min_close": round(min(closes), 2),
            "max_close": round(max(closes), 2),
            "avg_volume": int(sum(volumes) / len(volumes)),
            "price_change": round(change, 2),
            "price_change_percent": round(change / first_close * 100, 2)
        }
    return stats

In [3]:
# Hugging Face model setup
model_id = "google/gemma-2-9b" #best free LLM I could find on Hugging Face

# Load the weights in 8-bit to reduce memory use.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Sampling settings applied to every call of the text-generation pipeline.
generation_settings = {
    "max_new_tokens": 500,
    "do_sample": True,
    "temperature": 0.5,
    "top_k": 50,
    "top_p": 0.95,
    "repetition_penalty": 1.2,
}

# Create pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [6]:
# Main execution: fetch three months of daily bars, summarise them, and turn
# the summary into a text prompt for the language model.
tickers = ["AAPL", "MSFT", "GOOGL", "NVDA", "NKE"]
to_date = datetime.now()
from_date = to_date - timedelta(days=90) # Data from three months

# Download the raw bars, then reduce them to per-ticker statistics.
all_aggs = fetch_aggregate_data(tickers, from_date, to_date)
stats = calculate_stock_statistics(all_aggs)

# Assemble the prompt: a question, one section per ticker, then the marker
# after which the model is expected to continue.
sections = ["Based on the statistical data, what stock should I invest in?\n\n"]
for ticker, data in stats.items():
    sections.append(
        f"{ticker}:\n"
        f"  Average Close: ${data['avg_close']}\n"
        f"  Price Range: ${data['min_close']} - ${data['max_close']}\n"
        f"  Average Daily Volume: {data['avg_volume']:,}\n"
        f"  Price Change: ${data['price_change']} ({data['price_change_percent']}%)\n\n"
    )
sections.append("Stock Suggestions:")
prompt = "".join(sections)

print("Prompt prepared. Ready for generation.")

Prompt prepared. Ready for generation.


In [None]:
# Generate response
print("Generating stock suggestions...")
response = pipe(prompt, max_length=2000, num_return_sequences=1, truncation=True)[0]['generated_text']

# Extract and print the generated suggestions.
# Split only on the FIRST marker (the one that ends the prompt): if the model
# repeats "Stock Suggestions:" in its own output, an unbounded split would
# silently drop everything after the repeat. If the marker is absent from the
# returned text, fall back to the whole response instead of raising IndexError.
parts = response.split("Stock Suggestions:", 1)
suggestions = parts[1].strip() if len(parts) == 2 else response.strip()
print("\nStock Suggestions:")
print(suggestions)