# Install Libraries

In [None]:
! pip install yfinance langchain_pinecone openai python-dotenv langchain-community sentence_transformers

Collecting langchain_pinecone
  Downloading langchain_pinecone-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting aiohttp<3.10,>=3.9.5 (from langchain_pinecone)
  Downloading aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)
Collecting pinecone-client<6.0.0,>=5.0.0 (from langchain_pinecone)
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.8 (from langchain-community)
  Downloading langchain-0.3.9-py3-none-any.whl.metadata (7.1 kB)
Colle

In [None]:
from langchain_pinecone import PineconeVectorStore
from openai import OpenAI
import dotenv
import json
import yfinance as yf
import concurrent.futures
from langchain_community.embeddings import HuggingFaceEmbeddings
from google.colab import userdata
from langchain.schema import Document
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone
import numpy as np
import requests
import os

In [None]:
def get_stock_info(symbol: str) -> dict:
    """
    Look up a ticker on Yahoo Finance and return a small company profile.

    Args:
        symbol (str): Stock ticker symbol to query.

    Returns:
        dict: Keys 'Ticker', 'Name', 'Business Summary', 'City', 'State',
              'Country', 'Industry' and 'Sector', in that order. Every field
              except 'Business Summary' falls back to the text
              'Information not available' when Yahoo Finance has no value for
              it; 'Business Summary' is None in that case.
    """
    fallback = 'Information not available'
    info = yf.Ticker(symbol).info

    profile = {
        "Ticker": info.get('symbol', fallback),
        'Name': info.get('longName', fallback),
        # No fallback text here: a missing summary stays None.
        'Business Summary': info.get('longBusinessSummary'),
    }

    # The remaining fields all share the same fallback text.
    location_and_industry = (
        ('City', 'city'),
        ('State', 'state'),
        ('Country', 'country'),
        ('Industry', 'industry'),
        ('Sector', 'sector'),
    )
    for label, yahoo_field in location_and_industry:
        profile[label] = info.get(yahoo_field, fallback)

    return profile

In [None]:
# Models already constructed by get_huggingface_embeddings, keyed by model name,
# so repeated calls reuse one instance instead of building a new one each time.
_EMBEDDING_MODELS = {}


def get_huggingface_embeddings(text, model_name="sentence-transformers/all-mpnet-base-v2"):
    """
    Generates embeddings for the given text using a specified Hugging Face model.

    The SentenceTransformer for each model name is created on first use and
    then reused by later calls with the same name.

    Args:
        text (str): The input text to generate embeddings for.
        model_name (str): The name of the Hugging Face model to use.
                          Defaults to "sentence-transformers/all-mpnet-base-v2".

    Returns:
        The value returned by the model's ``encode`` call for ``text``.
    """
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model.encode(text)


def cosine_similarity_between_sentences(sentence1, sentence2):
    """
    Embed two sentences and report how similar their embeddings are.

    Args:
        sentence1 (str): First sentence to compare.
        sentence2 (str): Second sentence to compare.

    Returns:
        float: Cosine similarity of the two sentence embeddings, from -1
               (opposite directions) to 1 (same direction).

    Notes:
        The score is also printed, formatted to four decimal places.
    """
    # cosine_similarity works on 2-D inputs, so each embedding becomes a single-row matrix.
    row_vectors = [
        np.array(get_huggingface_embeddings(sentence)).reshape(1, -1)
        for sentence in (sentence1, sentence2)
    ]

    # The result is a 1x1 matrix; its only entry is the score.
    similarity_score = cosine_similarity(row_vectors[0], row_vectors[1])[0][0]
    print(f"Cosine similarity between the two sentences: {similarity_score:.4f}")
    return similarity_score


# Example usage: two sentences that differ only in their last word
sentence1 = "I like walking to the park"
sentence2 = "I like walking to the office"

# The call prints the score; the returned value is kept in `similarity`
similarity = cosine_similarity_between_sentences(sentence1, sentence2)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Cosine similarity between the two sentences: 0.7394


In [None]:
# Fetch the profile for a single ticker to see what get_stock_info returns
aapl_info = get_stock_info('AAPL')
print(aapl_info)

{'Ticker': 'AAPL', 'Name': 'Apple Inc.', 'Business Summary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts, as well as advertising services include third-party licensing arrangements and its own advertising platforms. In addition, the company offers various subscription-based services, such as Apple Arcade, a game subscription service; Apple Fitness+, a personalized fitness service; Apple Music, which offers users a curated listening experien

In [None]:
# Compare Apple's business summary with a free-text description of the companies being searched for
aapl_description = aapl_info['Business Summary']
company_description = "I want to find companies that make food and are headquarted in California"

# Rebinds `similarity` (also assigned by the earlier example cell) to this new score
similarity = cosine_similarity_between_sentences(aapl_description, company_description)

Cosine similarity between the two sentences: 0.2793


# Get all the Stocks in the Stock Market

First, we need to get the symbols (also known as tickers) of all the stocks in the stock market.


In [None]:
import requests
import json
def get_company_tickers(timeout: float = 30):
    """
    Downloads and parses the Stock ticker symbols from the GitHub-hosted SEC company tickers JSON file.

    On success the parsed content is also written to 'company_tickers.json'
    in the current working directory.

    Args:
        timeout (float): Seconds to wait for the server before giving up.
                         Defaults to 30.

    Returns:
        dict: A dictionary containing company tickers and related information,
              or None if the request fails or the server does not answer with
              status code 200.

    Notes:
        The data is sourced from the official SEC website via a GitHub repository:
        https://raw.githubusercontent.com/team-headstart/Financial-Analysis-and-Automation-with-LLMs/main/company_tickers.json
    """
    # URL to fetch the raw JSON file from GitHub
    url = "https://raw.githubusercontent.com/team-headstart/Financial-Analysis-and-Automation-with-LLMs/main/company_tickers.json"

    # A timeout keeps the cell from hanging forever on a stalled connection;
    # connection problems take the same "report and return None" path as a bad status code.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download file. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download file. Status code: {response.status_code}")
        return None

    # Parse the JSON content directly
    company_tickers = json.loads(response.content.decode('utf-8'))

    # Save the content to a local file for future use
    with open("company_tickers.json", "w", encoding="utf-8") as file:
        json.dump(company_tickers, file, indent=4)

    print("File downloaded successfully and saved as 'company_tickers.json'")
    return company_tickers

company_tickers = get_company_tickers()

File downloaded successfully and saved as 'company_tickers.json'


In [None]:
company_tickers

{'0': {'cik_str': 1045810, 'ticker': 'NVDA', 'title': 'NVIDIA CORP'},
 '1': {'cik_str': 320193, 'ticker': 'AAPL', 'title': 'Apple Inc.'},
 '2': {'cik_str': 789019, 'ticker': 'MSFT', 'title': 'MICROSOFT CORP'},
 '3': {'cik_str': 1018724, 'ticker': 'AMZN', 'title': 'AMAZON COM INC'},
 '4': {'cik_str': 1652044, 'ticker': 'GOOGL', 'title': 'Alphabet Inc.'},
 '5': {'cik_str': 1326801, 'ticker': 'META', 'title': 'Meta Platforms, Inc.'},
 '6': {'cik_str': 1318605, 'ticker': 'TSLA', 'title': 'Tesla, Inc.'},
 '7': {'cik_str': 1067983,
  'ticker': 'BRK-B',
  'title': 'BERKSHIRE HATHAWAY INC'},
 '8': {'cik_str': 1046179,
  'ticker': 'TSM',
  'title': 'TAIWAN SEMICONDUCTOR MANUFACTURING CO LTD'},
 '9': {'cik_str': 1730168, 'ticker': 'AVGO', 'title': 'Broadcom Inc.'},
 '10': {'cik_str': 59478, 'ticker': 'LLY', 'title': 'ELI LILLY & Co'},
 '11': {'cik_str': 19617, 'ticker': 'JPM', 'title': 'JPMORGAN CHASE & CO'},
 '12': {'cik_str': 104169, 'ticker': 'WMT', 'title': 'Walmart Inc.'},
 '13': {'cik_str'

In [None]:
len(company_tickers)

9998

# Inserting Stocks into Pinecone

**1. Create an account on [Pinecone.io](https://app.pinecone.io/)**

**2. Create a new index called "stocks" and set the dimensions to 768, the size of the vectors produced by the `all-mpnet-base-v2` embedding model used in this notebook. Leave the rest of the settings as they are.**

![Screenshot 2024-12-02 at 4 48 03 PM](https://github.com/user-attachments/assets/13b484da-cd00-4337-a4db-4779080eb42c)


**3. Create an API Key for Pinecone**

![Screenshot 2024-11-24 at 10 44 37 PM](https://github.com/user-attachments/assets/e7feacc6-2bd1-472a-82e5-659f65624a88)


**4. Store your Pinecone API Key within Google Colab's secrets section, and then enable access to it (see the blue checkmark)**


![Screenshot 2024-11-24 at 10 45 25 PM](https://github.com/user-attachments/assets/eaf73083-0b5f-4d17-9e0c-eab84f91b0bc)




In [None]:
from google.colab import userdata
# Read the Pinecone key from Colab's secrets and expose it as an environment variable,
# presumably so the Pinecone integration can find it there — TODO confirm.
pinecone_api_key = userdata.get("PINECONE_API_KEY")
os.environ['PINECONE_API_KEY'] = pinecone_api_key

# Index and namespace that the stock descriptions are written to
index_name = "stocks"
namespace = "stock-descriptions"

# No model name is passed, so the embedding model is whatever HuggingFaceEmbeddings defaults to.
# NOTE(review): queries are embedded separately through get_huggingface_embeddings;
# confirm both sides use the same model, otherwise stored and query vectors are not comparable.
hf_embeddings = HuggingFaceEmbeddings()
vectorstore = PineconeVectorStore(index_name=index_name, embedding=hf_embeddings)

  hf_embeddings = HuggingFaceEmbeddings()


In [None]:
# for idx, stock in company_tickers.items():
#     stock_ticker = stock['ticker']
#     stock_data = get_stock_info(stock_ticker)
#     stock_description = stock_data['Business Summary']

#     print(f"Processing stock {idx} / {len(company_tickers)} :", stock_ticker)

#     vectorstore_from_documents = PineconeVectorStore.from_documents(
#         documents=[Document(page_content=stock_description, metadata=stock_data)],
#         embedding=hf_embeddings,
#         index_name=index_name,
#         namespace=namespace
#     )

# Parallelizing

[![](https://mermaid.ink/img/pako:eNqNkl1rgzAUhv9KyMXYwF74cSVjYA0rhY6WKRQWe5FqWqU1cUm8GKX_ffmwa65Gc6Hn5LxvzmM8F1jzhsIUHgUZWlCiigG9MlwoXp_AqpPqdS_esmyzCsBivV7o10fxXgagLFbZDsxmb2COi44dzxRsBK-plDoBZSsoaXbuNPeU4941qWBBv0fKVEfOvud5yejh0NWdLr1U0LnMmts2eYhDMLsZgEHa3TV5aEXbEG-zZZnasiXfLMGnaScVeNKRHDiT1DNunRGF5pMFtUaAiCKe5h4hp84jHHks9mJ8mMjBRBOMrT9G45wommis84bjYThZHuPYwzA_xqeIHUU8UZjyYxDOiOIJwhj_uRKnzhOc3FHsdHgoiUNJJhRTfgzFGVEyoRijj0JZAwPYU9GTrtFjfDHbFVQt7WkFUx02RJzMMF21joyKFz-shqkSIw2g4OOxhemBnKXOxqEhiqKO6DHt_3YHwr44v-XXX7It6B4?type=png)](https://mermaid.live/edit#pako:eNqNkl1rgzAUhv9KyMXYwF74cSVjYA0rhY6WKRQWe5FqWqU1cUm8GKX_ffmwa65Gc6Hn5LxvzmM8F1jzhsIUHgUZWlCiigG9MlwoXp_AqpPqdS_esmyzCsBivV7o10fxXgagLFbZDsxmb2COi44dzxRsBK-plDoBZSsoaXbuNPeU4941qWBBv0fKVEfOvud5yejh0NWdLr1U0LnMmts2eYhDMLsZgEHa3TV5aEXbEG-zZZnasiXfLMGnaScVeNKRHDiT1DNunRGF5pMFtUaAiCKe5h4hp84jHHks9mJ8mMjBRBOMrT9G45wommis84bjYThZHuPYwzA_xqeIHUU8UZjyYxDOiOIJwhj_uRKnzhOc3FHsdHgoiUNJJhRTfgzFGVEyoRijj0JZAwPYU9GTrtFjfDHbFVQt7WkFUx02RJzMMF21joyKFz-shqkSIw2g4OOxhemBnKXOxqEhiqKO6DHt_3YHwr44v-XXX7It6B4)

In [None]:
def _load_ticker_log(path, label):
    """Return the non-blank, stripped lines of a ticker log, or an empty list if the file is missing."""
    try:
        with open(path, 'r') as log_file:
            tickers = [entry.strip() for entry in log_file if entry.strip()]
    except FileNotFoundError:
        print(f"No existing {label} tickers file found")
        return []
    print(f"Loaded {len(tickers)} {label} tickers")
    return tickers


# Resume from earlier runs: each list starts from its log file when one exists,
# and empty otherwise.
successful_tickers = _load_ticker_log('successful_tickers.txt', 'successful')
unsuccessful_tickers = _load_ticker_log('unsuccessful_tickers.txt', 'unsuccessful')

Loaded 336 successful tickers
Loaded 5 unsuccessful tickers


[![](https://mermaid.ink/img/pako:eNqFk0uLgzAQgP_KkMOe7MHXRZaC1baXvsDCwqqHrGarVJMSE9hS-983NnWxULceRmf4Pp2MyQVlLCfIQweOTwXsw4SCuvw4Eiw7wqpsxPsXn_r-bmXAcrtdqts6WuwN2Ecr3wB__blJYTKZwixe45JCwKjgrKoI7zwXdphjlVXwwfiR8CbVH9BxdjPbqKxlJTAlTDbVuYXAjDUNZveSHWcZaRromkj_F61etIbire8Xpt2b9tDslvpCdHrRGYrddF6Ibi-6D4vsBjqcUWDe_NCMl0TAG6gfwwmEWOA7FlgasEYBWwP2KOBowBkFXA24Y4COoW61DRklLcwvQUHUHooEFrK53hHrAbkX7WdF51nRfVLUcX4fs8y6ObawMON-phvyI2CGRVakyEA14TUuc7XnL52ZIFGQmiTIU4855scEJfSqOCwFi840Q57gkhiIM3kokPeNq0Zl8pRjQcISq4NT_1VPmH4y1ufXXwdkCM8?type=png)](https://mermaid.live/edit#pako:eNqFk0uLgzAQgP_KkMOe7MHXRZaC1baXvsDCwqqHrGarVJMSE9hS-983NnWxULceRmf4Pp2MyQVlLCfIQweOTwXsw4SCuvw4Eiw7wqpsxPsXn_r-bmXAcrtdqts6WuwN2Ecr3wB__blJYTKZwixe45JCwKjgrKoI7zwXdphjlVXwwfiR8CbVH9BxdjPbqKxlJTAlTDbVuYXAjDUNZveSHWcZaRromkj_F61etIbire8Xpt2b9tDslvpCdHrRGYrddF6Ibi-6D4vsBjqcUWDe_NCMl0TAG6gfwwmEWOA7FlgasEYBWwP2KOBowBkFXA24Y4COoW61DRklLcwvQUHUHooEFrK53hHrAbkX7WdF51nRfVLUcX4fs8y6ObawMON-phvyI2CGRVakyEA14TUuc7XnL52ZIFGQmiTIU4855scEJfSqOCwFi840Q57gkhiIM3kokPeNq0Zl8pRjQcISq4NT_1VPmH4y1ufXXwdkCM8)

In [None]:
def process_stock(stock_ticker: str) -> str:
    """
    Store one stock's business summary in Pinecone and record the outcome.

    Args:
        stock_ticker (str): Ticker symbol to process.

    Returns:
        str: "Already processed <ticker>" when the ticker is already in
             successful_tickers, "Processed <ticker> successfully" after a
             successful store, or "ERROR processing <ticker>: <exception>"
             when any step raised.

    Notes:
        The outcome is appended both to the in-memory successful_tickers /
        unsuccessful_tickers list and to the matching .txt log file.
    """
    # Tickers already recorded as successful are not sent again.
    if stock_ticker in successful_tickers:
        return f"Already processed {stock_ticker}"

    try:
        profile = get_stock_info(stock_ticker)
        # The business summary is the document text; the whole profile rides along as metadata.
        summary_doc = Document(page_content=profile['Business Summary'], metadata=profile)

        PineconeVectorStore.from_documents(
            documents=[summary_doc],
            embedding=hf_embeddings,
            index_name=index_name,
            namespace=namespace
        )

        with open('successful_tickers.txt', 'a') as success_log:
            success_log.write(f"{stock_ticker}\n")
        successful_tickers.append(stock_ticker)
    except Exception as err:
        # Any failure above (lookup, storing, or logging the success) is recorded as unsuccessful.
        with open('unsuccessful_tickers.txt', 'a') as failure_log:
            failure_log.write(f"{stock_ticker}\n")
        unsuccessful_tickers.append(stock_ticker)
        return f"ERROR processing {stock_ticker}: {err}"

    return f"Processed {stock_ticker} successfully"

def parallel_process_stocks(tickers: list, max_workers: int = 10) -> None:
    """
    Run process_stock for every ticker on a thread pool, stopping at the first failure.

    Args:
        tickers (list): Ticker symbols to process.
        max_workers (int): Number of worker threads. Defaults to 10.

    Raises:
        SystemExit: With status 1 as soon as a ticker's result starts with
                    "ERROR" or its task raised. Tickers still waiting in the
                    queue at that point are cancelled; tasks already running
                    are allowed to finish.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_ticker = {
            executor.submit(process_stock, ticker): ticker
            for ticker in tickers
        }

        for future in concurrent.futures.as_completed(future_to_ticker):
            ticker = future_to_ticker[future]
            try:
                result = future.result()
                print(result)

                # Stop on error
                if result.startswith("ERROR"):
                    print(f"Stopping program due to error in {ticker}")
                    # cancel_futures drops the queued tickers. Without it, leaving the
                    # `with` block would still wait for every remaining ticker to be
                    # processed before SystemExit could propagate.
                    executor.shutdown(wait=False, cancel_futures=True)
                    raise SystemExit(1)

            except Exception as exc:
                # SystemExit is not an Exception subclass, so the stop above passes straight through.
                print(f'{ticker} generated an exception: {exc}')
                print("Stopping program due to exception")
                executor.shutdown(wait=False, cancel_futures=True)
                raise SystemExit(1)

# Collect the ticker symbol from every entry of the downloaded company list
tickers_to_process = [entry['ticker'] for entry in company_tickers.values()]

# Store them in Pinecone using 10 worker threads
parallel_process_stocks(tickers_to_process, max_workers=10)

Already processed CMCSA
Already processed RCL
Already processed RELX
Already processed KLAC
Already processed GS
Already processed TEAM
Already processed ABNB
Already processed GM
Already processed REGN
Already processed ANZGY
Already processed WELL
Already processed TOELY
Already processed APH
Already processed MCO
Already processed OKE
Already processed PYPL
Already processed ADSK
Already processed HES
Already processed BN
Already processed AJG
Already processed DFS
Already processed MDLZ
Already processed STZ
Already processed RSG
Already processed DUK
Already processed HCA
Already processed BDX
Already processed CTAS
Already processed EQNR
Already processed EQIX
Already processed ISRG
Already processed GBTC
Already processed DEO
Already processed VRT
Already processed PBR
Already processed ING
Already processed CARR
Already processed WM
Already processed CI
Already processed CMI
Already processed CVS
Already processed WCN
Already processed ICE
Already processed BAESY
Already proces

KeyboardInterrupt: 

In [None]:
# def process_stock(stock_ticker: str) -> str:
#     # Skip if already processed
#     if stock_ticker in successful_tickers:
#         return f"Already processed {stock_ticker}"

#     try:
#         # Get stock data using the provided ticker symbol
#         stock_data = get_stock_info(stock_ticker)

#         # Extract the business summary from the stock data
#         stock_description = stock_data['Business Summary']

#         # Store the stock description in the Pinecone vector store
#         vectorstore_from_texts = PineconeVectorStore.from_documents(
#             documents=[Document(page_content=stock_description, metadata=stock_data)],  # Document with content and metadata
#             embedding=hf_embeddings,  # Hugging Face embeddings for vector representation
#             index_name=index_name,  # Name of the Pinecone index
#             namespace=namespace  # Namespace to logically organize vectors
#         )

#         # Log successful processing of the ticker
#         with open('successful_tickers.txt', 'a') as f:
#             f.write(f"{stock_ticker}\n")  # Append ticker to success log
#         successful_tickers.append(stock_ticker)  # Track success in the list

#         return f"Processed {stock_ticker} successfully"

#     except Exception as e:
#         # Log failure to process the ticker
#         with open('unsuccessful_tickers.txt', 'a') as f:
#             f.write(f"{stock_ticker}\n")  # Append ticker to failure log
#         unsuccessful_tickers.append(stock_ticker)  # Track failure in the list

#         return f"ERROR processing {stock_ticker}: {e}"

# def parallel_process_stocks(tickers: list, max_workers: int = 10) -> None:
#     # Create a ThreadPoolExecutor to process stock tickers in parallel
#     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
#         # Map each ticker to a future task for processing
#         future_to_ticker = {
#             executor.submit(process_stock, ticker): ticker  # Submit `process_stock` for each ticker
#             for ticker in tickers
#         }

#         # Iterate over completed futures as they finish
#         for future in concurrent.futures.as_completed(future_to_ticker):
#             ticker = future_to_ticker[future]  # Get the ticker corresponding to the future
#             try:
#                 # Retrieve the result of the future
#                 result = future.result()
#                 print(result)  # Print the result of processing

#                 # Stop execution if an error occurred
#                 if result.startswith("ERROR"):
#                     print(f"Stopping program due to error in {ticker}")
#                     executor.shutdown(wait=False)  # Shut down the executor early
#                     raise SystemExit(1)  # Exit the program with an error status

#             except Exception as exc:
#                 # Handle unexpected exceptions in processing
#                 print(f'{ticker} generated an exception: {exc}')
#                 print("Stopping program due to exception")
#                 executor.shutdown(wait=False)  # Shut down the executor early
#                 raise SystemExit(1)  # Exit the program with an error status

# # Prepare a list of tickers to process
# tickers_to_process = [company_tickers[num]['ticker'] for num in company_tickers.keys()]

# # Process the tickers in parallel with a specified maximum number of workers
# parallel_process_stocks(tickers_to_process, max_workers=10)


# Perform RAG

In [None]:
# Create the Pinecone client with the API key kept in Colab's secrets
pc = Pinecone(api_key=userdata.get("PINECONE_API_KEY"))

# Handle to the "stocks" index that the retrieval query runs against
pinecone_index = pc.Index("stocks")

In [None]:
query = "What are some companies are involved with building data centers?"

In [None]:
raw_query_embedding = get_huggingface_embeddings(query)

In [None]:
top_matches = pinecone_index.query(vector=raw_query_embedding.tolist(), top_k=20, include_metadata=True, namespace="stock-descriptions")

In [None]:
top_matches

{'matches': [{'id': '05ec8f4a-899c-4898-b4bf-99cab908c2ec',
              'metadata': {'Business Summary': 'GDS Holdings Limited, together '
                                               'with its subsidiaries, '
                                               'develops and operates data '
                                               "centers in the People's "
                                               'Republic of China. The company '
                                               'offers colocation services '
                                               'comprising critical facilities '
                                               'space, customer-available '
                                               'power, racks, and cooling; '
                                               'managed hosting services, '
                                               'including business continuity '
                                               'and disaster recovery, network '
      

In [None]:
contexts = [item['metadata']['text'] for item in top_matches['matches']]

In [None]:
# One plain dict per match, holding a copy of every key-value pair in that match's metadata
contexts = [dict(item['metadata']) for item in top_matches['matches']]

In [None]:
def _format_context(context):
    """Render one metadata dict as newline-separated "key: value" lines."""
    return "\n".join(f"{key}: {value}" for key, value in context.items())


# Only the first seven contexts are turned into prompt text
contexts_str = [_format_context(context) for context in contexts[:7]]

# Wrap the context blocks in <CONTEXT> tags and append the question after them
augmented_query = "".join([
    "<CONTEXT>\n",
    "\n\n-------\n\n".join(contexts_str),
    "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n",
    query,
])


In [None]:
# `contexts` may hold metadata dicts rather than plain strings, and str.join only
# accepts strings, so render each of the first five contexts to text before joining.
top_contexts = [
    context if isinstance(context, str)
    else "\n".join(f"{key}: {value}" for key, value in context.items())
    for context in contexts[:5]
]
augmented_query = "<CONTEXT>\n" + "\n\n-------\n\n".join(top_contexts) + "\n-------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" + query

TypeError: sequence item 0: expected str instance, dict found

In [None]:
print(augmented_query)

<CONTEXT>
Business Summary: GDS Holdings Limited, together with its subsidiaries, develops and operates data centers in the People's Republic of China. The company offers colocation services comprising critical facilities space, customer-available power, racks, and cooling; managed hosting services, including business continuity and disaster recovery, network management, data storage, system security, operating system, database, and server middleware services; managed cloud services; and consulting services. It serves cloud service providers, large Internet companies, financial institutions, telecommunications and IT service providers, and large domestic private sector and multinational corporations. The company was founded in 2001 and is headquartered in Shanghai, the People's Republic of China.
City: Shanghai
Country: China
Industry: Information Technology Services
Name: GDS Holdings Limited
Sector: Technology
State: Information not available
Ticker: GDS
text: GDS Holdings Limited, t

In [None]:
# OpenAI SDK client pointed at the Groq API's OpenAI-style endpoint (see base_url);
# the key is read from the GROQ_API_KEY entry in Colab's secrets.
client = OpenAI(
  base_url="https://api.groq.com/openai/v1",
  api_key=userdata.get("GROQ_API_KEY")
)

In [None]:
# System prompt: sets the assistant's role and asks for one
# "<Company Name> (<Ticker>) : <Answer>" entry per answer.
system_prompt = f"""You are an expert at providing answers about stocks. Please answer my question provided. and give the answer in formate
<Company Name> (<Ticker>) : <Answer>
"""

# The context-augmented question is sent as the user message.
# NOTE(review): confirm the model id below is still served by this endpoint.
llm_response = client.chat.completions.create(
    model="llama-3.1-70b-versatile",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": augmented_query}
    ]
)

# Keep only the text of the first returned choice
response = llm_response.choices[0].message.content

In [None]:
print(response)

GDS Holdings Limited (GDS) : Involved in developing and operating data centers in the People's Republic of China, offering services such as colocation, managed hosting, and cloud services.

Iris Energy Limited (None, actual: IREN is the ticker for Iris Energy Limited in ASX and XRF is ticker for Iris Energy Limited in the OTCBB Market)  : Owns and operates bitcoin mining data centers, providing high-performance computing solutions and AI cloud services.

Hewlett Packard Enterprise Company (HPE) : Offers data center solutions, including servers, storage, and converged edge systems, as well as management and support services.

However, note that Hewlett Packard Enterprise Company is not primarily involved in data center real estate, the firm constructs, manages, markets, rents, and sells the equipment and services involved in running data centers.


In [None]:
df = yf.download(['MSFT', 'AAPL', 'VOO'], period='10y')

[*********************100%***********************]  3 of 3 completed


In [None]:
df.index

DatetimeIndex(['2024-11-04 00:00:00+00:00', '2024-11-05 00:00:00+00:00',
               '2024-11-06 00:00:00+00:00', '2024-11-07 00:00:00+00:00',
               '2024-11-08 00:00:00+00:00', '2024-11-11 00:00:00+00:00',
               '2024-11-12 00:00:00+00:00', '2024-11-13 00:00:00+00:00',
               '2024-11-14 00:00:00+00:00', '2024-11-15 00:00:00+00:00',
               '2024-11-18 00:00:00+00:00', '2024-11-19 00:00:00+00:00',
               '2024-11-20 00:00:00+00:00', '2024-11-21 00:00:00+00:00',
               '2024-11-22 00:00:00+00:00', '2024-11-25 00:00:00+00:00',
               '2024-11-26 00:00:00+00:00', '2024-11-27 00:00:00+00:00',
               '2024-11-29 00:00:00+00:00', '2024-12-02 00:00:00+00:00',
               '2024-12-03 00:00:00+00:00', '2024-12-04 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Date', freq=None)

In [None]:
import pandas as pd
import plotly.graph_objects as go

def plot_stock_data_interactive(df, tickers, price_type='Close'):
    """
    Draw one interactive line per ticker for a chosen price column and show the figure.

    Args:
        df (pd.DataFrame): Frame whose columns are addressed as (price_type, ticker)
                           pairs, e.g. the result of a multi-ticker yfinance download.
        tickers (list): Tickers to plot (e.g., ['AAPL', 'MSFT', 'VOO']).
        price_type (str): Price column to plot (e.g., 'Close', 'Open', 'High', 'Low').
    """
    fig = go.Figure()

    for ticker in tickers:
        # Doubled braces keep %{x} / %{y:.2f} literal so Plotly fills them in on hover.
        hover_text = f"<b>{ticker}</b><br>Date: %{{x}}<br>{price_type} Price: $%{{y:.2f}}<extra></extra>"
        price_series = df[(price_type, ticker)]
        line = go.Scatter(
            x=df.index,
            y=price_series,
            mode='lines',
            name=ticker,
            hovertemplate=hover_text,
        )
        fig.add_trace(line)

    layout_options = {
        "title": f"Interactive {price_type} Prices Over Time",
        "xaxis_title": "Date",
        "yaxis_title": f"{price_type} Price ($)",
        "hovermode": "x unified",
        "legend_title": "Stocks",
        "template": "plotly_dark",  # dark theme
    }
    fig.update_layout(**layout_options)

    fig.show()


# Make sure the columns form a MultiIndex and the row index holds datetimes
if not isinstance(df.columns, pd.MultiIndex):
    df.columns = pd.MultiIndex.from_tuples(df.columns)
df.index = pd.to_datetime(df.index)

# The tickers are the distinct values of the second column level
tickers = df.columns.get_level_values(1).unique()

# Draw the closing prices for every ticker found
plot_stock_data_interactive(df, tickers=tickers, price_type='Close')

In [None]:
av_api_key=userdata.get("ALPHA_VAN_API")

In [None]:
import requests

# Query parameters for the Alpha Vantage NEWS_SENTIMENT endpoint. Passing them
# through `params` lets requests build and encode the query string.
params = {
    'function': 'NEWS_SENTIMENT',
    'tickers': 'TSLA',
    'limit': 100,
    'apikey': av_api_key,
}
url = 'https://www.alphavantage.co/query'

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status surfaces HTTP errors here instead of as a confusing
# failure when the response body is parsed or used later.
r = requests.get(url, params=params, timeout=30)
r.raise_for_status()
data = r.json()

data

{'items': '50',
 'sentiment_score_definition': 'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish',
 'relevance_score_definition': '0 < x <= 1, with a higher score indicating higher relevance.',
 'feed': [{'title': 'Making Money From Crowd Stampede - Palantir Becomes Largest Defense Contractor In Blink Of An Eye - Apple  ( NASDAQ:AAPL ) ',
   'url': 'https://www.benzinga.com/trading-ideas/24/12/42325140/making-money-from-crowd-stampede-palantir-becomes-largest-defense-contractor-in-blink-of-an-eye',
   'time_published': '20241204T201243',
   'authors': ['The Arora Report'],
   'summary': 'To gain an edge, this is what you need to know today. PLTR trailing PE is 354. RTX trailing PE is 34. PLTR forward PE is 151. RTX forward PE is 19. PLTR price/sales is 65. RTX price/sales is 2. In the early trade, money flows are positive in Apple Inc AAPL, Amazon.com, Inc.',
   'banner_image': 'https://contribu

In [None]:
# Article-level fields copied onto every row produced for an article
ARTICLE_FIELDS = (
    'title',
    'url',
    'time_published',
    'overall_sentiment_score',
    'overall_sentiment_label',
)

structured_data = []
for entry in data['feed']:
    base_info = {field: entry[field] for field in ARTICLE_FIELDS}

    # One row per ticker mentioned in the article; articles without tickers add no rows
    for ticker_info in entry.get('ticker_sentiment', []):
        detailed_info = {
            **base_info,
            'ticker': ticker_info['ticker'],
            'ticker_relevance_score': ticker_info['relevance_score'],
            'ticker_sentiment_score': ticker_info['ticker_sentiment_score'],
            'ticker_sentiment_label': ticker_info['ticker_sentiment_label'],
        }
        structured_data.append(detailed_info)

# Tabulate the rows, rank them by per-ticker relevance (highest first),
# and keep the first 35 rows of that ranking
df = pd.DataFrame(structured_data)
df_sorted = df.sort_values(by='ticker_relevance_score', ascending=False)
top_df = df_sorted.head(35)



In [None]:
for _, row in df_sorted.head(10).iterrows():
    print(f"  {row['ticker_relevance_score']} - {row['overall_sentiment_label']} - {row['ticker']}  - {row['url']}")

  0.939207 - Neutral - TSLA  - https://www.benzinga.com/tech/24/12/42312826/teslas-0-interest-loan-offer-on-model-3-y-to-end-mid-december-customers-who-order-now-can-get-delivery-befor
  0.929758 - Somewhat-Bullish - TSLA  - https://www.benzinga.com/news/global/24/12/42322754/tesla-boosts-marketing-in-china-highlights-safety-features-during-year-end-sales-push-report
  0.859898 - Somewhat-Bullish - TSLA  - https://www.benzinga.com/24/12/42311290/trader-danny-moses-says-he-is-no-longer-shorting-tesla
  0.85567 - Somewhat-Bullish - CRYPTO:DOGE  - https://www.fool.com/investing/2024/12/04/where-will-dogecoin-be-in-1-year/
  0.831224 - Neutral - TSLA  - https://www.benzinga.com/analyst-ratings/analyst-color/24/12/42319796/tesla-analyst-lowers-q4-delivery-estimate-says-2-dynamics-will-determine-ev-stocks-
  0.804667 - Somewhat-Bullish - TSLA  - https://www.benzinga.com/24/12/42285783/tesla-bear-craig-irwin-turns-bullish-increases-price-target-from-85-to-380
  0.795641 - Bullish - TSLA  - ht

In [None]:
system_prompt = f"""You are an expert at providing stock predictions and if its a good stock to buy or sell. Please answer my question provided.
example format -

<Example>
Here are the top stocks to buy and sell based on the provided article summaries:

		**Best to Buy:**

		1. **Tesla (TSLA)** - BUY: Tesla's stock has been experiencing a surge in recent months, and analysts are predicting it will continue to rise due to its strong sales and innovative technology.
		2. **Credo Technology Group (CRDO)** - BUY: Credo Technology Group's AI play is being double-upgraded, with a forecast increase of almost 200%, making it a promising investment opportunity.
		3. **SOFI (SOFI)** - BUY: SOFI's stock is flashing bullish momentum, making it a good opportunity to buy.
		4. **Amazon (AMZN)** - BUY: Amazon's stock is part of the "Magnificent Seven" and is expected to perform well, making it a good investment opportunity.
		5. **NVIDIA (NVDA)** - BUY: NVIDIA's stock is part of the "Magnificent Seven" and is expected to perform well, making it a good investment opportunity.

		**Best to Sell:**

		1. **Tesla (TSLA)** - SELL (Contrarian): Some analysts are predicting a decline in Tesla's global sales due to a slowdown in Europe and the US, making it a good opportunity to sell.
		2. **Meta Platforms (META)** - SELL: Meta's stock is experiencing a decline due to concerns about its content moderation and misinformation, making it a good opportunity to sell.

		**Neutral Stocks:**

		1. **ChargePoint Holdings (CHPT)** - NEUTRAL: ChargePoint's stock is neither considered a strong buy or sell, as it's expected to take years for it to turn a profit.
		2. **CrowdStrike Holdings (CRWD)** - NEUTRAL: CrowdStrike's stock is neither considered a strong buy or sell, as it's expected to have neutral momentum.
		3. **MicroStrategy (MSTR)** - NEUTRAL: MicroStrategy's stock is experiencing some hiccups, but overall, its momentum is neutral.



    <Example/>
order by best to buy followed by best to sell.
list top 10 urls at bottom
"""

# Convert DataFrame to string: the plain-text rendering of top_df is the whole user message.
formatted_string = top_df.to_string(index=False)
# Note: this rebinds `query`, which earlier in the notebook held the RAG question.
query = formatted_string
# NOTE(review): confirm the model id below is still served by this endpoint.
llm_response = client.chat.completions.create(
    model="llama-3.1-70b-versatile",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query}
    ]
)

# Keep only the text of the first returned choice
response = llm_response.choices[0].message.content

print(response)

**Best to Buy:**

1. **Credo Technology Group (CRDO)** - BUY: Credo Technology Group's AI play is being double-upgraded, with a forecast increase of almost 200%, making it a promising investment opportunity.
2. **SOFI (SOFI)** - BUY: SOFI's stock is flashing bullish momentum, making it a good opportunity to buy.
3. **NVIDIA (NVDA)** - BUY: NVIDIA's stock is part of the "Magnificent Seven" and is expected to perform well, making it a good investment opportunity.
4. **Tesla (TSLA)** - BUY: Tesla's stock has been experiencing a surge in recent months, and analysts are predicting it will continue to rise due to its strong sales and innovative technology. 
5. **Amazon (AMZN)** - BUY: Amazon's stock is part of the "Magnificent Seven" and is expected to perform well, making it a good investment opportunity.
6. **General Motors (GM)** - BUY: General Motors' stock is experiencing a surge due to its decision to offload stakes in a battery plant at Michigan, making it a good opportunity to buy.
7