# Setup

In [None]:
!pip install openai
!pip install -qU pypdf
!pip install langchain langchain-community langchain-openai
!pip install sentence-transformers faiss-cpu rank_bm25

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-community
  Downloading langchain_community-0.3.12-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain
  Downloading langchain-0.3.12-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.24 (from langchain)
  Downloading langchain_core-0.3.25-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.0-py3-none-any.whl.metadata (3.5 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.

In [None]:
# Libraries
import yfinance as yf
import pandas as pd
import os
import json
import numpy as np
import openai

from IPython.display import display, Markdown

In [None]:
# Colab-only setup: this cell requires the google.colab package and will not
# import outside a Colab runtime.
from google.colab import userdata
# Look up the value stored under the name 'OPENAI_API_KEY'
# (presumably a Colab notebook secret — confirm it is defined before running).
api_key = userdata.get('OPENAI_API_KEY')

In [None]:
# Export the key as an environment variable for the rest of the session.
# NOTE(review): the client below is created without an explicit key, so it
# presumably picks the key up from OPENAI_API_KEY — confirm.
os.environ['OPENAI_API_KEY'] = api_key
# Module-level OpenAI client; portfolio_analysis() uses it for chat completions.
client = openai.OpenAI()

## 1. User portfolio data

In [None]:
def save_user_portfolio(user_name, asset_name, units, avg_cost):
    """Insert or overwrite one asset in a user's JSON portfolio file.

    The portfolio is stored at ``data/<user>_portfolio.json`` relative to the
    current working directory, where ``<user>`` is ``user_name`` lower-cased
    with runs of whitespace replaced by single underscores. The file holds a
    JSON object keyed by the upper-cased asset symbol.

    Args:
        user_name: Name of the portfolio owner (used to build the file name).
        asset_name: Asset symbol; stored upper-cased.
        units: Number of units held.
        avg_cost: Average cost per unit.

    Returns:
        None. The whole file is rewritten on every call.
    """
    user_slug = '_'.join(user_name.lower().split())
    symbol = asset_name.upper()

    data_folder = 'data'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_folder, exist_ok=True)
    file_path = os.path.join(data_folder, f"{user_slug}_portfolio.json")

    # Load whatever is already stored; a missing or unparsable file simply
    # means we start from an empty portfolio.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            existing_data = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
        existing_data = {}

    # Valid JSON that is not an object (e.g. a list or null) cannot be keyed
    # by symbol, so treat it like a corrupt file instead of crashing below.
    if not isinstance(existing_data, dict):
        existing_data = {}

    # One entry per symbol: saving the same asset again replaces its record.
    existing_data[symbol] = {
        "asset_name": symbol,
        "units": units,
        "avg_cost": avg_cost,
    }

    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(existing_data, file, indent=4)


## 2. Portfolio analysis

In [None]:
def get_stock_metrics(portfolio_data):
    """Fetch one year of prices and a snapshot of key figures per asset.

    Args:
        portfolio_data: Mapping of asset symbol to a record containing at
            least the keys ``'units'`` and ``'avg_cost'``.

    Returns:
        A ``(price_data, metrics)`` tuple. ``price_data`` is the
        ``'Adj Close'`` selection of ``yf.download`` for all symbols over the
        past year. ``metrics`` maps each symbol to a dict with the holding
        (``units_held``, ``avg_cost``) plus fields copied from
        ``yf.Ticker(symbol).info``; any field missing there is ``'N/A'``.
    """
    assets = list(portfolio_data)

    # Recent yfinance releases default auto_adjust to True, which drops the
    # 'Adj Close' column; request unadjusted data explicitly so the column
    # exists regardless of the installed version.
    price_data = yf.download(assets, period='1y', auto_adjust=False)['Adj Close']

    # Output key -> key in the yfinance ``info`` dictionary.
    info_fields = {
        "current_price": 'currentPrice',
        "previous_close": 'previousClose',
        "52_week_high": 'fiftyTwoWeekHigh',
        "52_week_low": 'fiftyTwoWeekLow',
        "dividend_yield": 'dividendYield',
        "market_cap": 'marketCap',
        "pe_ratio": 'trailingPE',
        "eps": 'trailingEps',
    }

    metrics = {}
    for symbol, holding in portfolio_data.items():
        stock_info = yf.Ticker(symbol).info

        entry = {
            "units_held": holding['units'],
            "avg_cost": holding['avg_cost'],
        }
        for out_key, info_key in info_fields.items():
            entry[out_key] = stock_info.get(info_key, 'N/A')
        metrics[symbol] = entry

    return price_data, metrics


In [None]:
# Function to calculate expected returns and covariance
def calculate_returns(prices):
    """Compute mean daily returns and their covariance matrix.

    Args:
        prices: DataFrame of prices with one column per asset.

    Returns:
        ``(expected_returns, covariance_matrix)``: the per-column mean of the
        daily percentage changes and their covariance matrix. Returns
        ``(None, None)`` when ``prices`` is not a DataFrame (the original
        code used this result for the single-asset case) or when the
        statistics cannot be computed from the data.
    """
    # A covariance matrix needs tabular input; anything else (a Series,
    # None, ...) is reported as "no statistics" rather than raising.
    if not isinstance(prices, pd.DataFrame):
        return None, None

    try:
        # Rows containing any NaN (including the first row) are dropped.
        returns = prices.pct_change().dropna()
        return returns.mean(), returns.cov()
    except (TypeError, ValueError):
        # Non-numeric columns etc. — keep the best-effort contract without
        # swallowing unrelated errors such as KeyboardInterrupt.
        return None, None

In [None]:
def portfolio_analysis(metrics, expected_returns, covariance_matrix):
    """Ask the chat model for portfolio recommendations.

    The three arguments are interpolated as text into a single user message;
    the reply text of the first completion choice is returned.

    Args:
        metrics: Per-asset figures (as produced by ``get_stock_metrics``).
        expected_returns: Mean returns per asset, or ``None``.
        covariance_matrix: Covariance of returns, or ``None``.

    Returns:
        The model's answer as a string.
    """
    # Role description sent as the system message.
    system_prompt = """
    You are a portfolio manager responsible for analyzing and optimizing investment portfolios.
    Apply Modern Portfolio Theory where relevant, and use Earnings per Share (EPS) to achieve a balanced risk-return profile.
    Provide actionable insights to help users make informed investment decisions, focusing on practical recommendations rather than explaining portfolio metrics.
    """

    # The data itself travels in the user message.
    user_prompt = f"Portfolio: {metrics}.\nExpected returns:{expected_returns}, covariance matrix:{covariance_matrix}"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(model="gpt-4", messages=conversation)

    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
def portfolio_summary(user_name):
    """Load a user's portfolio and compute its current value and gain/loss.

    Args:
        user_name: Name of the portfolio owner; normalised the same way as in
            ``save_user_portfolio`` (lower-case, whitespace -> underscores).

    Returns:
        ``(overall_summary, price_data, metrics)``. ``overall_summary`` is a
        dict with ``total_value``, ``total_gain_loss`` and ``assets`` (one
        record per priced asset); ``price_data`` and ``metrics`` are passed
        through from ``get_stock_metrics``. Returns ``(None, None, None)``
        when the user has no portfolio file or the file holds no usable
        portfolio.
    """
    user_name = '_'.join(user_name.lower().split())
    file_path = os.path.join('data', f"{user_name}_portfolio.json")

    if not os.path.exists(file_path):
        print(f"No portfolio found for user: {user_name}")
        return None, None, None

    # A corrupt file is treated like an empty portfolio instead of crashing.
    try:
        with open(file_path, 'r') as file:
            portfolio_data = json.load(file)
    except json.JSONDecodeError:
        portfolio_data = None

    # Nothing to price: avoid calling the market-data API with no symbols.
    if not isinstance(portfolio_data, dict) or not portfolio_data:
        print(f"Portfolio for user {user_name} is empty or unreadable.")
        return None, None, None

    price_data, metrics = get_stock_metrics(portfolio_data)

    total_value = 0
    total_gain_loss = 0
    summary = []

    for symbol, stock in metrics.items():
        units = stock["units_held"]
        avg_cost = stock['avg_cost']
        current_price = stock["current_price"]

        # get_stock_metrics reports a missing price as the string 'N/A';
        # multiplying by it would silently repeat the string, so skip the
        # asset rather than corrupt the totals.
        if isinstance(current_price, bool) or not isinstance(current_price, (int, float)):
            print(f"Skipping {symbol}: no current price available.")
            continue

        current_value = units * current_price
        gain_loss = (current_price - avg_cost) * units

        total_value += current_value
        total_gain_loss += gain_loss

        summary.append({
            "asset_name": symbol,
            "units": units,
            "avg_cost": avg_cost,
            "current_price": current_price,
            "current_value": current_value,
            "gain_loss": gain_loss,
        })

    overall_summary = {
        "total_value": total_value,
        "total_gain_loss": total_gain_loss,
        "assets": summary,
    }

    return overall_summary, price_data, metrics


## 3. Tax advisor

In [None]:
# Libraries
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores.faiss import FAISS

In [None]:
# Load the tax law PDF; the path is relative to the current working directory,
# so the file must be present there before this cell runs.
loader = PyPDFLoader("Germany-tax law.pdf")
# `pages` is the list of documents that both retrievers below are built from.
pages = loader.load_and_split()

In [None]:
# Build two retrievers over the same `pages` documents and combine them.
# Initialize the BM25 retriever
bm25_retriever = BM25Retriever.from_documents(pages)
bm25_retriever.k =  2  # Retrieve top 2 results
# Use Hugging Face's Sentence Transformers model as the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Embed the chunks as vectors and load them into the FAISS database
faiss_db = FAISS.from_documents(pages, embedding_model)
# Same top-2 cut-off as the BM25 retriever
faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 2})

# Initialize the ensemble retriever; weights follow the order of `retrievers`
# (0.4 for BM25, 0.6 for FAISS). This is the retriever start() passes to advisor().
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever],
                                      weights=[0.4, 0.6])


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
def advisor(query, retriever):
    """Answer a question with a retrieval-backed QA chain.

    A fresh ``ChatOpenAI`` model and a "stuff"-type ``RetrievalQA`` chain are
    built on every call.

    Args:
        query: The user's question.
        retriever: Retriever handed to the chain as its document source.

    Returns:
        The value stored under ``'result'`` in the chain's response.
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(),
        chain_type="stuff",
        retriever=retriever,
    )

    # The chain returns a mapping; only the answer text is of interest here.
    answer = qa_chain.invoke(query)
    return answer['result']


## App

In [None]:
from sys import breakpointhook
def _ask_menu_choice():
    """Prompt until the user enters one of the menu options "1", "2" or "3"."""
    while True:
        pref = input("[1] update portfolio [2] evaluate portfolio [3] Consult tax law\n")
        if pref in ("1", "2", "3"):
            return pref
        print("Invalid input. Please enter either 1, 2, or 3")


def _ask_asset_name():
    """Prompt until yfinance returns more than one info field for the symbol."""
    while True:
        asset_name = input("Enter the asset name: ")
        try:
            # Constructing the Ticker is inside the try so that a rejected
            # symbol re-prompts instead of aborting the session.
            if len(yf.Ticker(asset_name).info) > 1:
                return asset_name
            print("Invalid input. Please enter a valid asset name.")
        except Exception as e:
            print(f"Error fetching data for {asset_name}: {e}. Please enter a valid asset name.")


def _ask_non_negative(prompt, cast, invalid_message, negative_message):
    """Prompt until ``cast(input(prompt))`` succeeds with a value >= 0."""
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print(invalid_message)
            continue
        if value < 0:
            print(negative_message)
            continue
        return value


def _update_portfolio(user_name):
    """Collect one asset position from the user and save it."""
    asset_name = _ask_asset_name()
    units = _ask_non_negative(
        "Enter the number of units: ",
        int,
        "Invalid input. Please enter an integer value for the number of units.",
        "Number of units cannot be negative. Please try again.",
    )
    avg_cost = _ask_non_negative(
        "Enter the average cost per unit (USD): ",
        float,
        "Invalid input. Please enter a numeric value for the average cost.",
        "Average cost cannot be negative. Please try again.",
    )
    save_user_portfolio(user_name, asset_name, units, avg_cost)


def _evaluate_portfolio(user_name):
    """Print the portfolio summary and display the model-generated analysis."""
    summary, price_data, metrics = portfolio_summary(user_name)

    # portfolio_summary already printed a message when nothing could be loaded.
    if not summary:
        return

    print(f"\nPortfolio summary for {user_name}:")
    print(f"Total Portfolio Value: ${summary['total_value']:.2f}")
    print(f"Total Gain/Loss: ${summary['total_gain_loss']:.2f}")

    for asset in summary['assets']:
        # Monetary results use two decimals, matching the totals above and
        # avoiding float artefacts such as 6618.400000000001.
        print(f"Asset: {asset['asset_name']}, Units: {asset['units']}, "
              f"Avg Cost: {asset['avg_cost']}, Current Price: {asset['current_price']}, "
              f"Current Value: {asset['current_value']:.2f}, Gain/Loss: {asset['gain_loss']:.2f}")

    expected_returns, covariance_matrix = calculate_returns(price_data)
    analysis = portfolio_analysis(metrics, expected_returns, covariance_matrix)

    # Escape dollar signs so Markdown does not treat them as math delimiters.
    # The backslash is doubled: '\$' is an invalid escape sequence in Python.
    analysis = analysis.replace('$', '\\$')

    print("\nPortfolio Analysis:")
    display(Markdown(analysis))


def _consult_tax_law():
    """Ask for a question and display the retrieval-based answer."""
    query = input("Specify your legal concern:\n")
    response = advisor(query, ensemble_retriever)
    display(Markdown(response))


def start():
    """Run one interactive session of the assistant.

    Asks for a nickname and a menu choice, then either records an asset in
    the user's portfolio (1), prints and analyses the portfolio (2), or
    answers a tax-law question (3). All input comes from ``input()``; output
    goes to stdout and the notebook display. Returns ``None``.
    """
    user_name = input("Enter your nickname: ")
    pref = _ask_menu_choice()

    if pref == "1":
        _update_portfolio(user_name)
    elif pref == "2":
        _evaluate_portfolio(user_name)
    elif pref == "3":
        _consult_tax_law()



In [None]:
start()

Enter your nickname: user
[1] update portfolio [2] evaluate portfolio [3] Consult tax law
1
Enter the asset name: NVDA
Enter the number of units: 32
Enter the average cost per unit (USD): 101


In [None]:
start()

Enter your nickname: user
[1] update portfolio [2] evaluate portfolio [3] Consult tax law
1
Enter the asset name: GOOGL
Enter the number of units: 40
Enter the average cost per unit (USD): 99


In [None]:
start()

Enter your nickname: user
[1] update portfolio [2] evaluate portfolio [3] Consult tax law
2


[*********************100%***********************]  3 of 3 completed



Portfolio summary for user:
Total Portfolio Value: $11960.24
Total Gain/Loss: $3700.24
Asset: NVDA, Units: 32, Avg Cost: 101.0, Current Price: 131.6, Current Value: 4211.2, Gain/Loss: 979.2
Asset: GOOGL, Units: 40, Avg Cost: 99.0, Current Price: 165.46, Current Value: 6618.400000000001, Gain/Loss: 2658.4
Asset: DIS, Units: 12, Avg Cost: 89.0, Current Price: 94.22, Current Value: 1130.6399999999999, Gain/Loss: 62.64

Portfolio Analysis:


Looking at your portfolio composition and taking the modern portfolio theory into perspective, I have gathered a few recommendations for you. 

1. Diversify Investments: Given the high risk associated with NVDA due to its high variance as indicated by the covariance matrix, consider diversifying your investments more into stocks such as DIS and GOOGL that present lower variance and thus lower risk.

2. Review Earnings Per Share: Currently, the stock with the highest EPS in your portfolio is GOOGL, which indicates its profitability. Thus, investing more in GOOGL may yield better dividends for your portfolio.

3. Keep an eye on market indicators: Looking at current prices and market caps, NVDA and GOOGL seem to be the largest companies representing your portfolio. However, remember that this does not ensure they are the most profitable or the least risky. Use indicators such as EPS, P/E ratios, dividend yields, and 52-week high/low prices to better gauge each investment's potential.

4. Risk-Reward tradeoff: The risk-reward tradeoff is also vital while considering investment choices. NVDA seems to provide higher expected returns but at a higher risk. On the other hand, GOOGL and DIS seem to provide moderate returns for comparatively lower risk.

5. Rebalance Portfolio: Lastly, keep revisiting and rebalancing your portfolio based on market dynamics. By doing so, one can adapt to market conditions and optimize returns. 

Please consult with a financial advisor before making any decision, as they can provide personalized guidance based on your financial goals and risk tolerance.
  
Remember that investing always involves risks and it is essential to conduct thorough due diligence. 
  
