In [252]:
import os
import re
import time

import numpy as np
import pandas as pd
from openai import OpenAI

## Store Fund Details in Vector Store
- Relevant information will be retrieved from the store to generate portfolio recommendations for the user.
- As of writing, the fund details are uploaded as .json because the OpenAI Retrieval tool does not accept the .csv format.

In [253]:
# Load the processed fund-level details from the 20240223 snapshot folder.
fund_detail_df = pd.read_csv('../data/processed/20240223/fund_detail.csv')

In [254]:
def clean_asset_class(asset_class):
    """Map a raw asset-class label onto the two classes used in this notebook.

    Parameters
    ----------
    asset_class : object
        Raw label. Expected to be a string; missing values (NaN/None) are tolerated.

    Returns
    -------
    str or None
        'Equity' if the label contains 'Stock', 'Fixed Income' if it contains
        'Bond' (checked in that order), otherwise None. Non-string input
        also yields None.
    """
    # Missing cells reach .apply() as float NaN; `'Stock' in nan` would raise
    # TypeError, so treat anything that is not a string as "no supported class".
    if not isinstance(asset_class, str):
        return None
    if 'Stock' in asset_class:
        return 'Equity'
    if 'Bond' in asset_class:
        return 'Fixed Income'
    return None

In [255]:
def clean_risk_level(risk_level):
    """Convert a textual risk label into an ordinal score from 1 to 5.

    The four labels listed below map to 1-4 in order; every other value
    (including unrecognised or missing labels) maps to 5.
    """
    graded_labels = (
        'Conservative',
        'Conservative to Moderate',
        'Moderate',
        'Moderate to Aggressive',
    )
    for score, label in enumerate(graded_labels, start=1):
        if risk_level == label:
            return score
    # Anything not matched above falls through to the highest score.
    return 5

In [256]:
def clean_product_summary(summary):
    """Return the summary unchanged, or 'No Information' when it is missing.

    "Missing" is whatever `pd.isna` reports as missing (e.g. NaN or None).
    """
    return 'No Information' if pd.isna(summary) else summary

In [257]:
# NOTE: this cell is not idempotent. Re-running it feeds the already-cleaned
# labels ('Equity' / 'Fixed Income') back through clean_asset_class, which maps
# them to None, so the filter below would then drop every row. Reload the CSV first.
fund_detail_df['asset_class'] = fund_detail_df['asset_class'].apply(clean_asset_class)
# Keep only funds mapped to a supported asset class. .copy() makes the filtered
# frame independent, so the column assignments below do not write into a slice
# of the original frame (avoids SettingWithCopyWarning).
fund_detail_df = fund_detail_df[fund_detail_df['asset_class'].notna()].copy()
fund_detail_df['risk_level'] = fund_detail_df['risk_level'].apply(clean_risk_level)
fund_detail_df['product_summary'] = fund_detail_df['product_summary'].apply(clean_product_summary)

In [258]:
# Show column dtypes and non-null counts to confirm the cleaning left no gaps.
fund_detail_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 311 entries, 1 to 345
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       311 non-null    int64  
 1   symbol           311 non-null    object 
 2   category         311 non-null    object 
 3   product_summary  311 non-null    object 
 4   asset_class      311 non-null    object 
 5   inception_date   311 non-null    object 
 6   expense_ratio    311 non-null    float64
 7   risk_level       311 non-null    int64  
dtypes: float64(1), int64(2), object(5)
memory usage: 21.9+ KB


In [259]:
# Load the region and sector tables from the same 20240223 snapshot folder.
region_df = pd.read_csv('../data/processed/20240223/region.csv')
sector_df = pd.read_csv('../data/processed/20240223/sector.csv')

In [260]:
# Collapse each table to one row per symbol, gathering that symbol's values into a list.
region_df = region_df.groupby('symbol')['region'].agg(list).reset_index()
sector_df = sector_df.groupby('symbol')['sector'].agg(list).reset_index()

In [261]:
# Attach the per-symbol region and sector lists to the selected fund columns.
# The join key is spelled out so an accidental shared column name cannot
# silently change what the merge matches on.
fund_detail_df = pd.merge(
    left=fund_detail_df[['symbol', 'category', 'product_summary', 'asset_class', 'risk_level']],
    right=region_df,
    how='left',
    on='symbol',
)
fund_detail_df = pd.merge(left=fund_detail_df, right=sector_df, how='left', on='symbol')
# index=False keeps the row index out of the file; otherwise it is written as
# an extra column with an empty header.
fund_detail_df.to_csv('fund_details.csv', index=False)
# Reload from disk so the in-memory frame matches the exported file
# (list-valued columns come back as their string representation).
fund_detail_df = pd.read_csv('fund_details.csv')

In [262]:
import csv
import json

# Read the exported CSV as one dict per row, discarding any entry whose column
# name is empty (such as an unnamed index column).
with open('fund_details.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    data = [
        {column: value for column, value in record.items() if column}
        for record in reader
    ]

# Write the records out as a JSON array for upload to the assistant.
with open('fund_details.json', 'w') as json_file:
    json.dump(data, json_file)

# Creating OpenAI Assistant

In [263]:
# Read the API key from the environment rather than hard-coding it; this raises
# KeyError if the 'openai_api_key' variable is not set.
API_KEY = os.environ['openai_api_key']

In [264]:
# Single OpenAI client reused by every API call in the cells below.
client = OpenAI(api_key=API_KEY)

In [265]:
# System instructions for the assistant: how to decide whether to swap at most
# one fund in the baseline portfolio, and the JSON shape of the answer.
# Step 2c previously told the model to "terminate Step 3", but the procedure
# only has Steps 1 and 2; it now refers to Step 2.
instruction = """You are a financial advisor who excels in adjusting baseline investment portfolio to better suit user's investment preferences. 

You will be provided with a baseline portfolio in a format similar to the one delimited by triple quote below. Each symbol represents a fund in the portfolio. The % Allocation for all funds must add up to 100% at all time.
The number of funds in a baseline portfolio could range from 2 to 4.

Baseline Portfolio Example
\"\"\"
Symbol : VTSAX
Asset Class : Equity
% Allocation : 48.90%
Risk Level : 4

Symbol :  VTIAX
Asset Class : Equity
% Allocation : 31.80%
Risk Level : 5

Symbol : VTBIX
Asset Class : Fixed Income
% Allocation : 13.20%
Risk Level : 2

Symbol : VTILX
Asset Class : Fixed Income
% Allocation : 6.10%
Risk Level : 2
\"\"\"
You will also be given the user's investment preferences in a format similar to the one delimited by triple quote below. 

User's Investment Preferences Example
\"\"\"
I would like to invest in ESG funds that consider environmental, social, and governance issues.
\"\"\"

You are to strictly follow the steps delimited by triple quote below in chronological order to determine if it is required for you to adjust Baseline Portfolio to suit User's Investment Preference.

Remember you are only to adjust the baseline portfolio IF AND ONLY IF it does not align with User's investment preference. You can ONLY replace ONE of the funds within the baseline portfolio with ONE alternative fund to better match the user's investment objectives if and only if ALL the conditions listed in Step 2 are satisfied. If you do replace the existing fund symbol with alternative fund symbol remember to update the risk level as well. Utilise the provided information to make informed decisions.

Steps
\"\"\"
1. First check if the baseline portfolio aligns with user's investment preferences. If the baseline portfolio is not aligned with User's Investment Preference, you can move on to Step 2. Otherwise, you terminate at the current step and return the baseline portfolio as your answer.

2. For each fund symbol in the baseline portfolio.
     a. Look for alternative fund symbols that MUST satisfy ALL of the 
         following conditions strictly using the fund_details.json file 
         provided.
         Condition 1 : Alternative fund symbols must not be the same as 
         one of the existing fund symbols in the baseline portfolio.
         Condition 2 : Alternative fund symbol must satisfy user's 
          investment preference mentioned above.
         Condition 3 : Alternative fund symbol must have the same or 
         smaller risk level than the corresponding fund symbol in the 
         baseline portfolio that it is replacing.
         Condition 4 : Alternative fund must be of the same asset class as 
         the corresponding fund in the baseline portfolio that it is 
         replacing.
     b. If there are no such alternative funds, move on to the next fund in 
         the baseline portfolio and start over from step a. Otherwise 
         choose the most suitable alternative fund to replace the 
         corresponding fund in the baseline portfolio.
     c. If you have already successfully found one alternative funds to 
         replace one of the existing funds in the baseline portfolio, you 
         must terminate Step 2 and return the adjusted baseline portfolio 
         as your answer in the same format as marked out in the baseline 
         portfolio.
\"\"\"

If you are unable to find any alternative fund that satisfies all of the condition marked out in Step 2 for all existing funds in the baseline portfolio, you are to return the baseline portfolio as the answer.

You are to STRICTLY provide a RFC8259 compliant JSON response following the Answer Format provided below without deviation. 

Answer Format:
\"\"\"
[
{
  "Symbol": "symbol of the first fund",
  "Asset Class": "asset class of the first fund",
  "% Allocation": "percentage allocation of the first fund",
  "Risk Level":"risk level of the first fund"
},
{
  "Symbol": "symbol of the second fund",
  "Asset Class": "asset class of the second fund",
  "% Allocation": "percentage allocation of the second fund",
  "Risk Level":"risk level of the second fund"
},{
  "Symbol": "symbol of the third fund",
  "Asset Class": "asset class of the third fund",
  "% Allocation": "percentage allocation of the third fund",
  "Risk Level":"risk level of the third fund"
},{
  "Symbol": "symbol of the fourth fund",
  "Asset Class": "asset class of the fourth fund",
  "% Allocation": "percentage allocation of the fourth fund",
  "Risk Level":"risk level of the fourth fund"
}
]
\"\"\"

Do NOT include any explanation or accompanying text to show your thought process. Note that you do not necessarily need to include 4 funds in your answers, the number of funds in your answer should be the same as number of funds in your baseline portfolio.
"""

In [266]:
# Upload a file with an "assistants" purpose
# Upload the fund details with the "assistants" purpose so the assistant can use them.
# The context manager closes the local file handle once the upload call returns;
# the original bare open() left it open for the life of the kernel.
with open("fund_details.json", "rb") as fund_details_file:
    file = client.files.create(
        file=fund_details_file,
        purpose='assistants'
    )

In [267]:
# Register the assistant: system instructions, the retrieval tool, and the
# uploaded fund-details file it may consult.
assistant = client.beta.assistants.create(
    model="gpt-4-turbo-preview",
    name="Financial Advisor",
    instructions=instruction,
    tools=[{"type": "retrieval"}],
    file_ids=[file.id],
)

In [268]:
# Display the created assistant's fields to confirm what was registered.
assistant.model_dump()

{'id': 'asst_KRChBi4IWj0QcPWGR7nFy4hC',
 'created_at': 1710397346,
 'description': None,
 'file_ids': ['file-kNhCYwuf7WrrjSn2BTSWe8rO'],
 'instructions': 'You are a financial advisor who excels in adjusting baseline investment portfolio to better suit user\'s investment preferences. \n\nYou will be provided with a baseline portfolio in a format similar to the one delimited by triple quote below. Each symbol represents a fund in the portfolio. The % Allocation for all funds must add up to 100% at all time.\nThe number of funds in a baseline portfolio could range from 2 to 4.\n\nBaseline Portfolio Example\n"""\nSymbol : VTSAX\nAsset Class : Equity\n% Allocation : 48.90%\nRisk Level : 4\n\nSymbol :  VTIAX\nAsset Class : Equity\n% Allocation : 31.80%\nRisk Level : 5\n\nSymbol : VTBIX\nAsset Class : Fixed Income\n% Allocation : 13.20%\nRisk Level : 2\n\nSymbol : VTILX\nAsset Class : Fixed Income\n% Allocation : 6.10%\nRisk Level : 2\n"""\nYou will also be given the user\'s investment prefer

# Creating One Thread per User

In [209]:
# Example user message: a four-fund baseline portfolio followed by the user's
# stated preference, laid out in the format the assistant instructions describe.
user_input = """

Baseline Portfolio
\"\"\"
Symbol : VTSAX
Asset Class : Equity
% Allocation : 48.90%
Risk Level : 4

Symbol :  VTIAX
Asset Class : Equity
% Allocation : 31.80%
Risk Level : 5

Symbol : VTBIX
Asset Class : Fixed Income
% Allocation : 13.20%
Risk Level : 2

Symbol : VTILX
Asset Class : Fixed Income
% Allocation : 6.10%
Risk Level : 2
\"\"\"

User's Investment Preferences
\"\"\"
I would like to invest in ESG funds that consider environmental, social, and governance issues.
\"\"\"

"""

In [210]:
# Open a new thread whose only message is the user's portfolio and preference text.
thread = client.beta.threads.create(
    messages=[{'role': 'user', 'content': user_input}]
)

In [211]:
# Start a run of the assistant on the thread, passing the model name explicitly.
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
    model='gpt-4-turbo-preview',
)

In [230]:
# Poll until the run leaves its transient states. A single status check only
# works if the cell is re-run by hand until it reports "completed"; under
# Run All the next cell would fetch messages before the reply exists.
while True:
    run_status = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id,
    )
    # 'queued', 'in_progress' and 'cancelling' resolve without caller input;
    # any other status is final or needs action from us, so stop waiting.
    if run_status.status not in ('queued', 'in_progress', 'cancelling'):
        break
    time.sleep(1)
print(run_status.status)

completed


In [231]:
# Fetch the messages currently on the thread (the user prompt and any assistant reply).
messages_list = client.beta.threads.messages.list(thread_id=thread.id)

In [232]:
# Show the raw page object to inspect the message structure.
messages_list

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_4Qq1D1NG07DaBPMEDDlSzEHT', assistant_id='asst_xhtFHEjbvToftIYZN3QtKnnd', content=[MessageContentText(text=Text(annotations=[], value='```json\n[\n{\n  "Symbol": "VEOIX",\n  "Asset Class": "Equity",\n  "% Allocation": "48.90%",\n  "Risk Level":"4"\n},\n{\n  "Symbol": "VTIAX",\n  "Asset Class": "Equity",\n  "% Allocation": "31.80%",\n  "Risk Level":"5"\n},\n{\n  "Symbol": "VTBIX",\n  "Asset Class": "Fixed Income",\n  "% Allocation": "13.20%",\n  "Risk Level":"2"\n},\n{\n  "Symbol": "VTILX",\n  "Asset Class": "Fixed Income",\n  "% Allocation": "6.10%",\n  "Risk Level":"2"\n}\n]\n```'), type='text')], created_at=1710392811, file_ids=[], metadata={}, object='thread.message', role='assistant', run_id='run_rAOyB4bWQyQgOOECchDqbxTt', thread_id='thread_t94KabsChayzLXVFd0FRLWsB'), ThreadMessage(id='msg_kx7ZiYMTcBcJr2kHwoX94bmr', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='\n\nBaseline Portfolio\n"""\nSy

In [238]:
# Print the text of the first message on the page (the assistant's reply in the output shown above).
print(messages_list.data[0].content[0].text.value)

```json
[
{
  "Symbol": "VEOIX",
  "Asset Class": "Equity",
  "% Allocation": "48.90%",
  "Risk Level":"4"
},
{
  "Symbol": "VTIAX",
  "Asset Class": "Equity",
  "% Allocation": "31.80%",
  "Risk Level":"5"
},
{
  "Symbol": "VTBIX",
  "Asset Class": "Fixed Income",
  "% Allocation": "13.20%",
  "Risk Level":"2"
},
{
  "Symbol": "VTILX",
  "Asset Class": "Fixed Income",
  "% Allocation": "6.10%",
  "Risk Level":"2"
}
]
```


In [113]:
# Delete the thread on the server; the result of the delete call is kept in `response`.
response = client.beta.threads.delete(thread_id=thread.id)


In [136]:
# Keep the text of the first listed message as the raw answer string.
answer = messages_list.data[0].content[0].text.value

In [137]:
import json


In [139]:
# The assistant may wrap its JSON in a Markdown ```json fence (as in the reply
# printed earlier), which json.loads rejects. Strip a leading and trailing
# fence, if present, before parsing.
data = json.loads(re.sub(r"^```(?:json)?\s*|\s*```$", "", answer.strip()))

In [141]:
# Inspect the first fund entry of the parsed answer.
data[0]

{'Symbol': 'VTSAX',
 'Asset Class': 'Equity',
 '% Allocation': '48.90%',
 'Risk Level': '4'}

In [248]:
def parse_json_markdown(json_string: str) -> list:
    """Parse JSON that may be wrapped in a Markdown code fence.

    Parameters
    ----------
    json_string : str
        Raw text. Either bare JSON, or JSON inside a triple-backtick fence,
        optionally tagged ``json`` in any letter case, with optional text
        before the opening fence and after the closing one.

    Returns
    -------
    list
        The decoded JSON value. For the portfolio answers in this notebook
        that is a list of dicts; other valid JSON documents decode to their
        natural Python type.

    Raises
    ------
    json.JSONDecodeError
        If the extracted text is not valid JSON.
    """
    # Locate the first opening fence and capture everything after it. The tag
    # is matched case-insensitively so "```JSON" is handled like "```json".
    opening = re.search(r"```(?:json)?(.*)", json_string, flags=re.DOTALL | re.IGNORECASE)

    if opening is None:
        # No fence at all: assume the entire string is JSON.
        json_str = json_string
    else:
        json_str = opening.group(1)
        # Cut at the LAST closing fence, if any. A reply with no closing
        # fence is still parsed from the text after the opener.
        closing = json_str.rfind("```")
        if closing != -1:
            json_str = json_str[:closing]

    # strict=False lets control characters (e.g. raw newlines) appear inside strings.
    return json.loads(json_str.strip(), strict=False)

In [249]:
# Parse the first listed message's text into Python objects, tolerating a Markdown fence.
json_response = parse_json_markdown(json_string=messages_list.data[0].content[0].text.value)


In [251]:
# Display the parsed portfolio.
json_response

[{'Symbol': 'VEOIX',
  'Asset Class': 'Equity',
  '% Allocation': '48.90%',
  'Risk Level': '4'},
 {'Symbol': 'VTIAX',
  'Asset Class': 'Equity',
  '% Allocation': '31.80%',
  'Risk Level': '5'},
 {'Symbol': 'VTBIX',
  'Asset Class': 'Fixed Income',
  '% Allocation': '13.20%',
  'Risk Level': '2'},
 {'Symbol': 'VTILX',
  'Asset Class': 'Fixed Income',
  '% Allocation': '6.10%',
  'Risk Level': '2'}]

In [250]:
# Print each fund entry of the parsed portfolio on its own line.
for fund in json_response:
    print(fund)

{'Symbol': 'VEOIX', 'Asset Class': 'Equity', '% Allocation': '48.90%', 'Risk Level': '4'}
{'Symbol': 'VTIAX', 'Asset Class': 'Equity', '% Allocation': '31.80%', 'Risk Level': '5'}
{'Symbol': 'VTBIX', 'Asset Class': 'Fixed Income', '% Allocation': '13.20%', 'Risk Level': '2'}
{'Symbol': 'VTILX', 'Asset Class': 'Fixed Income', '% Allocation': '6.10%', 'Risk Level': '2'}
