In [2]:
import pandas as pd
import os
from openai import OpenAI
import openai

client = OpenAI(
    api_key="" # OPENAI_API_KEY
)

In [31]:
def remove_newlines(serie):
    serie = serie.str.replace('\n', ' ')
    # serie = serie.str.replace('\\n', ' ')
    serie = serie.str.replace('  ', ' ')
    serie = serie.str.replace('  ', ' ')
    return serie

In [32]:

# Create a list to store the text files
texts=[]

domain = 'chase.com'
# Get all the text files in the text directory
for file in os.listdir("text/" + domain + "/"):

    # Open the file and read the text
    with open("text/" + domain + "/" + file, "r", encoding="UTF-8") as f:
        text = f.read()

        # Omit the first 11 lines and the last 4 lines, then replace -, _, and #update with spaces.
        texts.append((file[11:-4].replace('-',' ').replace('_', ' ').replace('#update',''), text))

# Create a dataframe from the list of texts
df = pd.DataFrame(texts, columns = ['fname', 'text'])

# Set the text column to be the raw text with the newlines removed
df['text'] = df.fname + ". " + remove_newlines(df.text)
df.to_csv('processed/scraped.csv', escapechar='\\')
df.head()

Unnamed: 0,fname,text
0,s digital customer service helpful tips online...,s digital customer service helpful tips online...
1,ersonal banking education basics is mobile ban...,ersonal banking education basics is mobile ban...
2,ersonal savings savings account,ersonal savings savings account. Chase ...
3,s digital customer service helpful tips person...,s digital customer service helpful tips person...
4,el:1 877 399 9111,el:1 877 399 9111. 404 Please turn ...


In [11]:
import numpy as np
# from openai.embeddings_utils import distances_from_embeddings

df=pd.read_csv('processed/embeddings.csv', index_col=0)
df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)

df.head()

Unnamed: 0,text,n_tokens,embeddings
0,chase.com es digital customer service helpful ...,428,"[-0.03415273502469063, -0.0010539006907492876,..."
1,Banca personal Tarjetas de cré...,475,"[-0.029211552813649178, 0.00502610532566905, 0..."
2,"Síguenos: Facebook Instagram X, anteriormente ...",497,"[-0.030613889917731285, -0.002363001462072134,..."
3,Inversiones por J.P. Morgan Ya sea que escojas...,499,"[-0.01644776575267315, -0.020437059924006462, ..."
4,Ve cómo nos dedicamos a ayudar a protegerte a ...,485,"[-0.020626284182071686, -0.017504608258605003,..."


In [12]:
from typing import List, Optional
from scipy import spatial

def distances_from_embeddings(
    query_embedding: List[float],
    embeddings: List[List[float]],
    distance_metric="cosine",
) -> List[List]:
    distance_metrics = {
        "cosine": spatial.distance.cosine,
        "L1": spatial.distance.cityblock,
        "L2": spatial.distance.euclidean,
        "Linf": spatial.distance.chebyshev,
    }
    distances = [
        distance_metrics[distance_metric](query_embedding, embedding)
        for embedding in embeddings
    ]
    return distances

In [22]:
def create_context(
    question, df, max_len=1800, size="ada"
):
    """
    Create a context for a question by finding the most similar context from the dataframe
    """

    # Get the embeddings for the question
    q_embeddings = client.embeddings.create(input=question, model='text-embedding-ada-002').data[0].embedding

    # Get the distances from the embeddings
    df['distances'] = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')


    returns = []
    cur_len = 0

    # Sort by distance and add the text to the context until the context is too long
    for i, row in df.sort_values('distances', ascending=True).iterrows():

        # Add the length of the text to the current length
        cur_len += row['n_tokens'] + 4

        # If the context is too long, break
        if cur_len > max_len:
            break

        # Else add it to the text that is being returned
        returns.append(row["text"])

    # Return the context
    return "\n\n###\n\n".join(returns)

In [55]:
prompt = '''
You are a highly knowledgeable and friendly chatbot designed to assist customers with their needs at Chase Bank. You have access to comprehensive information from over 1200 Chase Bank websites. Your primary goal is to provide accurate, helpful, and concise responses to user inquiries regarding Chase Bank's products and services. You should always strive to be clear, informative, and user-friendly in your responses.

For each response, if applicable, provide relevant website links to the Chase Bank pages where the information is sourced from, ensuring that users can verify the information and explore further details if they wish. Whether it's helping a customer understand how to cash a check, apply for a credit card, manage their account, or get information on various banking services, you are equipped to guide them efficiently with precise and actionable information.

Your response should include the following:
1. A clear and concise answer to the user's query.
2. Any relevant steps or instructions they need to follow.
3. Links to the Chase Bank website or specific pages where the information can be verified or further explored.
4. A friendly and professional tone throughout the interaction.
'''

In [74]:
def answer_question(
    df,
    model="gpt-4o",
    conversation_history = [],
    question="Am I allowed to publish model outputs to Twitter, without a human review?",
    max_len=1800,
    size="ada",
    debug=False,
    max_tokens=500,
    stop_sequence=None
):
    """
    Answer a question based on the most similar context from the dataframe texts
    """
    context = create_context(
        question,
        df,
        max_len=max_len,
        size=size,
    )
    # If debug, print the raw model response
    if debug:
        print("Context:\n" + context)
        print("\n\n")

    try:
        
        messages = conversation_history + [
            {"role": "system", "content": f"{prompt}"},
            {"role": "user", "content": f"Context: {context}\n\n---\n\nQuestion: {question}\nAnswer:"}
        ]
        
        # Create a chat completion using the question and context
        response = client.chat.completions.create(
            model="gpt-4o",
            messages= messages,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
        )
        
        # Extract the answer from the response
        answer = response.choices[0].message.content

        # Update conversation history
        conversation_history.append({"role": "user", "content": question})
        conversation_history.append({"role": "assistant", "content": answer})
        
        return answer
    except Exception as e:
        print(e)
        return ""

In [75]:
# answer_question(df, question="What day is it?", debug=False)

# answer_question(df, question="What is our newest embeddings model?")

answer_question(df, question="Do you have any investment plan for me?")

"Yes, Chase Bank offers a variety of investment plans tailored to meet different financial goals and needs. Here are some key options you can consider:\n\n1. **Wealth Plan Tool**: This tool provides a comprehensive view of your current financial situation and helps you prioritize your goals. It can assist you in planning for retirement, saving for college, or building a lasting legacy.\n\n2. **Retirement Planning**: Whether you are looking forward to more family time, travel, or pursuing your passions, Chase can help you prepare for retirement. Learn more about retirement planning [here](https://www.chase.com/personal/investments/retirement).\n\n3. **Education Planning**: From 529 plans to financial aid, Chase can help you understand the best saving and investment opportunities for your family’s education needs. Learn more about education planning [here](https://www.chase.com/personal/investments/education).\n\n4. **Sustainable Investing**: If you want to align your portfolio with your

In [90]:
# Example usage
conversation_history = []
question = "what is the difference between a credit card and a debit card"

# First question
answer = answer_question(df, question = question, conversation_history = conversation_history)
print(answer)

A credit card and a debit card are both payment methods that allow you to make purchases, but they operate differently and have distinct features. Here are the key differences:

### Credit Card:
1. **Borrowed Funds**: When you use a credit card, you are borrowing money from the card issuer up to a certain limit. You will need to pay back this amount, usually with interest if not paid in full by the due date.
2. **Credit Score Impact**: Using a credit card responsibly can help build your credit score. Late payments or high balances can negatively impact your credit score.
3. **Rewards and Perks**: Many credit cards offer rewards such as cash back, travel points, or other perks.
4. **Interest and Fees**: If you do not pay off your balance in full each month, you will incur interest charges. There may also be annual fees, late payment fees, and other charges.
5. **Fraud Protection**: Credit cards generally offer strong fraud protection, and you are typically not liable for unauthorized ch

In [91]:
# Next question
question = "What was the question i just asked you?"
answer = answer_question(df, question = question, conversation_history = conversation_history)
print(answer)

You asked about the difference between a credit card and a debit card. Here is a summary of the key differences:

### Credit Card:
1. **Borrowed Funds**: Uses borrowed money from the card issuer.
2. **Credit Score Impact**: Affects your credit score.
3. **Rewards and Perks**: Often offers rewards like cash back or travel points.
4. **Interest and Fees**: May incur interest and fees if not paid in full.
5. **Fraud Protection**: Generally offers strong fraud protection.

### Debit Card:
1. **Own Funds**: Uses money directly from your checking account.
2. **No Impact on Credit Score**: Does not affect your credit score.
3. **Limited Rewards**: Usually offers fewer rewards.
4. **No Interest**: No interest charges, but may have overdraft fees.
5. **Fraud Protection**: Offers fraud protection, but recovery may be slower.

For more detailed information, you can visit the Chase Bank website:
- [Credit Cards](https://www.chase.com/personal/credit-cards)
- [Debit Cards](https://www.chase.com/per

In [92]:
conversation_history

[{'role': 'user',
  'content': 'what is the difference between a credit card and a debit card'},
 {'role': 'assistant',
  'content': 'A credit card and a debit card are both payment methods that allow you to make purchases, but they operate differently and have distinct features. Here are the key differences:\n\n### Credit Card:\n1. **Borrowed Funds**: When you use a credit card, you are borrowing money from the card issuer up to a certain limit. You will need to pay back this amount, usually with interest if not paid in full by the due date.\n2. **Credit Score Impact**: Using a credit card responsibly can help build your credit score. Late payments or high balances can negatively impact your credit score.\n3. **Rewards and Perks**: Many credit cards offer rewards such as cash back, travel points, or other perks.\n4. **Interest and Fees**: If you do not pay off your balance in full each month, you will incur interest charges. There may also be annual fees, late payment fees, and other 