In [20]:
import openai
import json
from dotenv import load_dotenv
import os
import re
import pandas as pd

In [2]:
# Carregue a chave de API do arquivo .env
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

In [3]:
# Carregue as transações do JSON
data = """
{
  "transactions": [
    {
      "transaction_id": "3CWE4927CF15355",
      "description": "APPL3STORE",
      "type": "OUTFLOW",
      "amount": 650.89,
      "currency": "BRL",
      "institution": "Erebor Retail Brasil",
      "merchant": {
        "merchant_name": "Apple, Inc"
      }
    },
    {
      "transaction_id": "3CWE4927CF15996",
      "description": "OXXO SP",
      "type": "OUTFLOW",
      "amount": 999.9,
      "currency": "BRL",
      "institution": "BCO DO BRASIL",
      "merchant": {
        "merchant_name": "Merchants R Us Global"
      }
    }
  ]
}
"""
transactions = json.loads(data)["transactions"]

In [25]:
# Crie uma função para categorizar uma transação
def categorize_transaction(transaction):
    # Use a API ChatGPT para gerar uma categoria e subcategoria com base na descrição da transação
    response = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[
            {"role": "system", "content": "You are a helpful assistant with a lot of domain in Brazilian Open Finance."},
            {"role": "user", "content": f"I have a transaction with the following details: Description: {transaction['description']}, Type: {transaction['type']}, Amount: {transaction['amount']}, Currency: {transaction['currency']}, Institution: {transaction['institution']}, Merchant: {transaction['merchant']['merchant_name']}. Can you help me categorize this transaction into category and subcategory? Put the category and subcategory inside '<>' to be easy to fetch"},
        ]
    )
    # A resposta do modelo será a última mensagem do usuário
    category = response['choices'][0]['message']['content']
    # Use regex para extrair a categoria e subcategoria
    matches = re.findall(r'<(.*?)>', category)
    if len(matches) == 2:
        return {"Category": matches[0], "Subcategory": matches[1], "Transaction Data": transaction, "ChatGPT Response": response}
    else:
        return {"Category": None, "Subcategory": None, "Transaction Data": transaction, "ChatGPT Response": response}

In [26]:
# Categorize cada transação e armazene os resultados em um DataFrame
results = []
for transaction in transactions:
    categories = categorize_transaction(transaction)
    results.append({
        "Transaction ID": transaction['transaction_id'],
        "Category": categories["Category"],
        "Subcategory": categories["Subcategory"],
        "Transaction Data": categories["Transaction Data"],
        "ChatGPT Response": categories["ChatGPT Response"]
    })

In [27]:
df = pd.DataFrame(results)

In [29]:
df.to_csv('mastercard_chatgpt.csv', header=True, index=None)

In [17]:
df['Category'][0]

"Based on the information provided, here's the categorization for the transaction:\n\nCategory: <Retail>\nSubcategory: <Technology>\n\nSo the transaction can be categorized as <Retail><Technology>."