In [27]:
import openai
from dotenv import load_dotenv
import os
import pprint
import json

load_dotenv("../../.env")
openai.api_key = os.getenv("OPENAI_API_KEY")

In [28]:
filename = 'db-gesamt-dashboard'
import_filename = '../../text/' + filename + '.txt'

# Load text file as plain text
with open(import_filename, encoding='utf-8') as f:
    db_interviews = f.read()

In [29]:
def get_completion_from_messages(messages, 
                                 model="gpt-3.5-turbo-16k-0613", 
                                 temperature=0):
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature, 
        max_tokens=1000,
        functions= [
          {
            "name": "extract_insights",
            "description": "Extract insights from interviews",
            "parameters": {
              "type": "object",
              "properties": {
                "interviewee_names": {
                  "type": "string",
                  "description": "The names of the interviewees, e.g. Christoph, person y"
                },
                "interviewee_count": {
                  "type": "number",
                  "description": "Number of people interviewed"
                },
                "interview_topics": {
                  "type": "string",
                  "description": "top 10 classification categories"
                }
              },
            }
          }
  ],
    )
    print(response)
    function_call = response.choices[0].message["function_call"]
    arguments = json.loads(function_call["arguments"])
    return arguments

In [30]:
delimiter = "####"
system_message = f"""
You are a user researcher. \
You will be provided with customer interviews \
with travellers using Deutsche Bahn trains. \
The interviews are delimited by {delimiter} characters.
Do the following:
1. List the interviewees names
2. Count the number of interviewees
3. Identify top 10 classification categories that are relevant \
from the perspective of a Deutsche Bahn product manager. \
List for every category the number of occurencies. 

"""

messages =  [  
{'role':'system', 'content': f"{system_message}{delimiter}{db_interviews}"},    
] 
response = get_completion_from_messages(messages)
pprint.pprint(response)

{
  "id": "chatcmpl-7VJsKRvxJvK7ATcWKvsrUUfscoUwk",
  "object": "chat.completion",
  "created": 1687698924,
  "model": "gpt-3.5-turbo-16k-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": null,
        "function_call": {
          "name": "extract_insights",
          "arguments": "{\n  \"interviewee_names\": \"Christoph, Julia, person x, person y, person z, person p, person n, Susanne, Gast, MCK, Telefoninterview, TH, EL, TJ, LG, DD, Frau, 18 j\u00e4hrige Schweizerin, 27 j\u00e4hrige Business Frau, Mittelalter Mann, Mann um die 50\",\n  \"interviewee_count\": 20,\n  \"interview_topics\": \"Reisen mit Gep\u00e4ck, Schwierigkeiten beim Reisen mit Gep\u00e4ck, Ideale Reise, DB Gep\u00e4ckservice, Planung der Reise, Informationen w\u00e4hrend der Reise, Informationen vor dem Ausstieg/Umstieg\"\n}"
        }
      },
      "finish_reason": "function_call"
    }
  ],
  "usage": {
    "prompt_tokens": 12269,
    "completion_token