In [6]:
import openai
from dotenv import load_dotenv
import os
import pprint
import json
from pydantic import BaseModel, Field
from typing import Optional, List

# Load the .env file located two directories above the notebook, then pass the
# OPENAI_API_KEY environment variable to the openai module.
# The key stays None when the variable is not set.
load_dotenv(dotenv_path="../../.env")
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
# Base name (without extension) of the text file and the relative path it is read from.
filename = 'db-gesamt-dashboard'
import_filename = f'../../text/{filename}.txt'

# Read the complete file into a single UTF-8 decoded string.
with open(import_filename, mode='r', encoding='utf-8') as f:
    db_interviews = f.read()

In [15]:
# define function structure in pydantic

# One classification category together with its count.
# NOTE(review): documented with comments instead of a class docstring on purpose --
# pydantic copies a model's docstring into the generated JSON schema, which would
# change what .schema() emits.
class Topic(BaseModel):
    # Both fields are required (Field(...)); the description strings end up in the schema.
    name: str = Field(..., description="Classification category")
    count: int = Field(..., description="Count for the category")

# Parameter model for the "extract_insights" function: its .schema() output is
# passed as the function's "parameters" in get_completion_from_messages.
# NOTE(review): kept as comments rather than a docstring, since pydantic would add a
# class docstring to the generated schema as its description.
class ExtractInsightsParams(BaseModel):
    # All three fields are required (Field(...)); descriptions are part of the schema.
    interviewee_names: List[str] = Field(..., description="The names of the interviewees, e.g. Christoph, person y")
    interviewee_count: int = Field(..., description="Number of people interviewed")
    interview_topics: List[Topic] = Field(..., description="List of top 10 classification categories with their counts")


In [16]:
def get_completion_from_messages(messages,
                                 model="gpt-3.5-turbo-16k-0613",
                                 temperature=0,
                                 max_tokens=1000):
    """Have the chat model call ``extract_insights`` and return its arguments.

    Args:
        messages: Chat messages, passed unchanged to
            ``openai.ChatCompletion.create``.
        model: Name of the chat model to use.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens. Defaults to 1000, the
            value that used to be hard-coded.

    Returns:
        The function-call arguments decoded from JSON (normally a dict with
        the keys described by ``ExtractInsightsParams.schema()``).

    Raises:
        KeyError: If the reply message carries no ``function_call``.
        json.JSONDecodeError: If the arguments are not valid JSON, for example
            because the reply was cut off at ``max_tokens``.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        functions=[
            {
                "name": "extract_insights",
                "description": "Extract insights from interviews",
                "parameters": ExtractInsightsParams.schema(),
            }
        ],
        # Name the function explicitly: with "auto" the model is free to reply
        # in plain text, and the "function_call" lookup below would then fail.
        function_call={"name": "extract_insights"},
    )
    print(response)
    choice = response.choices[0]
    function_call = choice.message["function_call"]
    try:
        arguments = json.loads(function_call["arguments"])
    except json.JSONDecodeError as exc:
        # Same exception type as before, but state the likely cause: a reply
        # truncated at max_tokens leaves the JSON object unterminated.
        finish_reason = choice.get("finish_reason")
        raise json.JSONDecodeError(
            "function_call arguments are not valid JSON "
            f"(finish_reason={finish_reason!r}, max_tokens={max_tokens}): {exc.msg}",
            exc.doc,
            exc.pos,
        ) from exc
    return arguments

In [17]:
# Marker used to fence the interview text off from the instructions.
delimiter = "####"
# Instructions for the model; they state that the interviews are delimited by
# `delimiter`, so the text below must be wrapped on both sides.
system_message = f"""
You are a user researcher. \
You will be provided with customer interviews \
with travellers using Deutsche Bahn trains. \
The interviews are delimited by {delimiter} characters.
Do the following:
1. List the interviewees names
2. Count the number of interviewees
3. Identify top 10 classification categories that are relevant \
from the perspective of a Deutsche Bahn product manager. \
List for every category the number of occurencies. 

"""

# Close the delimited section as well as opening it: previously only the
# opening delimiter was emitted, so the prompt's own description of the input
# did not match what was sent.
messages = [
    {'role': 'system', 'content': f"{system_message}{delimiter}{db_interviews}{delimiter}"},
]
# `response` holds the decoded function-call arguments returned by
# get_completion_from_messages, not the raw API response.
response = get_completion_from_messages(messages)
pprint.pprint(response)

{
  "id": "chatcmpl-7VeeMmEKsqPhexaCKE3Mr0qCIGTOh",
  "object": "chat.completion",
  "created": 1687778782,
  "model": "gpt-3.5-turbo-16k-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": null,
        "function_call": {
          "name": "extract_insights",
          "arguments": "{\n  \"interviewee_names\": [\"Christoph\", \"Julia\", \"person x\", \"person y\", \"person z\", \"person p\", \"person n\", \"Susanne\", \"Gast\", \"MCK\", \"Telefoninterview\", \"TH\", \"EL\", \"TJ\", \"LG\", \"DD\", \"Frau\", \"27 j\u00e4hrige Business Frau\", \"18 j\u00e4hrige Schweizerin\", \"Mittelalter Mann\", \"Mann um die 50\"],\n  \"interviewee_count\": 21,\n  \"interview_topics\": [\n    {\"name\": \"Gep\u00e4ck\", \"count\": 19},\n    {\"name\": \"Reiseplanung\", \"count\": 14},\n    {\"name\": \"Informationen im Zug\", \"count\": 17},\n    {\"name\": \"Kinder und Familien\", \"count\": 7},\n    {\"name\": \"Rollstuhlfahrer\", \"count

In [14]:
# Report how many names the model returned under 'interviewee_names'.
names = response['interviewee_names']
count = len(names)
print("Number of interviewee_names: ", count)

Number of interviewee_names:  21
