In [29]:
import openai
from dotenv import load_dotenv
import os
import pprint
import json
from pydantic import BaseModel, Field
from typing import Optional, List

load_dotenv("../../.env")
openai.api_key = os.getenv("OPENAI_API_KEY")

In [30]:
filename = 'db-gesamt-dashboard'
import_filename = '../../text/' + filename + '.txt'

# Load text file as plain text
with open(import_filename, encoding='utf-8') as f:
    db_interviews = f.read()

In [34]:
# define function structure in pydantic

class Topic(BaseModel):
    name: str = Field(..., description="Classification category")
    count: int = Field(..., description="Count for the category")

class Interviewee(BaseModel):
    name: str = Field(..., description="interviewee name")
    about: str = Field(..., description="short description of the person. Max 25 characters")

class ExtractInsightsParams(BaseModel):
    interviewee_count: int = Field(..., description="Number of people interviewed")
    question_count: int = Field(..., description="Avergae number of questions asked")
    interview_topics: List[Topic] = Field(..., description="List of top 10 classification categories with their counts")
    interviewee_list: List[Interviewee] = Field(..., description="List of interviewees")

In [32]:
def get_completion_from_messages(messages, 
                                 model="gpt-3.5-turbo-16k-0613", 
                                 temperature=0):
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature, 
        max_tokens=3000,
        functions=[
        {
          "name": "extract_insights",
          "description": "Extract insights from interviews",
          "parameters": ExtractInsightsParams.schema()
        }
    ],
        function_call="auto",
    )
    print(response)
    function_call = response.choices[0].message["function_call"]
    arguments = json.loads(function_call["arguments"])
    return arguments

In [35]:
delimiter = "####"
system_message = f"""
You are a user researcher. \
You will be provided with customer interviews \
with train travellers. \
The interviews are delimited by {delimiter} characters.

"""

messages =  [  
{'role':'system', 'content': f"{system_message}{delimiter}{db_interviews}"},    
] 
response = get_completion_from_messages(messages)
pprint.pprint(response)

{
  "id": "chatcmpl-7a2tNciq3nqgMIkgZAnlfVKWfTAqU",
  "object": "chat.completion",
  "created": 1688825281,
  "model": "gpt-3.5-turbo-16k-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": null,
        "function_call": {
          "name": "extract_insights",
          "arguments": "{\n  \"interviewee_count\": 10,\n  \"question_count\": 40,\n  \"interview_topics\": [\n    {\n      \"name\": \"Gep\u00e4ck\",\n      \"count\": 15\n    },\n    {\n      \"name\": \"Reservierung\",\n      \"count\": 10\n    },\n    {\n      \"name\": \"Orientierung\",\n      \"count\": 8\n    },\n    {\n      \"name\": \"Versp\u00e4tungen\",\n      \"count\": 5\n    },\n    {\n      \"name\": \"Sitzplatzsuche\",\n      \"count\": 7\n    },\n    {\n      \"name\": \"Informationen\",\n      \"count\": 12\n    },\n    {\n      \"name\": \"Anschlussz\u00fcge\",\n      \"count\": 6\n    },\n    {\n      \"name\": \"WLAN\",\n      \"count\": 4\n    },\

In [8]:
count = len(response['interviewee_names'])
print("Number of interviewee_names: ", count)

Number of interviewee_names:  20
