In [9]:
import json
import os
from datetime import datetime, timezone

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()  # This loads the .env file
# Create the OpenAI client shared by the API cells below. The key is read from
# the OPENAI_API_KEY_survey environment variable; os.getenv returns None when
# that variable is not set.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY_survey"))

### prepare audio file

In [None]:
# Transcribe the first recording. The context manager guarantees the audio
# file handle is closed once the request has finished (or failed), instead of
# staying open for the lifetime of the kernel.
with open("recordings/recording1.m4a", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )

print(transcription.text)

In [3]:
# Keep the transcript text under its own name; the save cell and the first
# prompt read `transcription_1` rather than the response object.
transcription_1 = transcription.text

In [None]:
# Persist the transcript twice: as plain text for quick reading ...
with open("transcription_1.txt", "w", encoding="utf-8") as text_out:
    text_out.write(transcription_1)

# ... and wrapped in a small JSON document for structured consumers.
transcription_payload = {"transcription": transcription_1}
with open("transcription_1.json", "w", encoding="utf-8") as json_out:
    json_out.write(json.dumps(transcription_payload, indent=2, ensure_ascii=False))

print("Transcription saved to transcription_1.txt and transcription_1.json")
print("Content:", transcription_1)

### prompt testing

### helper functions

### convert survey to JSON

In [10]:

# Convert the survey spreadsheet into a list of question dicts (`survey`) and
# export that list to survey_1.json.
df = pd.read_excel("survey_1.xlsx", engine="openpyxl")

survey = []

for _, row in df.iterrows():
    # An empty Options cell is read as NaN; treat it as an empty string.
    raw_options = str(row["Options"]).strip() if pd.notna(row["Options"]) else ""

    # Values go straight into the dict so the loop no longer rebinds the
    # builtin `id` (or other helper names) at notebook-global scope.
    survey.append({
        "field": str(row["Field"]).strip(),
        "id": str(row["QuestionID"]).strip(),
        "question": str(row["Question"]).strip(),
        "type": str(row["Type"]).strip().lower(),
        # Always split: a question without options yields [''], which the
        # prompt-formatting cell checks for to decide whether to list options.
        "options": [opt.strip() for opt in raw_options.split(";")],
    })

# === Export to JSON ===
with open("survey_1.json", "w", encoding="utf-8") as f:
    json.dump(survey, f, indent=2, ensure_ascii=False)

print("Survey converted to survey_1.json")
print(survey)

Survey converted to survey_1.json
[{'field': 'info', 'id': '1', 'question': 'Participant ID', 'type': 'number', 'options': ['']}, {'field': 'economy', 'id': '2', 'question': 'Are you in debt?', 'type': 'single choice', 'options': ['yes', 'no']}, {'field': 'economy', 'id': '3', 'question': 'Are you stressed about your financial situation?', 'type': 'single choice', 'options': ['Not at all stressed', 'Slightly stressed', 'Quite stressed', 'Very stressed']}, {'field': 'accommodation', 'id': '4', 'question': 'How do you live today?/How is your accomondation situation today?', 'type': 'text', 'options': ['']}, {'field': 'accommodation', 'id': '5', 'question': 'Do you want help with accommodation', 'type': 'single choice', 'options': ['yes', 'no']}, {'field': 'health', 'id': '6', 'question': 'How do you feel?', 'type': 'text', 'options': ['']}, {'field': 'health', 'id': '7', 'question': 'How would you rate your mental health?', 'type': 'single choice', 'options': ['Very poor', 'Quite poor', 

In [16]:
# Read transcription_1_change.txt as UTF-8 text; the follow-up prompt embeds
# this string as its "NEW TRANSCRIPT" section.
with open("transcription_1_change.txt", encoding="utf-8") as transcript_file:
    transcription_1_change = transcript_file.read()


In [None]:
# Show the loaded transcript as the cell output for a quick visual check.
transcription_1_change

In [None]:


# Summarise previously saved answers (if any) as text for the follow-up prompt.
# The header is assigned up front so `previous_answers` is always defined, even
# when answers.json does not exist yet; otherwise the print below would raise
# NameError on a fresh kernel.
previous_answers = "Previous answers (for reference): \n"

try:
    with open("answers.json", "r") as f:
        answers = json.load(f)
except FileNotFoundError:
    # First run: nothing has been answered so far.
    answers = {}

# One line per answered question: "<id>: <answer> (certainty: <level>)",
# followed by the quoted free-text reasoning when it is non-empty.
answer_lines = []
for qid, answer_data in answers.items():
    line = f"{qid}: {answer_data['answer']} (certainty: {answer_data['certainty']})"
    if answer_data['text field'] != "":
        line += f' - "{answer_data["text field"]}"'
    answer_lines.append(line + "\n")
previous_answers += "".join(answer_lines)

print(previous_answers)



In [None]:
# Render every survey question as one prompt line:
#   "<id>: [<field>] <question> (<type>: opt1, opt2)"
# Questions whose options list is the [''] placeholder get no option list.
question_lines = []
for q in survey:
    prefix = f"{q['id']}: [{q['field']}] {q['question']} ({q['type']}"
    if q['options'] == ['']:
        suffix = ")\n"
    else:
        suffix = ": " + ", ".join(q['options']) + ")\n"
    question_lines.append(prefix + suffix)

questions_text = "".join(question_lines)

print(f'questions_text: \n{questions_text}')

In [21]:
# Prompt for the first transcript (no earlier answers exist yet).
# It embeds `questions_text` (built in the cell above) and `transcription_1`.
# The survey section previously interpolated `survey_1_questions`, which is
# only assigned much further down in the notebook, so this cell failed on
# Restart & Run All.
prompt_no_previous_answers = f"""Based on the following interview transcript between one social worker and one youth participant interested in participating in leaving care program, please fill out this survey. For each question, provide:
1. Answer: Base the answer according to the guidance provided in the parentheses. For text questions, try to cover all the relavant information for this question.
2. Certainty (low, medium, high)
3. Text field: All single/multiple choice questions must have a concise text reasoning, but make sure you cover all the relevant information related to the question. If not choice-based, leave blank.

Notes:
Output only for the questions that are clearly addressed in the transcript. 
Do not make up information, follow the transcript.
Format your response as a JSON array, nothing else.

 
SURVEY QUESTIONS:
{questions_text}

TRANSCRIPT:
{transcription_1}

output example:
[
  {{
    "question_id": "5",
    "answer": "yes",
    "certainty": "high",
    "text field": "support in finding an apartment is urgent. Prefer first-hand contract"
  }},
  {{
    "question_id": "10",
    "answer": "lonely and depressed, having trouble to sleep and hard to find time for friends",
    "certainty": "medium",
    "text field": ""
  }}
]

"""

# Ask the model to answer all survey questions in a single request; the whole
# prompt is sent as one user message.
request_messages = [{"role": "user", "content": prompt_no_previous_answers}]
response = client.chat.completions.create(
    model="o4-mini-2025-04-16",  # alternative noted by the author: "gpt-4.1-nano-2025-04-14"
    messages=request_messages,
)

# Keep the raw reply text; it is not JSON-decoded in this cell
# (the json.loads call was left disabled by the author).
response_text = response.choices[0].message.content

print(response_text)


[
  {
    "question_id": "1",
    "answer": "25362",
    "certainty": "high",
    "text field": ""
  },
  {
    "question_id": "4",
    "answer": "I live in a placement provided by social services; I like it but find it stressful due to uncertainty about where I'll live in three months.",
    "certainty": "medium",
    "text field": ""
  },
  {
    "question_id": "5",
    "answer": "yes",
    "certainty": "high",
    "text field": "Participant explicitly asks for help finding a place to live due to limited time in current placement and uncertainty about future housing."
  },
  {
    "question_id": "6",
    "answer": "I feel good in general but stressed and anxious about where I'll live after three months and who will help me.",
    "certainty": "high",
    "text field": ""
  },
  {
    "question_id": "8",
    "answer": "yes",
    "certainty": "high",
    "text field": "Participant has ongoing contact with a psychologist, though he does not trust mental health professionals."
  },
  {
 

In [34]:
# Prompt for the second and later transcripts of the same participant.
# It embeds three values built in earlier cells: `questions_text` (the survey),
# `previous_answers` (the summary of answers.json) and `transcription_1_change`
# (the new transcript). The model is asked to return only updated or newly
# answered questions as a JSON array; doubled braces in the example are
# f-string escapes for literal { and }.
prompt_with_previous_answers = f"""The following transcript is an interview between a social worker and a youth participant interested in participating in the leaving care program. You are provided with the survey (see SURVEY QUESTIONS) which have beenpartially answered before (see PREVIOUS ANSWERS) based on another transcript. You will update the answers to the survey based on the provided transcripts. 

Here is the structure to answer a question:
1. Answer: Base the answer according to the guidance provided in the parentheses. For text questions, try to cover all the relavant information for this question.
2. Certainty (low, medium, high)
3. Text field: All single/multiple choice questions must have a concise text reasoning, but make sure you cover all the relevant information related to the question. If not choice-based, leave blank.

First, you need to recheck the previous answers against the new transcript to detect any potential conflicts or new information.
- If the new transcript contains conflicting information, update the previous answer according to the current transcript. 
- If the new transcript contains additional/new information, try to update the previous answer by adding the new information while keeping the previous answer.
- If the new answer is similar to the previous answer, no need to update.

Second, find answers in the new transcript for questions not answered previously:
- Only fill out the answer if the transcript has clearly addressed the question.

important:
- Only answer the questions that are clearly addressed in the transcript.
- Output ONLY for the updated answers and newly answered questions. 
- Do not make up information, follow the transcript.
- Format your response as a JSON array, nothing else.

 
SURVEY QUESTIONS:
{questions_text}

PREVIOUS ANSWERS:
{previous_answers}

NEW TRANSCRIPT:
{transcription_1_change}

output example:
[
  {{
    "question_id": "5",
    "answer": "yes",
    "certainty": "high",
    "text field": "support in finding an apartment is urgent. Prefer first-hand contract"
  }},
  {{
    "question_id": "10",
    "answer": "lonely and depressed, having trouble to sleep and hard to find time for friends",
    "certainty": "medium",
    "text field": ""
  }}
]

"""

In [None]:
# Show the fully rendered follow-up prompt before sending it. The prompt is
# defined as `prompt_with_previous_answers`; `prompt_follow` is not assigned
# anywhere in this notebook.
print(prompt_with_previous_answers)

In [None]:
# Send the follow-up prompt. The prompt is defined as
# `prompt_with_previous_answers`; `prompt_follow` is not assigned anywhere in
# this notebook, so the original call failed on a fresh kernel.
response = client.chat.completions.create(
    model="o4-mini-2025-04-16",
    messages=[{"role": "user", "content": prompt_with_previous_answers}]
)

# Parse the response: the prompt requests a bare JSON array, so the reply text
# is decoded directly (json.JSONDecodeError is raised if the model adds
# anything else).
all_results = json.loads(response.choices[0].message.content)

print(all_results)

In [None]:
# Load existing answers.json or start fresh
try:
    with open("answers.json", "r") as f:
        answers = json.load(f)
except FileNotFoundError:
    answers = {}

# One UTC timestamp for the whole batch, in the same "%Y-%m-%d %H:%M:%S" format
# as before. datetime.utcnow() is deprecated, so an aware UTC "now" is used.
last_updated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

# Merge in GPT output: each returned item replaces the stored entry for its
# question id; entries for questions the model did not return are left as-is.
for item in all_results:
    answers[item["question_id"]] = {
        "answer": item["answer"],
        "certainty": item["certainty"],
        # The model may omit the reasoning for non-choice questions.
        "text field": item.get("text field", ""),
        "source": "ai",
        "last_updated": last_updated,
    }

# Save updated answers.json
with open("answers.json", "w") as f:
    json.dump(answers, f, indent=2)

## Function dev

In [39]:
# Reload the local helper module so edits to app/survey.py are picked up
# without restarting the kernel, then import the helpers used below.
# The module is bound as `survey_module`, not `survey`: the latter name holds
# the question list built earlier, and rebinding it to a module breaks the
# cell that iterates over `survey` when it is re-run.
import importlib
import app.survey as survey_module
importlib.reload(survey_module)
from app.survey import (
    process_audio_file,
    process_survey_excel,
    format_survey_questions,
    create_prompt_without_answers,
    process_ai_response,
    update_answers_file,
    update_answers_dataframe,
)

In [16]:
# Run the app helper on recordings 2, 1, 3 and 4 (in that call order) and bind
# each result to its own transcription_N name.
transcription_2, transcription_1, transcription_3, transcription_4 = (
    process_audio_file(f"recordings/recording{number}") for number in (2, 1, 3, 4)
)


In [36]:
# Load the survey through the app helper, which returns two values.
# NOTE: this rebinds `df`, which the survey-conversion cell earlier in the
# notebook also assigns from survey_1.xlsx.
survey_1, df = process_survey_excel("survey_1")

In [32]:

# Format the loaded survey with the app helper and print the result
# (presumably the question text that goes into the prompt — confirm in app/survey.py).
# NOTE(review): the saved output of this cell is
# "TypeError: list indices must be integers or slices, not str" — confirm what
# structure format_survey_questions expects for `survey_1`.
survey_1_questions = format_survey_questions(survey_1)

print(survey_1_questions)

TypeError: list indices must be integers or slices, not str

In [17]:
# Build the first-transcript prompt with the app helper from the formatted
# survey questions and the first transcription.
prompt_without_answers = create_prompt_without_answers(survey_1_questions, transcription_1)

In [25]:
# Parse the raw model reply captured earlier as `response_text`; the saved
# output below shows the result as a list of answer dicts.
new_answers = process_ai_response(response_text)

print(new_answers)

[{'question_id': '1', 'answer': '25362', 'certainty': 'high', 'text field': ''}, {'question_id': '4', 'answer': "I live in a placement provided by social services; I like it but find it stressful due to uncertainty about where I'll live in three months.", 'certainty': 'medium', 'text field': ''}, {'question_id': '5', 'answer': 'yes', 'certainty': 'high', 'text field': 'Participant explicitly asks for help finding a place to live due to limited time in current placement and uncertainty about future housing.'}, {'question_id': '6', 'answer': "I feel good in general but stressed and anxious about where I'll live after three months and who will help me.", 'certainty': 'high', 'text field': ''}, {'question_id': '8', 'answer': 'yes', 'certainty': 'high', 'text field': 'Participant has ongoing contact with a psychologist, though he does not trust mental health professionals.'}, {'question_id': '11', 'answer': 'Studying', 'certainty': 'high', 'text field': 'Participant is in school full-time, 

In [28]:
# Hand the parsed answers to the app helper
# (presumably persists them to the answers file — confirm in app/survey.py).
update_answers_file(new_answers)

In [40]:
# Combine the parsed answers with the survey DataFrame via the app helper; the
# returned value is displayed as the cell output.
update_answers_dataframe(df, new_answers)

Unnamed: 0_level_0,Question,Type,Field,Options,answer,certainty,text_field,source,last_updated
QuestionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Participant ID,number,info,,25362,high,,ai,2025-06-29 21:08:52
2,Are you in debt?,single choice,economy,yes; no,,,,,
3,Are you stressed about your financial situation?,single choice,economy,Not at all stressed; Slightly stressed; Quite ...,,,,,
4,How do you live today?/How is your accomondati...,text,accommodation,,I live in a placement provided by social servi...,medium,,ai,2025-06-29 21:08:52
5,Do you want help with accommodation,single choice,accommodation,yes; no,yes,high,Participant explicitly asks for help finding a...,ai,2025-06-29 21:08:52
6,How do you feel?,text,health,,I feel good in general but stressed and anxiou...,high,,ai,2025-06-29 21:08:52
7,How would you rate your mental health?,single choice,health,Very poor; Quite poor; Quite good; Very good,,,,,
8,Do you currently have any ongoing contact with...,single choice,health,yes; no,yes,high,Participant has ongoing contact with a psychol...,ai,2025-06-29 21:08:52
9,Do you want support for your mental or physica...,single choice,health,yes; no,,,,,
10,How often do you feel lonely?,single choice,social network,Never; Occasionally; Frequently; Almost always,,,,,
