# Tactus analysis

Analysis of the Tactus AdB data for the data paper (2020).

In [1]:
import os
import re
import sys
import xml.etree.ElementTree as ET
sys.path.insert(1, '/home/erikt/project/e-mental-health/data-processing')
import tactus2table

In [2]:
# Directory holding the client files read by the cells below. The default is
# the original, machine-specific location; set the TACTUS_DATADIR environment
# variable to point the notebook at another copy without editing this cell.
DATADIR = os.environ.get("TACTUS_DATADIR") or "/home/erikt/projects/e-mental-health/usb/tmp/20190917/"
# File paths are built with plain concatenation (DATADIR+inFileName), so the
# directory name must end with a slash.
if not DATADIR.endswith("/"): DATADIR += "/"
# Label for intake data; equals the "Intake" questionnaire title counted below.
INTAKE = "Intake"

## 1. How many clients have a therapy start date in their profile?

In [3]:
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"
# Path of the treatment start date relative to the root of a client file.
QUERY1 = "./Treatment/StartDate"
# The two labels returned by query1.
STARTDATE = "STARTDATE"
NONE = "NONE"

def query1(root,query):
    """Report whether the first node matching `query` contains non-blank text.

    Args:
        root: XML element to search; None is treated as "nothing found".
        query: ElementTree path expression, for example QUERY1.

    Returns:
        STARTDATE when the first matching node has text other than
        whitespace; NONE when there is no matching node, the node has no
        text, or the text is blank.
    """
    # Handle the expected "missing" cases explicitly rather than with a bare
    # except, so that real problems (an invalid path expression, an
    # interrupted kernel) are no longer silently counted as NONE.
    nodes = root.findall(query) if root is not None else []
    text = nodes[0].text if len(nodes) > 0 else None
    if text is None or text.strip() == "": return(NONE)
    return(STARTDATE)

# Directory listing in name order; the same list is reused by later cells.
files = os.listdir(DATADIR)
files.sort()

# Tally the query1 label (STARTDATE or NONE) over all matching client files.
data = {}
for inFileName in files:
    # Skip anything in the directory that is not a client file.
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text1 = query1(root,QUERY1)
    data[text1] = data.get(text1,0)+1
data

{'STARTDATE': 923, 'NONE': 1060}

## 2. How many clients sent an email to the counselor?

In [4]:
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"
# Path of the sender of every message relative to the root of a client file.
QUERY2 = "./Messages/Message/Sender"
# Sender value that marks a message as written by the client.
CLIENT = "CLIENT"
# The two labels returned by query2.
CLIENTMAIL = "CLIENTMAIL"
NOCLIENTMAIL = "NOCLIENTMAIL"

def query2(root,query):
    """Report whether any node matching `query` names the client as sender.

    Args:
        root: XML element to search.
        query: ElementTree path expression, for example QUERY2.

    Returns:
        CLIENTMAIL as soon as one matching node has the text CLIENT (ignoring
        surrounding whitespace), otherwise NOCLIENTMAIL.
    """
    for node in root.findall(query):
        # An empty element has text None; treat it as an empty sender
        # instead of failing on None.strip().
        if (node.text or "").strip() == CLIENT:
            return(CLIENTMAIL)
    return(NOCLIENTMAIL)

# Directory listing in name order.
files = os.listdir(DATADIR)
files.sort()

# Both labels start at zero so each appears in the result even when unused.
data = dict.fromkeys([CLIENTMAIL,NOCLIENTMAIL],0)
for inFileName in files:
    # Skip anything in the directory that is not a client file.
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text2 = query2(root,QUERY2)
    data[text2] = data[text2]+1
data

{'CLIENTMAIL': 1125, 'NOCLIENTMAIL': 858}

## 3. How many of the intake forms have the title Vragenlijst?

In [4]:
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"
# Path of the intake questionnaire title relative to the root of a client file.
QUERY3 = "./Intake/Questionnaire/Title"

def query3(root,query):
    """Return the stripped text of the first node matching `query`.

    Relies on the NONE label defined together with query1.

    Args:
        root: XML element to search; None is treated as "nothing found".
        query: ElementTree path expression, for example QUERY3.

    Returns:
        The text of the first matching node without surrounding whitespace,
        or NONE when there is no matching node or the node has no text.
    """
    # Handle the expected "missing" cases explicitly rather than with a bare
    # except, so that real problems (an invalid path expression, an
    # interrupted kernel) are no longer silently counted as NONE.
    nodes = root.findall(query) if root is not None else []
    if len(nodes) == 0 or nodes[0].text is None: return(NONE)
    return(nodes[0].text.strip())

# Directory listing in name order.
files = os.listdir(DATADIR)
files.sort()

# Tally the intake questionnaire title over all matching client files.
data = {}
for inFileName in files:
    # Skip anything in the directory that is not a client file.
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text3 = query3(root,QUERY3)
    data[text3] = data.get(text3,0)+1
data

{'Intake': 1937, 'Vragenlijst': 46}

## 4. Combination of 1, 2 and 3

In [6]:
# Tally every combination of the three labels (start date, client mail,
# intake title) over the client files listed earlier in `files`.
data = {}
for inFileName in files:
    # Skip anything in the directory that is not a client file.
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text1 = query1(root,QUERY1)
    text2 = query2(root,QUERY2)
    text3 = query3(root,QUERY3)
    # The combined key is the three labels separated by single spaces.
    text4 = " ".join((text1,text2,text3))
    data[text4] = data.get(text4,0)+1
data

{'STARTDATE CLIENTMAIL Intake': 770,
 'NONE NOCLIENTMAIL Intake': 716,
 'STARTDATE NOCLIENTMAIL Intake': 131,
 'NONE CLIENTMAIL Intake': 320,
 'NONE CLIENTMAIL Vragenlijst': 14,
 'NONE NOCLIENTMAIL Vragenlijst': 10,
 'STARTDATE CLIENTMAIL Vragenlijst': 21,
 'STARTDATE NOCLIENTMAIL Vragenlijst': 1}

## 5. Count Intake Question sets

In [34]:
# Path of every intake question relative to the root of a client file.
QUERY5 = "./Intake/Questionnaire/Content/question"
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"

# Maps the space-separated list of "<question number>-<answer ID>" fields of a
# client file to the number of client files that share exactly that list.
answerStrings = {}
for inFileName in files:
    # Skip anything in the directory that is not a client file.
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    answers = []
    lastQuestionNbr = "0"
    for questionNode in root.findall(QUERY5):
        # Any error while handling a question (for instance an answer without
        # an ID attribute) abandons the rest of that question; fields that
        # were already collected for it are kept.
        try:
            # A question without a readable number inherits the number of the
            # previous question.
            try: questionNbr = questionNode.findall("./questionNumber")[0].text.strip()
            except: questionNbr = lastQuestionNbr
            for answerNode in questionNode.findall("./answer"):
                answers.append(questionNbr+"-"+answerNode.attrib["ID"])
            lastQuestionNbr = questionNbr
        except: pass
    answerString = " ".join(answers)
    answerStrings[answerString] = answerStrings.get(answerString,0)+1

# Rebuild the dictionary with the most frequent answer sets first.
rankedItems = sorted(answerStrings.items(),key=lambda item:item[1],reverse=True)
answerStrings = dict(rankedItems)
for key,clientCount in answerStrings.items():
    fields = key.split()
    # Show only the first three fields; they are enough to tell the sets apart.
    print("clients: {0}; answers: {1}; answer fields: {2}".format(clientCount,len(fields),fields[0:3]))

clients: 1014; answers: 259; answer fields: ['1-geslacht0', '2-leeftijd0', '3-national0']
clients: 881; answers: 140; answer fields: ['1-geslacht', '2-leeftijd', '3-woonsit']
clients: 46; answers: 27; answer fields: ['1-geslacht0', '2-medi0', '2-medicijnr0']
clients: 42; answers: 236; answer fields: ['1-geslachtt0', '2-leeftijdt0', '3-woonsitt0']


In [43]:
# Print every answer field of one client file as "<question number>-<answer ID>"
# followed by the answer title, or by the question title when the answer has
# no title of its own.
for inFileName in ["AdB0174.xml.gz"]:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    lastQuestionNbr = "0"
    for questionNode in root.findall(QUERY5):
        # Any error while handling a question (for instance an answer without
        # an ID attribute) silently abandons the rest of that question.
        try:
            # Without a readable number and title the question inherits the
            # previous number and gets an empty title.
            try:
                questionNbr = questionNode.findall("./questionNumber")[0].text.strip()
                questionTitle = re.sub(r"\s+"," ",questionNode.findall("./title")[0].text.strip())
            except:
                questionNbr = lastQuestionNbr
                questionTitle = ""
            answerNodes = questionNode.findall("./answer")
            for a,answerNode in enumerate(answerNodes):
                try: answerTitle = re.sub(r"\s+"," ",answerNode.findall("./title")[0].text.strip())
                except: answerTitle = ""
                if answerTitle == "":
                    shownTitle = questionTitle
                else:
                    # Answers with their own title get the question printed
                    # once as a heading, before the first answer of a new
                    # question number.
                    if a == 0 and questionNbr != lastQuestionNbr: print(questionNbr,questionTitle)
                    shownTitle = answerTitle
                print(questionNbr+"-"+answerNode.attrib["ID"],end=" ")
                print(shownTitle)
            lastQuestionNbr = questionNbr
        except: pass

1-geslachtt0 Ben je man of vrouw?
2-leeftijdt0 Wat is je leeftijd?
3-woonsitt0 Hoe is je woonsituatie?
4-opleidngt0 Wat is je hoogst afgeronde opleiding?
5-dagbt0 Wat is je belangrijkste dagbesteding?
6-dagritmet0 Heb je een vast dagritme?
7-aanleidt0 Wat is de belangrijkste aanleiding voor deelname aan deze internetbehandeling?
7-verhaalt0 Wat is de belangrijkste aanleiding voor deelname aan deze internetbehandeling?
8-hoeinfot0 Hoe ben je geïnformeerd over deze internetbehandeling?
8-dsm1t0 1.Heb je in de afgelopen 12 maanden gemerkt dat je veel meer alcohol nodig begon te hebben om hetzelfde effect te bereiken of dat dezelfde hoeveelheid minder effect had dan voorheen?
8-dsm2t0 2.Heb je in de afgelopen 12 maanden het verlangen gehad om te stoppen of zonder succes geprobeerd te stoppen of minderen met alcohol?
8-dsm3t0 3.Heb je in de afgelopen 12 maanden veel tijd besteed aan het gebruik, verkrijgen, of bijkomen van de effecten van alcohol?
8-dsm4t0 4.Heb je in de afgelopen 12 maande