In [1]:
import pandas as pd
from koboextractor import KoboExtractor
from dotenv import load_dotenv
import os

In [2]:
# Load environment variables
if not load_dotenv(override=True):
    raise FileNotFoundError('.env file not found. ensure it exists and properly configured.')

# Get environment variables
api = os.getenv('api')
url = os.getenv('url')
form_id = os.getenv('form_id')

if not api or not url or not form_id:
    raise ValueError("Missing required environment variables: 'api', 'url', 'form_id'")

In [3]:
# Initialize KoboExtractor
kobo = KoboExtractor(api, url, debug=False)

In [4]:
# Fetch assets
assets = kobo.list_assets()
asset_uid = next((item['uid'] for item in assets['results'] if item['uid'] == form_id), None)

if not asset_uid:
    raise ValueError(f"Form ID '{form_id}' not found in assets.")

In [5]:
# Fetch asset details
asset = kobo.get_asset(asset_uid)
choice_lists = kobo.get_choices(asset)
questions = kobo.get_questions(asset=asset, unpack_multiples=True)

In [6]:
raw_data = kobo.get_data(asset_uid)
sorted_data = kobo.sort_results_by_time(raw_data['results'])

In [7]:
labelled_data = []
for result in sorted_data:
    labelled_data.append(kobo.label_result(unlabeled_result=result, choice_lists=choice_lists, questions=questions, unpack_multiples=True))

labelled_data = pd.DataFrame(labelled_data)
labelled_data = pd.json_normalize(labelled_data['results'], errors='ignore')
labelled_data

Unnamed: 0,grp1/sex.sequence,grp1/sex.label,grp1/sex.answer_code,grp1/sex.answer_label,grp1/age.sequence,grp1/age.label,grp1/age.answer_code,grp1/age.answer_label,grp1/ethnicity.sequence,grp1/ethnicity.label,...,grp4/lifestyle_changes.choices.3.answer_code,grp4/lifestyle_changes.choices.3.answer_label,grp4/lifestyle_changes.choices.4.sequence,grp4/lifestyle_changes.choices.4.label,grp4/lifestyle_changes.choices.4.answer_code,grp4/lifestyle_changes.choices.4.answer_label,grp4/lifestyle_changes.choices.5.sequence,grp4/lifestyle_changes.choices.5.label,grp4/lifestyle_changes.choices.5.answer_code,grp4/lifestyle_changes.choices.5.answer_label
0,3,Sex,1,Male,4,Age (In years),28,28,5,Ethnicity,...,,,,,,,,,,
1,3,Sex,0,Female,4,Age (In years),32,32,5,Ethnicity,...,,,,,,,,,,
2,3,Sex,1,Male,4,Age (In years),24,24,5,Ethnicity,...,,,,,,,,,,
3,3,Sex,0,Female,4,Age (In years),27,27,5,Ethnicity,...,,,,,,,,,,
4,3,Sex,1,Male,4,Age (In years),32,32,5,Ethnicity,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,3,Sex,0,Female,4,Age (In years),27,27,5,Ethnicity,...,,,,,,,,,,
114,3,Sex,1,Male,4,Age (In years),33,33,5,Ethnicity,...,,,,,,,,,,
115,3,Sex,1,Male,4,Age (In years),35,35,5,Ethnicity,...,,,,,,,,,,
116,3,Sex,0,Female,4,Age (In years),54,54,5,Ethnicity,...,,,,,,,,,,


In [None]:
import pandas as pd
from koboextractor import KoboExtractor
from dotenv import load_dotenv
import os

# Load environment variables
if not load_dotenv(override=True):
    raise FileNotFoundError('.env file not found. ensure it exists and properly configured.')

# Get environment variables
api = os.getenv('api')
url = os.getenv('url')
form_id = os.getenv('form_id')

if not api or not url or not form_id:
    raise ValueError("Missing required environment variables: 'api', 'url', 'form_id'")

# Initialize KoboExtractor
kobo = KoboExtractor(api, url, debug=False)

# Fetch assets
assets = kobo.list_assets()
asset_uid = next((item['uid'] for item in assets['results'] if item['uid'] == form_id), None)

if not asset_uid:
    raise ValueError(f"Form ID '{form_id}' not found in assets.")

# Fetch asset details
asset = kobo.get_asset(asset_uid)
choice_lists = kobo.get_choices(asset)
questions = kobo.get_questions(asset=asset, unpack_multiples=True)

raw_data = kobo.get_data(asset_uid)
sorted_data = kobo.sort_results_by_time(raw_data['results'])

labelled_data = []
for result in sorted_data:
    labelled_data.append(kobo.label_result(unlabeled_result=result, choice_lists=choice_lists, questions=questions, unpack_multiples=True))

labelled_data = pd.DataFrame(labelled_data)
labelled_data = pd.json_normalize(labelled_data['results'], errors='ignore')

labelled_data = labelled_data[[col for col in labelled_data.columns if '.answer_label' in col]]
labelled_data.columns = [col.split('/')[1].split('.answer_label')[0] if '.answer_label' in col else col for col in labelled_data.columns]
labelled_data

Unnamed: 0,grp1/sex.sequence,grp1/sex.label,grp1/sex.answer_code,grp1/sex.answer_label,grp1/age.sequence,grp1/age.label,grp1/age.answer_code,grp1/age.answer_label,grp1/ethnicity.sequence,grp1/ethnicity.label,...,grp4/lifestyle_changes.choices.3.answer_code,grp4/lifestyle_changes.choices.3.answer_label,grp4/lifestyle_changes.choices.4.sequence,grp4/lifestyle_changes.choices.4.label,grp4/lifestyle_changes.choices.4.answer_code,grp4/lifestyle_changes.choices.4.answer_label,grp4/lifestyle_changes.choices.5.sequence,grp4/lifestyle_changes.choices.5.label,grp4/lifestyle_changes.choices.5.answer_code,grp4/lifestyle_changes.choices.5.answer_label
0,3,Sex,1,Male,4,Age (In years),28,28,5,Ethnicity,...,,,,,,,,,,
1,3,Sex,0,Female,4,Age (In years),32,32,5,Ethnicity,...,,,,,,,,,,
2,3,Sex,1,Male,4,Age (In years),24,24,5,Ethnicity,...,,,,,,,,,,
3,3,Sex,0,Female,4,Age (In years),27,27,5,Ethnicity,...,,,,,,,,,,
4,3,Sex,1,Male,4,Age (In years),32,32,5,Ethnicity,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,3,Sex,0,Female,4,Age (In years),27,27,5,Ethnicity,...,,,,,,,,,,
114,3,Sex,1,Male,4,Age (In years),33,33,5,Ethnicity,...,,,,,,,,,,
115,3,Sex,1,Male,4,Age (In years),35,35,5,Ethnicity,...,,,,,,,,,,
116,3,Sex,0,Female,4,Age (In years),54,54,5,Ethnicity,...,,,,,,,,,,


In [11]:
labelled_data = labelled_data[[col for col in labelled_data.columns if '.answer_label' in col]]
labelled_data

Unnamed: 0,grp1/sex.answer_label,grp1/age.answer_label,grp1/ethnicity.answer_label,grp1/ethnicity_specify.answer_label,grp1/religion.answer_label,grp1/marital.answer_label,grp1/education.answer_label,grp1/cadre.answer_label,grp1/work_exp.answer_label,grp1/employment.answer_label,...,grp2/age_diag.answer_label,grp3/lifestyle/smoking_freq.answer_label,grp2/htn_hosp_freq.answer_label,grp3/lifestyle/alcohol_freq.answer_label,grp4/lifestyle_changes.answer_label,grp4/lifestyle_changes.choices.1.answer_label,grp4/lifestyle_changes.choices.2.answer_label,grp4/lifestyle_changes.choices.3.answer_label,grp4/lifestyle_changes.choices.4.answer_label,grp4/lifestyle_changes.choices.5.answer_label
0,Male,28,Other (specify),Mamprusi,Christianity,Married,Diploma/Certificate,Nurse/Midwife,Less than 1 year,Contractual,...,,,,,,,,,,
1,Female,32,Other (specify),Mamprusi,Muslim,Married,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Full-time,...,< 30 years,,,,,,,,,
2,Male,24,Other (specify),Mamprusi,Christianity,Single,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Contractual,...,,,,,,,,,,
3,Female,27,Dagomba,,Christianity,Married,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Full-time,...,,,,,,,,,,
4,Male,32,Other (specify),Mamprusi,Muslim,Single,Bachelor's degree,Administrative Staff,1 – 5 years,Full-time,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,Female,27,Dagomba,,Christianity,Single,Bachelor's degree,Administrative Staff,1 – 5 years,Full-time,...,,,,,,,,,,
114,Male,33,Other (specify),Dagaaba,Christianity,Married,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Full-time,...,,,,,,,,,,
115,Male,35,Other (specify),Gonja,Muslim,Married,Primary,"Support Staff (e.g., cleaner, security)",More than 10 years,Full-time,...,30 – 39 years,,,,,,,,,
116,Female,54,Other (specify),Mamprusi,Christianity,Married,Primary,"Support Staff (e.g., cleaner, security)",More than 10 years,Full-time,...,,,,,,,,,,


In [12]:
labelled_data.columns = [col.split('/')[1].split('.answer_label')[0] if '.answer_label' in col else col for col in labelled_data.columns]
labelled_data

Unnamed: 0,sex,age,ethnicity,ethnicity_specify,religion,marital,education,cadre,work_exp,employment,...,age_diag,lifestyle,htn_hosp_freq,lifestyle.1,lifestyle_changes,lifestyle_changes.choices.1,lifestyle_changes.choices.2,lifestyle_changes.choices.3,lifestyle_changes.choices.4,lifestyle_changes.choices.5
0,Male,28,Other (specify),Mamprusi,Christianity,Married,Diploma/Certificate,Nurse/Midwife,Less than 1 year,Contractual,...,,,,,,,,,,
1,Female,32,Other (specify),Mamprusi,Muslim,Married,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Full-time,...,< 30 years,,,,,,,,,
2,Male,24,Other (specify),Mamprusi,Christianity,Single,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Contractual,...,,,,,,,,,,
3,Female,27,Dagomba,,Christianity,Married,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Full-time,...,,,,,,,,,,
4,Male,32,Other (specify),Mamprusi,Muslim,Single,Bachelor's degree,Administrative Staff,1 – 5 years,Full-time,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,Female,27,Dagomba,,Christianity,Single,Bachelor's degree,Administrative Staff,1 – 5 years,Full-time,...,,,,,,,,,,
114,Male,33,Other (specify),Dagaaba,Christianity,Married,Diploma/Certificate,Nurse/Midwife,1 – 5 years,Full-time,...,,,,,,,,,,
115,Male,35,Other (specify),Gonja,Muslim,Married,Primary,"Support Staff (e.g., cleaner, security)",More than 10 years,Full-time,...,30 – 39 years,,,,,,,,,
116,Female,54,Other (specify),Mamprusi,Christianity,Married,Primary,"Support Staff (e.g., cleaner, security)",More than 10 years,Full-time,...,,,,,,,,,,


In [8]:
from functions import process_kobo_data

In [9]:
dataframe = process_kobo_data(api, url, form_id)
dataframe

No 'results' field found in labelled_data.


[{'meta': {'_id': 651341541,
   'formhub/uuid': 'f0f4fae4aeae4f21953eadc139938b5c',
   'start': '2025-03-16T16:26:50.306-00:00',
   'end': '2025-03-16T16:58:33.150-00:00',
   '__version__': 'vHFE5qXdTi2jBpfY3SHF7Y',
   'meta/instanceID': 'uuid:5b17f57a-4b08-4788-a1fa-f6fc9a453413',
   'meta/instanceName': 'HypertensionSurvey_2025-03-16_16-26-50',
   '_xform_id_string': 'aVWfv6fYDBVU5VEzkougmw',
   '_uuid': '5b17f57a-4b08-4788-a1fa-f6fc9a453413',
   '_attachments': [],
   '_status': 'submitted_via_web',
   '_geolocation': [None, None],
   '_submission_time': '2025-03-16T16:58:34',
   '_tags': [],
   '_notes': [],
   '_validation_status': {},
   '_submitted_by': None},
  'results': {'grp1/sex': {'sequence': 3,
    'label': 'Sex',
    'answer_code': '1',
    'answer_label': 'Male'},
   'grp1/age': {'sequence': 4,
    'label': 'Age (In years)',
    'answer_code': '28',
    'answer_label': '28'},
   'grp1/ethnicity': {'sequence': 5,
    'label': 'Ethnicity',
    'answer_code': '5',
    'ans