In [5]:
# Internal dependencies
from modules.api_calls import ApiCalls
from modules.auxiliar import Config, FileProcessing

# External libraries
import datetime as dt
import logging
import pandas as pd

# Load the run settings from the YAML configuration file
configuration = Config('config.yaml')

# Logging format and configuration file
# force=True removes any handlers already attached to the root logger, so
# re-running this cell reconfigures logging instead of being ignored
log_format = '%(asctime)s - %(message)s'
logging.basicConfig(filename=configuration.log_file, format=log_format, level=logging.INFO, force=True)

files = FileProcessing(local_process=True)
calls = ApiCalls(configuration)

# Parallel lists: dataframe_names[i] is the export name used for dataframe_list[i]
dataframe_list: list[pd.DataFrame]  = list()
dataframe_names: list[str]          = list()

# Downloading all form information
# bt marks the start of the run: it feeds the elapsed-time log below and is
# passed to files.export in the export cell
bt = dt.datetime.now()
# NOTE(review): assumes get_form_data() yields exactly four frames, in the same
# order as the names added on the next line - confirm against ApiCalls
dataframe_list.extend(calls.get_form_data())
dataframe_names.extend(['forms', 'form_sections', 'form_questions', 'form_options'])

logging.info(f'Time elapsed for form data: {dt.datetime.now()-bt}')

# Time window for the record download, taken from the configuration
start   = configuration.days_start
end     = configuration.days_end
run, all_records = calls.load_records(days_start = start, days_end = end, all_records = True)

# NOTE(review): when run is False only a warning is logged; execution still
# continues with the second download and the export, and `run` is overwritten
# by the next call without being checked again - confirm this is intended
if run == False:
    logging.warning('The process completed as no data was found for the time period')

run, eval_records = calls.load_records(days_start = start, days_end = end, all_records = False)

# Queue both record frames for export alongside the form frames
dataframe_list.extend([all_records, eval_records])
dataframe_names.extend(['all_records', 'eval_records'])


In [6]:
# Export every collected frame under the name stored at the same position
for position, frame in enumerate(dataframe_list):
    files.export(frame, dataframe_names[position], bt)

In [78]:
# Display the evaluation records frame returned by the second load_records call
eval_records

Unnamed: 0,recordId,startTime,tz,tzOffset,callDuration,ani,dnis,line,audioUploadState,screenUploadState,...,metadata.call-sid-id-key.encrypted,metadata.call-sid-id-key.exportable,metadata.call-sid-id-key.readOnly,metadata.client-corp-id-key.metadata,metadata.client-corp-id-key.name,metadata.client-corp-id-key.value,metadata.client-corp-id-key.type,metadata.client-corp-id-key.encrypted,metadata.client-corp-id-key.exportable,metadata.client-corp-id-key.readOnly
0,6798208,2022-10-20 21:56:00,America/New_York,0,0,,,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,237941,TEXT,False,True,False
1,6797639,2022-10-17 13:46:00,America/New_York,0,0,,,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,237941,TEXT,False,True,False
2,6796301,2022-10-18 21:35:00,America/New_York,0,0,,,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,237941,TEXT,False,True,False
3,6796290,2022-10-17 15:26:00,America/New_York,0,0,,,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,237941,TEXT,False,True,False
4,6796199,2022-10-20 11:46:00,America/New_York,0,0,,,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,237941,TEXT,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,6577602,2022-10-05 16:12:14,America/New_York,0,900000,+18033737372,+18882717360,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,192054,TEXT,False,True,False
148,6570617,2022-10-04 19:44:37,America/New_York,0,548000,+16479931863,+18445130628,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,230874,TEXT,False,True,False
149,6556153,2022-10-03 19:15:57,America/New_York,0,530000,+18137273991,+13034819400,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,232504,TEXT,False,True,False
150,6553934,2022-10-03 13:11:11,America/New_York,0,575000,+13168064664,+18775563508,,2,2,...,False,True,False,client-corp-id-key,Client Corp ID,186438,TEXT,False,True,False


In [117]:
from modules.api_connection import ApiConnection

# API client built from the shared configuration; the loop below does not use it
caller = ApiConnection(configuration)
# NOTE(review): a bare expression inside a for loop is not echoed by the
# notebook, so as written this loop produces no output. The values saved in
# this cell's output do not look like the recordId values shown for
# eval_records, so they were presumably printed by an earlier version of this
# cell - confirm and re-run.
for index, record in  eval_records.iterrows():
     record['recordId']

80521
80517
80492
80490
80485
80483
80482
80480
80477
80475
80470
80469
80488
80466
80465
80474
80463
80458
80443
80500
80438
80430
80448
80425
80418
80413
80414
80422
80405
80403
80411
80541
80406
80409
80424
80395
80427
80455
80499
80391
80506
80431
80434
80486
80432
80467
80402
80421
80441
80512
80408
80446
80428
80459
80439
80440
80538
80396
80511
80540
80398
80423
80429
80503
80507
80416
80527
80419
80501
80462
80526
80420
80468
80489
80539
80346
80536
80401
80435
80453
80496
80426
80452
80513
80473
80365
80449
80487
80417
80397
80543
80504
80444
80515
80493
80529
80437
80479
80412
80518
80457
80399
80498
80534
80502
80516
80404
80476
80535
80542
80525
80523
80532
80509
80514
80461
80447
80531
80519
80537
80394
80528
80390
80530
80522
80524
80387
80410
80385
80415
80445
80497
80389
80478
80393
80456
80484
80481
80491
80495
80460
80464
80388
80508
80520
80442
80533
80451
80392
80494
80450
80471


In [102]:
# NOTE(review): load_answers is not defined in any visible cell, so this only
# works if it already exists in the kernel. The two literals are presumably a
# record id and an evaluation id - confirm and replace with named values.
df_evaluation, df_sections, df_questions, df_comments = load_answers(6797639, 80517)

In [106]:
# List the columns of the comments frame returned by load_answers
df_comments.columns

Index(['$ref', 'text', 'created', 'tz', 'contactOffset', 'questionFK',
       'sectionFK', 'history', 'commentor.$ref', 'commentor.displayId',
       'commentor.lastName', 'commentor.firstName', 'commentor.username'],
      dtype='object')

In [None]:
def expand_data(input_df: pd.DataFrame, prefix: str, base_column: str, type: int) -> pd.DataFrame:
    """Expand a nested column into one flat frame tagged with its parent ids.

    For every row of ``input_df`` the value stored in ``base_column`` is turned
    into a DataFrame, the parent identifiers selected by ``type`` are copied
    onto each of its rows, and all per-row frames are stacked together.

    Args:
        input_df: Parent frame. It must contain ``base_column`` plus the id
            columns required by the chosen ``type``.
        prefix: Not used by this function; kept so existing calls keep working.
        base_column: Name of the column holding the nested data (anything
            accepted by the ``pd.DataFrame`` constructor).
        type: Nesting level of ``input_df``, which decides the ids copied down:
            0 -> ``form_id`` (from ``id``);
            1 -> ``form_id`` and ``section_id`` (from ``id``);
            2 -> ``form_id``, ``section_id`` and ``question_id`` (from ``id``).
            Any other value adds no id columns.

    Returns:
        The stacked child rows with a fresh 0..n-1 index, or an empty DataFrame
        when ``input_df`` has no rows.
    """
    # {column added to the children: parent column it is read from}, per level.
    # Insertion order is the order in which the columns are appended.
    parent_keys = {
        0: {'form_id': 'id'},
        1: {'form_id': 'form_id', 'section_id': 'id'},
        2: {'form_id': 'form_id', 'section_id': 'section_id', 'question_id': 'id'},
    }.get(type, {})

    frames = []
    for _, row in input_df.iterrows():
        tmp_df = pd.DataFrame(row[base_column])
        for child_column, parent_column in parent_keys.items():
            tmp_df[child_column] = row[parent_column]
        frames.append(tmp_df)

    # pd.concat rejects an empty list, so return the empty frame explicitly
    if not frames:
        return pd.DataFrame()

    # Concatenate once: growing a frame with concat inside the loop copies all
    # accumulated rows on every iteration
    return pd.concat(frames, ignore_index=True)

In [None]:
class DataExpansion():
    """Flatten nested list-of-dict columns of a frame into related child frames.

    Starting from ``input_df`` (whose rows are identified by ``base_name``),
    every column whose cells look like a list of dicts is expanded into its own
    DataFrame. Each child row carries the key of the parent row it came from,
    and the child's own ``id`` column is renamed after the nested column so it
    can act as the key for the next level down.
    """

    def __init__(self, input_df: pd.DataFrame, base_name: str ):
        """Store the frame to expand and the name of its key column.

        Args:
            input_df: Frame that may contain nested (list of dict) columns.
            base_name: Column of ``input_df`` that identifies each row,
                e.g. ``'formId'``.
        """
        self.input_df   = input_df
        self.base_name  = base_name
        self.output_list    = []

    def singular_name(self, base_name: str) -> str:
        """Return ``base_name`` without ``'Id'`` and without a trailing ``'s'``.

        Examples: ``'evaluationId'`` -> ``'evaluation'``,
        ``'sections'`` -> ``'section'``.
        """
        column_name = base_name.replace('Id', '')
        # endswith() also copes with an empty stem (e.g. base_name == 'Id'),
        # where indexing the last character would raise IndexError
        return column_name[:-1] if column_name.endswith('s') else column_name

    def _flatten_column(self, parent_df: pd.DataFrame, nested_rows: pd.Series,
                        column: str, parent_key: str, parent_label: str) -> pd.DataFrame:
        """Expand ``column`` for the flagged parent rows into one child frame.

        Args:
            parent_df: Frame holding the nested column.
            nested_rows: Boolean mask over ``parent_df`` marking the rows whose
                cell in ``column`` holds nested data (at least one is True).
            column: Name of the nested column.
            parent_key: Column of ``parent_df`` identifying each parent row.
            parent_label: Column name under which the parent key is stored on
                the child rows.

        Returns:
            All child rows stacked with a fresh index, each tagged with the key
            of the parent row it was read from.
        """
        pieces = []
        parent_ids = parent_df.loc[nested_rows, parent_key]
        nested_values = parent_df.loc[nested_rows, column]
        for parent_id, nested in zip(parent_ids, nested_values):
            piece = pd.json_normalize(nested)
            # Tag every child row with the key of the row that contained it
            piece[parent_label] = parent_id
            pieces.append(piece)
        return pd.concat(pieces, ignore_index=True)

    def expand_dataframe(self) -> list[pd.DataFrame]:
        """Expand every nested column, level by level, into child frames.

        A cell counts as nested when its string form starts with ``[{'`` (a
        list whose first element is a dict with a string key). Rows whose cell
        does not match (missing values, empty lists) contribute no child rows.
        Every child frame is itself scanned for nested columns, so deeper
        levels are expanded too.

        Side effects: each child is appended to ``self.output_list``;
        ``self.input_df`` and ``self.base_name`` end up pointing at the last
        child produced and its key column.

        Returns:
            ``self.output_list``: the child frames in the order they were found.

        Raises:
            KeyError: if a frame with nested columns lacks its key column
                (for example when the nested dicts have no ``id`` field).
        """
        import re

        # Raw string: same pattern as before, without invalid escape sequences
        pattern = re.compile(r"\[\{'")

        def looks_nested(value) -> bool:
            return bool(pattern.match(str(value)))

        # Frames still to scan, each paired with the name of its key column
        pending = [(self.input_df, self.base_name)]

        while pending:
            parent_df, parent_key = pending.pop(0)

            # The parent key is fixed per frame, so several nested columns at
            # the same level all relate back to the same key column
            parent_label = f'{self.singular_name(parent_key)}Id'

            for column in parent_df.columns:

                # Validate if the column contains nested data (in any row)
                nested_rows = parent_df[column].map(looks_nested)
                if not nested_rows.any():
                    continue

                tmp_df = self._flatten_column(
                    parent_df, nested_rows, column, parent_key, parent_label)

                # The child's own id becomes '<singular column name>Id', which
                # is the key used to relate the next level back to this one
                new_name = f'{self.singular_name(column)}Id'
                tmp_df = tmp_df.rename(columns={'id': new_name})

                self.output_list.append(tmp_df)

                self.base_name = new_name
                self.input_df = tmp_df
                pending.append((tmp_df, new_name))

        return self.output_list

In [None]:
# NOTE(review): evaluation_df is only defined by the commented-out line below,
# so this cell depends on a variable left over in the kernel - confirm where
# evaluation_df (and df_1) come from before relying on it.
# evaluation_df = df_1.rename(columns={'id': 'evaluationId'})
# Expand the nested columns of the evaluation frame, keyed by evaluationId
data = DataExpansion(evaluation_df, 'evaluationId')
data.expand_dataframe()

In [None]:
# Take the first frame in `names`, key it by formId and leave out the
# bandRanges column before expanding its nested columns
evaluations = (
    names[0]
    .rename(columns={'id': 'formId'})
    .drop(columns='bandRanges')
)
data = DataExpansion(evaluations, 'formId')
all_dfs = data.expand_dataframe()
# Show the third frame produced by the expansion
all_dfs[2]