# Revision History


In [1]:
# Change_date         revision_number       change_description                                          author
# 03/27/2023             1                  initial check-in                                            kranthi   
# 04/03/2023             2                  remove 322, 324 script_ids                                  kranthi
# 04/03/2023             3                  add Q1-Mapping and Q2-mapping for all scripts               kranthi
# 09/19/2023             4                  extract data from Participants child node                   kranthi
# 09/03/2025             5                  Removed subject_area pcs from processing
#                                           New ingestion for pcs is in AS_NB_8x8_NewAPI-Ingestion      Peter Daniels (3cloud)
# 

# Import libraries

In [2]:
import datetime
#import mssparkutils
from pyspark.sql import SparkSession
import requests
import json
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, ArrayType, TimestampType, DateType
import concurrent.futures
import traceback
import math
import time
import pyspark.sql.functions as F
from pyspark.sql.functions import split
# Session-level Spark settings, applied in the same order as before.
spark.conf.set("spark.databricks.delta.autoCompact.enabled", "true")
# This one is issued as a SQL SET statement rather than through spark.conf.
spark.sql("SET spark.databricks.delta.schema.autoMerge.enabled = true")
for _conf_key, _conf_value in (
    ("spark.databricks.io.cache.enabled", "true"),
    ("spark.sql.sources.partitionOverwriteMode", "dynamic"),
    ("spark.sql.legacy.timeParserPolicy", "LEGACY"),
    ("spark.sql.adaptive.enabled", "true"),
    ("spark.sql.adaptive.coalescePartitions.enabled", "true"),
    ("spark.sql.adaptive.skewJoin.enabled", "true"),
    ("spark.databricks.adaptive.autoOptimizeShuffle.enabled", "true"),
):
    spark.conf.set(_conf_key, _conf_value)


In [3]:
# Print the name of the workspace this notebook is running in.
print(mssparkutils.env.getWorkspaceName())

# import frequently used functions

In [4]:

%run /utils/common_functions

# import struct/config variables

In [5]:
%run /contact_center_ops/config/config_variables

# functions to process API data

In [6]:
#add adls path here
# Re-issues the Delta schema auto-merge SET statement for this session.
spark.sql("SET spark.databricks.delta.schema.autoMerge.enabled = true")
# env_var is not defined in this notebook; presumably it comes from one of the
# %run'd notebooks above — TODO confirm.
print("env_var::", env_var)

# Capture the load timestamp ONCE. Every value below is derived from this single
# instant, so the date string, the folder name and the datetime objects can never
# disagree (previously three separate now() calls could straddle midnight).
_load_ts = datetime.datetime.now().replace(microsecond=0)

dt = _load_ts.strftime('%Y-%m-%d')
dt_time = _load_ts.strftime('%Y-%m-%d %H:%M:%S')
print("date of load::", dt)
# Midnight of the load date, as a datetime object.
dt_date_type = datetime.datetime.strptime(dt, '%Y-%m-%d')
# Compact form of the load date, used as a folder name by the writers below.
dt_folder_nm = _load_ts.strftime('%Y%m%d')
print("dt_folder_nm::",dt_folder_nm)

# Load timestamp truncated to whole seconds.
dt_datetime_type = datetime.datetime.strptime(dt_time, '%Y-%m-%d %H:%M:%S')
print("time of load::", dt_datetime_type)


def make_api_call(verb,headers,url,body={},params={}):
    """Send a GET or POST request, retrying when the request itself raises.

    Args:
        verb: 'get' or 'post' (lower-case, exactly as compared below).
        headers: request headers handed to ``requests``.
        url: target URL.
        body: JSON payload for a POST; not used for GET.
        params: query-string parameters.

    Returns:
        The ``requests`` response object. The HTTP status code is NOT checked
        here; callers inspect it themselves.

    Raises:
        ValueError: if ``verb`` is neither 'get' nor 'post'.
        Exception: whatever ``requests`` raised on the last failed attempt.
    """
    # The default dicts are shared between calls; that is safe only because
    # they are read here and never mutated.
    if verb not in ('get', 'post'):
        # Previously this silently returned '' and callers failed later on
        # attribute access; fail here with a clear message instead.
        raise ValueError("unsupported verb for make_api_call: {!r}".format(verb))

    max_attempts = 5
    retry_wait_seconds = 10
    for attempt in range(1, max_attempts + 1):
        try:
            if verb == 'get':
                return requests.get(url,headers=headers,params=params)
            return requests.post(url, json=body, headers=headers,params=params)
        except Exception as e:
            print("error in make_api_call::::",e)
            traceback.print_exc()
            if attempt == max_attempts:
                # Out of retries: surface the last error to the caller.
                raise
            # Back off before the next attempt.
            time.sleep(retry_wait_seconds)

#helps calculate the number of pages a particular script_id has
def get_threads_to_be_used(url,json_data,headers):
    """Work out how many result pages a script_id query has.

    Posts ``json_data`` to ``url`` and reads the ``hits`` field of the JSON reply.

    Args:
        url: endpoint to POST to.
        json_data: request body; its 'script' entry is only used for logging.
        headers: request headers.

    Returns:
        ``(page_count, hit_count)``. ``(0, 0)`` when there are no hits, when no
        response came back, or when the status code is not 200.
    """
    page_size = 500  # one page per 500 hits, as in the original calculation
    res = make_api_call('post',headers,url,json_data)
    print("url::", url,"res::",res)
    if res is None or res.status_code != 200:
        # Do not touch res.status_code when there is no response object at all.
        status = None if res is None else res.status_code
        print("res for get_threads_to_be_used is Null::", url, json_data,"status_code::", status)
        return 0,0

    rec_count = json.loads(res.text)['hits']
    print(rec_count,' hits for script_id ',json_data['script'])
    if rec_count <= 0:
        return 0,0
    # At least one page whenever there is at least one hit.
    thread_count = max(1, math.ceil(rec_count/page_size))
    return thread_count,rec_count


# Row keys whose values are concatenated into 'question1' by create_pcs_dataframe.
q1_mapping = [
    'getdigit-d930a6f4-f5d8-44d5-8729-c3ab0c53aba4',
    'getdigit-ea101383-8440-4019-aa39-d52f864a9371',
    'getdigit-79fe9d37-51e9-4a69-8f72-79c3a09db853',
    'getdigit-0afff142-a242-42f0-910c-ae077f6d8a63',
    'getdigit-cb1dba8c-7630-4ddc-9631-7f455c78ffb2',
    'getdigit-ae648023-a2fd-4c6e-afa8-4e54cb3374bd',
    'getdigit-f2254255-0a76-4274-a910-1d1e608deafc',
]
# Row keys whose values are concatenated into 'question2' by create_pcs_dataframe.
q2_mapping = [
    'getdigit-b85fb4cc-2c33-4d52-83fb-6608ee944564',
    'getdigit-e5843f02-ddf6-40be-8a2b-4c1499bb811f',
]

def create_pcs_dataframe(data,hits,questionlabel,schema):
  """Build a Spark DataFrame from one page of PCS survey rows.

  Each input row becomes a record holding the hit count, a nested 'data' dict
  of call fields, the question label and the notebook-level load date/time.
  'question1' / 'question2' are the concatenation of the row's values stored
  under keys listed in q1_mapping / q2_mapping, in the row's own key order.

  Args:
      data: iterable of dict rows.
      hits: hit count; converted with int().
      questionlabel: stored as 'QuestionLabel' on every record.
      schema: schema handed to spark.createDataFrame.

  Returns:
      The DataFrame created by spark.createDataFrame.

  Raises:
      Any exception from building the records or the DataFrame, after logging it.
  """
  try:
    pcs_records = []
    for survey_row in data:
        field_names = survey_row.keys()
        record = {'hits': int(hits)}
        answer_q1 = ''.join(survey_row[name] for name in field_names if name in q1_mapping)
        answer_q2 = ''.join(survey_row[name] for name in field_names if name in q2_mapping)
        call_details = {
            'callId': survey_row['callId'],
            'callDate': survey_row['callDate'],
            'callerName': survey_row['callerName'],
            'callerPhoneNumber': survey_row['callerPhoneNumber'],
            'question1': answer_q1,
            'question2': answer_q2,
            'totalScore': survey_row['totalScore'],
            'agentList': survey_row['agentList'],
            'queueList': survey_row['queueList'],
            'transactionId': survey_row['transactionId'],
            'agentCallHandlingDuration': survey_row['agentCallHandlingDuration'],
            'holdDuration': survey_row['holdDuration'],
            'muteDuration': survey_row['muteDuration'],
            'timeInIVR': survey_row['timeInIVR'],
            'waitTime': survey_row['waitTime'],
            'callDuration': survey_row['callDuration'],
        }
        record['data'] = call_details
        record['QuestionLabel'] = questionlabel
        record['loadDate'] = dt_date_type
        record['loadDateTime'] = dt_datetime_type
        pcs_records.append(record)
    return spark.createDataFrame(data=pcs_records,schema=schema)
  except Exception as e:
    print("error in cds::::",e)
    traceback.print_exc()
    raise
    
    


  

        
def pcs_api_call(inp):
    """Fetch one PCS result page and write it to the raw zone.

    Args:
        inp: 5-item sequence ``(body, verb, url, headers, pcs_folder)``.

    Returns:
        None. When the reply has hits, a DataFrame is built with
        create_pcs_dataframe and written to ``raw_adls_path + pcs_folder``.

    Raises:
        Any exception from the call, parsing or write, after logging it.
    """
    print("in pcs_api_call::",inp)
    api_call_bdy, verb, url, headers, pcs_folder = inp[0], inp[1], inp[2], inp[3], inp[4]
    try:
        res = make_api_call(verb,headers,url,api_call_bdy)
        r = json.loads(res.text)
        if r is None:
            # A JSON 'null' body carries no page; previously this fell through
            # to r['hits'] and raised TypeError.
            print("pcs_api_call:: empty response body, nothing to write")
            return
        if r['hits'] >0:
            print("hits before pcs",r['hits'])
            df = create_pcs_dataframe(r['data'],r['hits'],r['questionLabel'],pcs_schema)
            write_to_file(df,raw_adls_path+pcs_folder)
    except Exception as e:
        print("error in pcs api::::",e)
        traceback.print_exc()
        raise


def fetch_config(subject_area):
    """Read the active REST API settings for one subject area.

    Queries ``rest_api_config`` for the active row of ``subject_area`` and
    returns its settings. All six columns are taken from a single fetched row,
    so they always belong together and only one lookup is executed.

    Args:
        subject_area: value matched against the ``subject_area`` column.

    Returns:
        ``(verb, url, headers, body, params, folder_in_adls)``.

    Raises:
        ValueError: if no active row exists for ``subject_area``.
    """
    # NOTE(review): subject_area is interpolated into the SQL text; it is only
    # passed literals within this notebook, keep it that way.
    vals = spark.sql(f"select * from rest_api_config where subject_area = '{subject_area}' and is_active = '1'")
    config_row = vals.select('verb','url','headers','body','params','folder_in_adls').head()
    if config_row is None:
        raise ValueError(f"no active rest_api_config row for subject_area '{subject_area}'")
    verb, url, headers, body, params, folder_in_adls = config_row
    return verb,url,headers,body,params, folder_in_adls
    
def retrieve_access_token(headers,url,verb,body={}):
    """POST to the token endpoint and return the raw response.

    Args:
        headers: request headers.
        url: token endpoint URL.
        verb: accepted for signature compatibility; the request is always a POST.
        body: form payload, sent via ``data=``.

    Returns:
        The ``requests`` response. Callers check ``status_code`` and parse the body.

    Raises:
        Any exception raised by ``requests.post``, after logging it. (It used to
        be swallowed, which made callers fail later on ``None.status_code``.)
    """
    try:
        print("in retrieve access tokens..")
        # NOTE(review): verify=False turns off TLS certificate verification for
        # the token request. Left unchanged here; confirm whether it is still needed.
        response = requests.post(url, headers=headers, data=body, verify=False)
        return response
    except Exception as e:
        print("error::::",str(e))
        traceback.print_exc()
        raise

def create_report(access_token,headers,body,url,verb):
    """Ask the API to create a report and return its id.

    ``headers`` and ``body`` are Python expression strings; they are evaluated
    with eval() inside this function, so they can see this function's locals.

    Args:
        access_token: visible to the eval'd expressions.
        headers: expression string producing the request headers.
        body: expression string producing the request body.
        url: report-creation endpoint.
        verb: HTTP verb passed to make_api_call.

    Returns:
        The 'id' field of the JSON reply.

    Raises:
        Any exception from evaluating the config, the call or parsing the
        reply, after logging it. (It used to be swallowed, which silently
        returned None as the report id.)
    """
    try:
        # Keep the names access_token and Previous_Date exactly as they are:
        # the eval'd config expressions presumably reference them — TODO confirm
        # against rest_api_config.
        Previous_Date = str((datetime.datetime.today() - datetime.timedelta(days=1)).strftime ('%Y-%m-%d'))
        print("create_report::Previous_Date::", Previous_Date)
        # NOTE(review): eval() executes whatever text is stored in the config
        # table; it must only ever hold trusted content.
        request_headers = eval(headers)
        request_body = eval(body)
        # NOTE(review): this logs the evaluated headers, which may include credentials.
        print("create_report::headers::",request_headers, "url::",url,"body::", request_body )
        response2 = make_api_call(verb,request_headers,url,request_body)
        report_payload = response2.json()
        report_id = report_payload['id']
        report_status = report_payload['status']
        print("create_report::report_id::status::",report_id,"::=====",report_status)
        return report_id
    except Exception as e:
        print("error in create_report::::",e)
        traceback.print_exc()
        raise
        


def _flatten_report_record(record):
    """Collapse one report record into a flat dict of strings.

    ``record['items']`` is a list of ``{'key': ..., 'value': ...}`` pairs. Every
    pair except 'participants' becomes ``key -> str(value)``. The 'participants'
    value (a list of dicts) is reduced to four selected fields per participant
    and stored as the string form of that list.
    """
    participant_fields = ('participantAssignNumber','participantId','participantName','participantOfferAction')
    flat = {}
    participants = []
    for entry in record['items']:
        # Exact comparison: the previous `not in 'participants'` was a substring
        # test and also dropped any key that happens to be part of that word.
        if entry['key'] == 'participants':
            for participant in entry['value']:
                slim = {name: str(val) for name, val in participant.items() if name in participant_fields}
                if slim:
                    participants.append(slim)
        else:
            flat[entry['key']] = str(entry['value'])
    flat['participants'] = str(participants)
    return flat


def access_report_data(access_token,url,headers,report_id,is_first_page,verb,last_doc_id,hist_folder):
    """Fetch one page of report data, write it, and return the paging cursor.

    Args:
        access_token: not used for the request itself (``headers`` carries the
            authentication); kept for signature compatibility.
        url: base report URL; the report id and paging query are appended here.
        headers: request headers.
        report_id: id of the report to read.
        is_first_page: True for the first request, which has no cursor.
        verb: HTTP verb passed to make_api_call.
        last_doc_id: cursor from the previous page (ignored on the first page).
        hist_folder: folder under raw_adls_path; data is written to
            ``<hist_folder>/incremental/<dt_folder_nm>``.

    Returns:
        The 'Last-Document-ID' response header, or None when it is absent.

    Raises:
        Any exception from the call, parsing or write, after logging it.
    """
    try:
        # The token and headers are deliberately not logged.
        print("\nin access_report_data::url::",url,"report_id::",report_id,"is_first_page::",is_first_page,"verb::",verb,"last_doc_id::",last_doc_id)
        if is_first_page==True:
            url = url+f'/{report_id}/data?size=1000'
        else:
            url = url+f'/{report_id}/data?size=1000&lastDocumentId={last_doc_id}'
        print("\nurl::", url)
        response3 = make_api_call(verb,headers,url,{},{})
        print("----response3--------------")
        res = json.loads(response3.text)
        print("type of res:", type(res), "len of res:",len(res))
        # An empty reply has nothing to write (previously res[0] raised IndexError).
        if res and len(res[0]['items'])!=0:
            pivot3 = [_flatten_report_record(record) for record in res]
            df = spark.createDataFrame(data=pivot3,schema=rpt_schema)
            df = df.withColumn("loadDate", F.lit(dt_date_type))\
                   .withColumn("loadDateTime",F.lit(dt_datetime_type))
            print("====count of HA rows writing to file::====",df.count())
            write_to_file(df,raw_adls_path+hist_folder+'/incremental/'+dt_folder_nm)
        if 'Last-Document-ID' in response3.headers.keys():
            print("=====last_doc_id in access_report_data::===== ",response3.headers['Last-Document-ID'])
            return response3.headers['Last-Document-ID']
        else:
            print("no Last-Document-ID")
            return None
    except Exception as e:
        print("error in access_report_data::::",str(e))
        traceback.print_exc()
        raise



In [7]:
 %%sql
 select 1
-- The query above is a placeholder. The statement below is kept commented out for
-- reference only; it would set the column-mapping mode and reader/writer versions
-- on the incremental Delta table.
-- ALTER TABLE delta.`abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/historical_analytics/incremental/` SET TBLPROPERTIES (
--    'delta.columnMapping.mode' = 'name',
--    'delta.minReaderVersion' = '2',
--    'delta.minWriterVersion' = '5')

# MAIN ( )

In [8]:
def main_call(subject_area):
  """Run the ingestion for one subject area.

  'pcs': builds one request per (script id, page) and processes them one by
  one with pcs_api_call. 'historical_analytics': obtains a token, creates a
  report, then pages through it with access_report_data until no cursor is
  returned, refreshing the token shortly before it expires.

  The header strings read from config are evaluated with eval() inside this
  function, so local names such as access_token must keep their spelling —
  the config expressions presumably reference them (TODO confirm).

  Args:
      subject_area: 'pcs' or 'historical_analytics'; anything else is a no-op.

  Raises:
      Any exception from the steps above, after logging it.
  """
  try:
    if subject_area == 'pcs':
        # As of 2025-08-25, this should no longer get called.  Only the second section (the elif) will.
        print("subject_area1::",subject_area)
        verb,vurl_original,headers,body,params,folder = fetch_config(subject_area)
        print("config::", verb,vurl_original,headers,body)
        headers = json.loads(headers)
        body = json.loads(body)
        # Survey script ids to run, per region.
        dict_pcs = {'us': {'regions':[69,70,301,304,303,321,305], 'timezone': 'America/New_York'}}
        api_calls_list = []
        for region, val in dict_pcs.items():
          print("region:",region, "vurl original::",vurl_original)
          vurl = vurl_original.format(region=region)
          print("region:",region, "vurl substituted::",vurl)
          script_ids = val['regions']
          timezone = val['timezone']
          for script_id in script_ids:
              body['script']= [script_id]
              body['timezone'] = timezone
              print("script_id::",script_id,"url:",vurl,"headers:",headers,"body:",body)
              page_count,hits = get_threads_to_be_used(vurl,body,headers)
              for pg in range(1,page_count+1):
                  json_data2 = body.copy()
                  json_data2['page']= pg
                  print("scriptid::",script_id, "::page::",pg)
                  api_calls_list.append((json_data2,verb,vurl,headers,folder))

        for i in api_calls_list:
          pcs_api_call(i)
        print("after create pcs df")
        print("after create pcs df2")
    elif subject_area == 'historical_analytics':
        print("subject_area2::",subject_area)
        verb2,url2,headers2,body2,params,folder = fetch_config(subject_area)
        is_first_page = True
        response = retrieve_access_token(json.loads(headers2),url2,verb2,json.loads(body2))
        if response.status_code != 200:
            raise Exception("{} - {}".format(response.status_code, response.text))
        response_dict = json.loads(response.text)
        access_token = response_dict['access_token']
        issued_at = response_dict['issued_at']
        expires_in = response_dict['expires_in']
        print("token expires in---",expires_in)
        token_issuetime = datetime.datetime.now()
        verb3,url3,headers3,body3,params3,folder = fetch_config('create_report')
        report_id = create_report(access_token,headers3,body3,url3,verb3)
        if report_id is None:
            # Without an id the data URL would be built around the text 'None'.
            raise Exception("create_report did not return a report id")
        # `folder` from this row is where the report pages are written.
        verb4,url4,headers4,body4,params4,folder = fetch_config('access_report')

        last_doc_id = access_report_data(access_token,url4,eval(headers4),report_id,is_first_page,verb4,'',folder)
        is_first_page = False

        while last_doc_id is not None:
            print("=========last_doc_id::======== ",last_doc_id)
            # Refresh three minutes before the token's stated lifetime ends.
            if (datetime.datetime.now()-token_issuetime).total_seconds() >= int(expires_in)-180:
                print("======================*****token expired******===============================")
                # Only the token-endpoint settings are needed here. The folder of
                # this row is discarded so later pages keep going to the
                # 'access_report' folder chosen above.
                verb2,url2,headers2,body2,params,_ = fetch_config(subject_area)
                response = retrieve_access_token(json.loads(headers2),url2,verb2,json.loads(body2))
                if response.status_code != 200:
                    raise Exception("{} - {}".format(response.status_code, response.text))
                response_dict = json.loads(response.text)
                access_token = response_dict['access_token']
                issued_at = response_dict['issued_at']
                expires_in = response_dict['expires_in']
                # The token value itself is deliberately not logged.
                print("token expires in---",expires_in)
                token_issuetime = datetime.datetime.now()
            last_doc_id = access_report_data(access_token,url4,eval(headers4),report_id,is_first_page,verb4,last_doc_id,folder)

  except Exception as e:
    print("error in main()::::",str(e))
    traceback.print_exc()
    raise




         
    

## Call the Historical Analytics API (PCS is now ingested by a separate notebook)

In [14]:
# Removed pcs from here on 2025-08-25 to process with new API in a different notebook
subject_areas = ['historical_analytics']#,'pcs']
with concurrent.futures.ThreadPoolExecutor(max_workers=len(subject_areas)) as executor:
    # executor.map only re-raises a worker's exception when its result is read.
    # Consuming the iterator makes a failure inside main_call fail this cell
    # instead of being silently dropped.
    list(executor.map(main_call,subject_areas))

In [10]:
#mssparkutils.fs.rm('abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/historical_analytics/incremental/20230921',True)

In [11]:
# -- %%sql
# -- select distinct participants
# -- --participantAssignNumber,participantId,participantName,participantOfferAction
# --   from delta.`abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/historical_analytics/incremental/20230926`
# # -- where participants is not null or (participants) != '[]';

# get_ccrt_data

In [15]:
# Two nullable string columns; used by the createDataFrame calls further down.
queue_schema = StructType([
    StructField('id', StringType(), True),
    StructField('name', StringType(), True),
])
                                    
def get_ccrt_data(p_url,p_headers,p_verb,p_params,p_folder):
    """Download every page of a CCRT listing into the module-level all_queues.

    The first request is sent with ``p_params`` as given; the 'X-Total-Pages'
    response header then drives requests for pages 1 .. total-1. Each page (a
    list of dicts) has its 'metrics' entry removed and is appended to
    ``all_queues`` exactly once.

    Args:
        p_url: endpoint URL.
        p_headers: request headers.
        p_verb: HTTP verb passed to make_api_call.
        p_params: base query parameters; copied before 'page' is added.
        p_folder: used for log messages only.

    Returns:
        None. Nothing is collected when the first response is not 200.
    """
    print('in get_ccrt_data::',p_folder, "url::",p_url)
    print(p_url,p_headers,p_verb,{},p_params)
    queue_res = make_api_call(p_verb,p_headers,p_url,{},p_params)
    print("queue_res::", queue_res.status_code)
    if queue_res.status_code != 200:
        return

    print("headers:",queue_res.headers)
    queue_row = json.loads(queue_res.text)
    for item in queue_row:
        # Tolerate rows that carry no 'metrics' entry.
        item.pop('metrics', None)
    all_queues.append(queue_row)

    page_count = int(queue_res.headers['X-Total-Pages'])
    print("page_count for ",p_folder," ::",page_count)
    for pg in range(1,page_count):
        queue_params2 = p_params.copy()
        queue_params2['page']= pg
        queue_res = make_api_call(p_verb,p_headers, p_url,{},queue_params2)
        queue_row = json.loads(queue_res.text)
        print("appending data to list::",p_folder, "::page::",pg,"elements in this page::",len(queue_row))
        for item in queue_row:
            item.pop('metrics', None)
        # Append once per page. This used to sit inside the loop above and so
        # added the same page once per row, duplicating every later page.
        all_queues.append(queue_row)


# call queue and group_id

In [16]:


#def ccrt_subject_processing(p_subject_area):
  #        ccrt_subject = p_subject_area
all_queues = []
subject_area = ['queue','group_id']
for ccrt_subject in subject_area:
    # get_ccrt_data appends to the module-level all_queues, so every subject
    # starts from a fresh list.
    all_queues = []
    print("subject_area2::",ccrt_subject)
    verb3,url3,headers3,body3,params3,p_folder = fetch_config('ccrt')
    access_token = ""
    issued_at = ""
    expires_in = ""
    is_first_page = True
    response = retrieve_access_token(headers=json.loads(headers3),url=url3,verb=verb3,body=json.loads(body3))
    print(response)
    if response.status_code != 200:
        raise Exception("{} - {}".format(response.status_code, response.text))
    print('ccrt_subject response.status_code is 200')

    verb4,url4,headers4,body4,params4,p_folder = fetch_config(ccrt_subject)

    response_dict = json.loads(response.text)
    # The token is kept in queue_token but deliberately not printed. The name
    # must stay: the eval'd header expression below presumably references it —
    # TODO confirm against rest_api_config.
    queue_token = response_dict['access_token']
    issued_at = response_dict['issued_at']
    expires_in = response_dict['expires_in']

    # NOTE(review): eval() executes the text stored in the config table; it
    # must only ever hold trusted content.
    headers_dict = json.loads(json.dumps(eval(headers4)))
    get_ccrt_data(url4,headers_dict,verb4,json.loads(params4),p_folder)

    # all_queues is a list of pages; flatten it into a single list of rows.
    flat_list = [item for sublist in all_queues for item in sublist]
    df = spark.createDataFrame(flat_list,queue_schema)
    print("df count::",df.distinct().count())
    df = df.withColumn("lastUpdateDate",F.lit(dt_datetime_type))
    write_to_file(df,raw_adls_path+p_folder)


# def agent_api_call

In [17]:
# Pages collected by agent_api_call; one list of rows per downloaded page.
all_agent_data = []

def agent_api_call(inp):
    """Download all agent pages for one group into all_agent_data.

    Args:
        inp: 5-item sequence ``(params, verb, url, headers, group_id)``.

    Every row of every page gets a 'group_id' entry and loses its 'metrics'
    entry before the page is appended to the module-level all_agent_data. The
    page total comes from the first response's 'X-Total-Pages' header.

    Raises:
        Any exception from the calls or parsing, after logging it.
    """
    print("in agent_api_call::",inp)
    api_params, verb, url, headers, gr_id = inp[0], inp[1], inp[2], inp[3], inp[4]

    def _tag_and_store(rows):
        # Stamp each row with the group id, drop 'metrics', keep the page.
        for row in rows:
            row['group_id'] = gr_id
            row.pop('metrics')
        all_agent_data.append(rows)

    try:
        first_response = make_api_call(verb,headers,url,{},api_params)
        print("res::",first_response)
        first_rows = json.loads(first_response.text)
        total_pages = int(first_response.headers['X-Total-Pages'])
        _tag_and_store(first_rows)

        page_no = 1
        while page_no < total_pages:
            page_params = api_params.copy()
            page_params['page'] = page_no
            page_response = make_api_call(verb,headers,url,{},page_params)
            _tag_and_store(json.loads(page_response.text))
            page_no += 1
    except Exception as e:
        print("error::::",e)
        traceback.print_exc()
        raise

# call agent api

In [18]:
verb3,url3,headers3,body3,params3,p_folder = fetch_config('ccrt')
access_token = ""
issued_at = ""
expires_in = ""
is_first_page = True
response = retrieve_access_token(headers=json.loads(headers3),url=url3,verb=verb3,body=json.loads(body3))
print(response)
if response.status_code != 200:
    raise Exception("{} - {}".format(response.status_code, response.text))
print('ccrt_subject response.status_code is 200')
verb4,url4,headers4,body4,params4,agent_folder = fetch_config('agents')

response_dict = json.loads(response.text)
# The token is kept in queue_token but deliberately not printed. The name must
# stay: the eval'd header expression below presumably references it — TODO confirm.
queue_token = response_dict['access_token']
issued_at = response_dict['issued_at']
expires_in = response_dict['expires_in']

# NOTE(review): eval() executes the text stored in the config table; it must
# only ever hold trusted content.
headers_dict = json.loads(json.dumps(eval(headers4)))

# Start from an empty result list so that re-running this cell does not append
# a second copy of every page to what an earlier run collected.
all_agent_data.clear()
all_agent_api = []
df = spark.read.load(raw_adls_path+"pcs/group_id",format = "delta")
all_group_ids = df.select("id").distinct().collect()
for j in all_group_ids:
  group_id = j.id
  # eval(url4) runs once per group; presumably the URL expression refers to
  # group_id, so it has to stay inside the loop — TODO confirm.
  all_agent_api.append((json.loads(params4),verb4,eval(url4),headers_dict, group_id))
if not all_agent_api:
    # ThreadPoolExecutor rejects max_workers=0; say why instead.
    raise ValueError("no group ids found under pcs/group_id; nothing to request")
with concurrent.futures.ThreadPoolExecutor(max_workers=len(all_agent_api)) as executor:
  # Consume the results so an exception raised in agent_api_call fails this
  # cell; executor.map only re-raises when its results are read.
  list(executor.map(agent_api_call,all_agent_api))

## convert list of list to a single list
print(len(all_agent_data))
flat_list = [item for sublist in all_agent_data for item in sublist]
print(len(flat_list))
# NOTE(review): queue_schema declares only id and name, so the group_id that
# agent_api_call adds to each row does not look like it reaches the output — confirm.
df = spark.createDataFrame(flat_list,queue_schema)
df = df.withColumn("lastUpdateDate",F.lit(dt_datetime_type))
write_to_file(df,raw_adls_path+agent_folder)
  
  


In [19]:
# Debug aid: dump the distinct group-id rows collected from pcs/group_id by the
# agent-ingestion cell (requires that cell to have run first).
print(all_group_ids)

In [None]:


# from pyspark.sql import SparkSession
# from pyspark.sql.functions import from_json, col, collect_list
# from pyspark.sql.types import StructType, StructField, StringType, ArrayType

# # Initialize a Spark session
# #spark = SparkSession.builder.appName("ConvertToListOfNames").getOrCreate()

# # Sample string containing a list of dictionaries
# # data = """[{'participantAssignNumber': 1
# # , 'participantType': 'Agent'
# # , 'participantId': 'ag9K9PcuAlSdq8NBG6aQBAdg'
# # , 'participantName': 'Jennifer Gray'
# # , 'participantOfferTime': '2023-09-06T16:47:45.091-04:00'
# # , 'participantOfferAction': 'OfferTimeout'
# # , 'participantOfferActionTime': '2023-09-06T16:48:00.096-04:00'
# # , 'participantOfferDuration': {'value': 15005, 'ongoing': False}
# # , 'participantHandlingEndTime': None
# # , 'participantHandlingDuration': None
# # , 'participantWrapUpEndTime': None
# # , 'participantWrapUpDuration': None
# # , 'participantProcessingDuration': None
# # , 'participantBusyDuration': {'value': 15005, 'ongoing': False}
# # , 'warmTransfersCompleted': 0
# # , 'blindTransferToAgent': 0
# # , 'blindTransferToQueue': 0
# # , 'consultationsEstablished': 0
# # , 'conferencesEstablished': 0
# # , 'participantHold': None
# # , 'participantHoldDuration': None
# # , 'participantLongestHoldDuration': None
# # , 'participantMute': None
# # , 'participantMuteDuration': None
# # , 'participantLongestMuteDuration': None
# # , 'transactionCodeListId': []
# # , 'transactionCodeListName': []
# # , 'transactionCodeItemId': []
# # , 'transactionCodeItemText': []
# # , 'transactionCodeItemReportText': []
# # , 'transactionCodeItemShortCode': []}
# # , {'participantAssignNumber': 2
# # , 'participantType': 'Agent'
# # , 'participantId': 'agwXpAfDwtQLiuiKbE3SinKQ'
# # , 'participantName': 'Lanardia Paschal'
# # , 'participantOfferTime': '2023-09-06T16:48:01.961-04:00'
# # , 'participantOfferAction': 'Accepted'
# # , 'participantOfferActionTime': '2023-09-06T16:48:07.552-04:00'
# # , 'participantOfferDuration': {'value': 5591, 'ongoing': False}
# # , 'participantHandlingEndTime': '2023-09-06T16:55:17.624-04:00'
# # , 'participantHandlingDuration': {'value': 430072, 'ongoing': False}
# # , 'participantWrapUpEndTime': '2023-09-06T16:55:19.331-04:00'
# # , 'participantWrapUpDuration': {'value': 1707, 'ongoing': False}
# # , 'participantProcessingDuration': {'value': 431779, 'ongoing': False}
# # , 'participantBusyDuration': {'value': 437370, 'ongoing': False}
# # , 'warmTransfersCompleted': 0
# # , 'blindTransferToAgent': 0
# # , 'blindTransferToQueue': 0
# # , 'consultationsEstablished': 0
# # , 'conferencesEstablished': 0
# # , 'participantHold': None
# # , 'participantHoldDuration': None
# # , 'participantLongestHoldDuration': None
# # , 'participantMute': None
# # , 'participantMuteDuration': None
# # , 'participantLongestMuteDuration': None
# # , 'transactionCodeListId': []
# # , 'transactionCodeListName': []
# # , 'transactionCodeItemId': []
# # , 'transactionCodeItemText': []
# # , 'transactionCodeItemReportText': []
# # , 'transactionCodeItemShortCode': []}]"""

# from pyspark.sql import SparkSession
# from pyspark.sql.functions import from_json, col, collect_list
# from pyspark.sql.types import StructType, StructField, StringType, ArrayType

# data = """[{'participantAssignNumber': 1
# , 'participantType': 'Agent'
# , 'participantId': 'ag9K9PcuAlSdq8NBG6aQBAdg'
# , 'participantName': 'Jennifer Gray'
# , 'participantOfferTime': '2023-09-06T16:47:45.091-04:00'
# , 'participantOfferAction': 'OfferTimeout'
# , 'participantOfferActionTime': '2023-09-06T16:48:00.096-04:00'
# , 'participantOfferDuration': {'value': 15005, 'ongoing': False}
# , 'participantHandlingEndTime': 'None'
# , 'participantHandlingDuration': 'None'
# , 'participantWrapUpEndTime': 'None'
# , 'participantWrapUpDuration': 'None'
# , 'participantProcessingDuration': 'None'
# , 'participantBusyDuration': {'value': 15005, 'ongoing': False}
# , 'warmTransfersCompleted': 0
# , 'blindTransferToAgent': 0
# , 'blindTransferToQueue': 0
# , 'consultationsEstablished': 0
# , 'conferencesEstablished': 0
# , 'participantHold': 'None'
# , 'participantHoldDuration': 'None'
# , 'participantLongestHoldDuration': 'None'
# , 'participantMute': 'None'
# , 'participantMuteDuration': 'None'
# , 'participantLongestMuteDuration': 'None'
# }
# , {'participantAssignNumber': 2
# , 'participantType': 'Agent'
# , 'participantId': 'agwXpAfDwtQLiuiKbE3SinKQ'
# , 'participantName': 'Lanardia Paschal'
# , 'participantOfferTime': '2023-09-06T16:48:01.961-04:00'
# , 'participantOfferAction': 'Accepted'
# , 'participantOfferActionTime': '2023-09-06T16:48:07.552-04:00'
# , 'participantOfferDuration': {'value': 5591, 'ongoing': False}
# , 'participantHandlingEndTime': '2023-09-06T16:55:17.624-04:00'
# , 'participantHandlingDuration': {'value': 430072, 'ongoing': False}
# , 'participantWrapUpEndTime': '2023-09-06T16:55:19.331-04:00'
# , 'participantWrapUpDuration': {'value': 1707, 'ongoing': False}
# , 'participantProcessingDuration': {'value': 431779, 'ongoing': False}
# , 'participantBusyDuration': {'value': 437370, 'ongoing': False}
# , 'warmTransfersCompleted': 0
# , 'blindTransferToAgent': 0
# , 'blindTransferToQueue': 0
# , 'consultationsEstablished': 0
# , 'conferencesEstablished': 0
# , 'participantHold': 'None'
# , 'participantHoldDuration': 'None'
# , 'participantLongestHoldDuration': 'None'
# , 'participantMute': 'None'
# , 'participantMuteDuration': 'None'
# , 'participantLongestMuteDuration': 'None'
# }]"""


# participants_schema = StructType([
#     StructField('participantAssignNumber', IntegerType(), True),
#     StructField('participantType', StringType(), True),
#     StructField('participantId', StringType(), True),
#     StructField('participantName', StringType(), True),
#     StructField('participantOfferTime', StringType(), True),
#     StructField('participantOfferAction', StringType(), True),
#     StructField('participantOfferActionTime', StringType(), True),
#     StructField('participantOfferDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantHandlingEndTime', StringType(), True),
#     StructField('participantHandlingDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantWrapUpEndTime', StringType(), True),
#     StructField('participantWrapUpDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantProcessingDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantBusyDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('warmTransfersCompleted', IntegerType(), True),
#     StructField('blindTransferToAgent', IntegerType(), True),
#     StructField('blindTransferToQueue', IntegerType(), True),
#     StructField('consultationsEstablished', IntegerType(), True),
#     StructField('conferencesEstablished', IntegerType(), True),
#     StructField('participantHold', StringType(), True),
#     StructField('participantHoldDuration', StringType(), True),
#     StructField('participantLongestHoldDuration', StringType(), True),
#     StructField('participantMute', StringType(), True),
#     StructField('participantMuteDuration', StringType(), True),
#     StructField('participantLongestMuteDuration', StringType(), True)
# ])
      

# # Create a DataFrame with a single column named "data"
# schema = StructType([StructField("data", StringType(), True)])
# df = spark.createDataFrame([(data,)], schema)
# display(df)

# # Use from_json to parse the "data" column into an array of structs
# df = df.withColumn("parsed_data", from_json(col("data"), ArrayType(participants_schema)))
# display(df.select("parsed_data.participantName"))
# # Use collect_list to collect 'name' values into a list
# ##result_df = df.select(collect_list("parsed_data.name").alias("name_list"))

# #display(result_df)

In [None]:
# from pyspark.sql import SparkSession
# from pyspark.sql.functions import from_json, col, collect_list
# from pyspark.sql.types import StructType, StructField, StringType, ArrayType,MapType,IntegerType,BooleanType
# import json

# # Create a SparkSession
# #spark = SparkSession.builder.appName("example").getOrCreate()

# # Define the JSON data as a string
# #data = """[{'participantAssignNumber': 1,'participantType': 'Agent','participantId':'ag9K9PcuAlSdq8NBG6aQBAdg', 'participantName': 'Jennifer Gray', 'participantOfferTime': '2023-09-06T16:47:45.091-04:00', 'participantOfferAction': 'OfferTimeout', 'participantOfferActionTime': '2023-09-06T16:48:00.096-04:00', 'participantOfferDuration': {'value': 15005, 'ongoing': False}, 'participantHandlingEndTime': None, 'participantHandlingDuration': None, 'participantWrapUpEndTime': None, 'participantWrapUpDuration': None, 'participantProcessingDuration': None, 'participantBusyDuration': {'value': 15005, 'ongoing': False}, 'warmTransfersCompleted': 0, 'blindTransferToAgent': 0, 'blindTransferToQueue': 0, 'consultationsEstablished': 0, 'conferencesEstablished': 0, 'participantHold': None, 'participantHoldDuration': None, 'participantLongestHoldDuration': None, 'participantMute': None, 'participantMuteDuration': None, 'participantLongestMuteDuration': None}, {'participantAssignNumber': 2, 'participantType': 'Agent', 'participantId': 'agwXpAfDwtQLiuiKbE3SinKQ', 'participantName': 'Lanardia Paschal', 'participantOfferTime': '2023-09-06T16:48:01.961-04:00', 'participantOfferAction': 'Accepted', 'participantOfferActionTime': '2023-09-06T16:48:07.552-04:00', 'participantOfferDuration': {'value': 5591, 'ongoing': False}, 'participantHandlingEndTime': '2023-09-06T16:55:17.624-04:00', 'participantHandlingDuration': {'value': 430072, 'ongoing': False}, 'participantWrapUpEndTime': '2023-09-06T16:55:19.331-04:00', 'participantWrapUpDuration': {'value': 1707, 'ongoing': False}, 'participantProcessingDuration': {'value': 431779, 'ongoing': False}, 'participantBusyDuration': {'value': 437370, 'ongoing': False}, 'warmTransfersCompleted': 0, 'blindTransferToAgent': 0, 'blindTransferToQueue': 0, 'consultationsEstablished': 0, 'conferencesEstablished': 0, 'participantHold': None, 'participantHoldDuration': None, 'participantLongestHoldDuration': None, 'participantMute': None, 
# # 'participantMuteDuration': None, 'participantLongestMuteDuration': None}]"""
# data = """[
#     {'participantAssignNumber': 1
# ,'participantType': 'Agent'
# , 'participantId': 'ag9K9PcuAlSdq8NBG6aQBAdg'
# , 'participantName': 'Jennifer Gray'
# , 'participantOfferTime': '2023-09-06T16:47:45.091-04:00'
# , 'participantOfferAction': 'OfferTimeout'
# , 'participantOfferActionTime': '2023-09-06T16:48:00.096-04:00'
# , 'participantOfferDuration': {'value': 15005, 'ongoing': False}

# , 'blindTransferToAgent': 0


# }
# ,{
#     'participantAssignNumber': 1
#     ,'participantType': 'Agent'
#     , 'participantId': 'ag9K9PcuAlSdq8NBG6aQBAdg'
#     , 'participantName': 'Jennifer Gray'
#     , 'participantOfferTime': '2023-09-06T16:47:45.091-04:00'
#     , 'participantOfferAction': 'OfferTimeout'
#     , 'participantOfferActionTime': '2023-09-06T16:48:00.096-04:00'
# , 'participantOfferDuration':{'value': 15005, 'ongoing': False}

# , 'blindTransferToAgent': 0


# }


# ]"""

# # Convert the JSON string to a list of dictionaries
# #data_list = json.loads(data)

# # Create a PySpark DataFrame
# schema = StructType([StructField("data", StringType(), True)])
# df = spark.createDataFrame([(data,)], schema)

# display(df)
# participants_schema = StructType([StructField('participantAssignNumber', StringType(), True),
# StructField('participantType', StringType(), True),
# StructField('participantId', StringType(), True),
# StructField('participantName', StringType(), True),
# StructField('participantOfferAction', StringType(), True),
# StructField("participantOfferDuration", StructType([
#         StructField("value", IntegerType(), True),
#         StructField("ongoing", BooleanType(), True)
#     ]), True),

# ])
# df = df.withColumn("parsed_data", from_json(col("data"), ArrayType(participants_schema)))
# display(df)
# #display(df.select("parsed_data.participantAssignNumber","parsed_data.participantId","parsed_data.participantName","parsed_data.participantOfferAction"))
# # Show the DataFrame
# #df.show()


In [None]:
# from pyspark.sql import SparkSession
# from pyspark.sql.functions import from_json, col
# from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType

# # Initialize SparkSession
# #spark = SparkSession.builder.appName("JSONToDataFrame").getOrCreate()

# # Sample DataFrame with a column named 'data' containing JSON strings
# data = [
#     """{
#         "participantAssignNumber": 1,
#         "participantType": "Agent",
#         "participantId": "ag9K9PcuAlSdq8NBG6aQBAdg",
#         "participantName": "Jennifer Gray",
#         "participantOfferTime": "2023-09-06T16:47:45.091-04:00",
#         "participantOfferAction": "OfferTimeout",
#         "participantOfferActionTime": "2023-09-06T16:48:00.096-04:00",
#         "participantOfferDuration": {"value": 15005, "ongoing": false},
#         "blindTransferToAgent": 0
#     }""",
#     """{
#         "participantAssignNumber": 1,
#         "participantType": "Agent",
#         "participantId": "ag9K9PcuAlSdq8NBG6aQBAdg",
#         "participantName": "Jennifer Gray2",
#         "participantOfferTime": "2023-09-06T16:47:45.091-04:00",
#         "participantOfferAction": "OfferTimeout",
#         "participantOfferActionTime": "2023-09-06T16:48:00.096-04:00",
#         "participantOfferDuration": {"value": 15005, "ongoing": false},
#         "blindTransferToAgent": 0
#     }"""
# ]

# # Define the schema for parsing the JSON data
# participants_schema = StructType([
#     StructField("participantAssignNumber", IntegerType(), True),
#     StructField("participantType", StringType(), True),
#     StructField("participantId", StringType(), True),
#     StructField("participantName", StringType(), True),
#     StructField("participantOfferAction", StringType(), True),
#     StructField("participantOfferDuration", StructType([
#         StructField("value", IntegerType(), True),
#         StructField("ongoing", BooleanType(), True)
#     ]), True),
# ])

# # Create a DataFrame with the 'data' column
# df = spark.createDataFrame([data], StringType())

# # Parse the 'data' column as JSON
# df = df.withColumn("parsed_data", from_json(col("value"), ArrayType(participants_schema)))

# display(df.select("parsed_data.participantName"))


In [None]:
# from pyspark.sql import SparkSession
# from pyspark.sql.functions import from_json, col
# from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BooleanType,ArrayType
# participants_schema =  StructType([
#     StructField('participantAssignNumber', IntegerType(), True),
#     StructField('participantType', StringType(), True),
#     StructField('participantId', StringType(), True),
#     StructField('participantName', StringType(), True),
#     StructField('participantOfferTime', StringType(), True),
#     StructField('participantOfferAction', StringType(), True),
#     StructField('participantOfferActionTime', StringType(), True),
#     StructField('participantOfferDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantHandlingEndTime', StringType(), True),
#     StructField('participantHandlingDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantWrapUpEndTime', StringType(), True),
#     StructField('participantWrapUpDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantProcessingDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('participantBusyDuration', StructType([
#         StructField('value', IntegerType(), True),
#         StructField('ongoing', BooleanType(), True)
#     ]), True),
#     StructField('warmTransfersCompleted', IntegerType(), True),
#     StructField('blindTransferToAgent', IntegerType(), True),
#     StructField('blindTransferToQueue', IntegerType(), True),
#     StructField('consultationsEstablished', IntegerType(), True),
#     StructField('conferencesEstablished', IntegerType(), True),
#     StructField('participantHold', StringType(), True),
#     StructField('participantHoldDuration', StringType(), True),
#     StructField('participantLongestHoldDuration', StringType(), True),
#     StructField('participantMute', StringType(), True),
#     StructField('participantMuteDuration', StringType(), True),
#     StructField('participantLongestMuteDuration', StringType(), True)

# ,StructField('transactionCodeListId', StringType(), True),
# StructField('transactionCodeListName', StringType(), True),
# StructField('transactionCodeItemId', StringType(), True),
# StructField('transactionCodeItemText', StringType(), True),
# StructField('transactionCodeItemReportText', StringType(), True),
# StructField('transactionCodeItemShortCode', StringType(), True),
# ])  

# # participants_schema = StructType([StructField('participantAssignNumber', StringType(), True),
# # StructField('participantType', StringType(), True),
# # StructField('participantId', StringType(), True),
# # StructField('participantName', StringType(), True),
# # StructField('participantOfferAction', StringType(), True),
# # StructField("participantOfferDuration", StructType([
# #         StructField("value", IntegerType(), True),
# #         StructField("ongoing", BooleanType(), True)
# #     ]), True),


# # ])
# participants_schema = StructType([
#     StructField("participantAssignNumber", StringType(), True),
#     #StructField("participantType", StringType(), True),
#     StructField("participantId", StringType(), True),
#     StructField("participantName", StringType(), True),
#     StructField("participantOfferAction", StringType(), True),
    
# ])
# df = spark.sql(f"""select interactionId
#                    ,participants
#                    --,participantAssignNumber
#                    --,participantId
#                    ,participantName
#                    --,participantOfferAction
#   from delta.`abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/historical_analytics/incremental/20230921`
# where participants is not null""")
# df = df.withColumn("parsed_data", from_json(col("participants"), ArrayType(participants_schema)))
# #display(df)
# display(df.select("parsed_data.participantAssignNumber","parsed_data.participantId","parsed_data.participantName","parsed_data.participantOfferAction"))

In [None]:
#%%sql

#OPTIMIZE delta.`abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/historical_analytics/incremental/**`

In [None]:
# -- %%sql
# -- select distinct split(split(queueWaitDuration,',')[0],':')[1]/1000
# -- --participantAssignNumber,participantId,participantName,participantOfferAction
# --   from delta.`abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/historical_analytics/incremental/20230921`
# --   where interactionId = 'int-18aafa907ce-TwSMHTchKftrlWWF9DQHK2zbZ-phone-03-wolverineworldwid01'
# -- ;
