### Launching stored procedures (SPs)



In [1]:
from datetime import timedelta
from decimal import Decimal
from tqdm import tqdm
import pandas as pd

import pyodbc
from sql.config import SQL_SERVER, DB_NAME
import sql.naming_convention as nc
from sql.sql_requests import SQL_Communicator

In [4]:
def format_time_delta(duration: timedelta) -> str:
    """Render a duration as a compact, human-readable string.

    The format depends on the magnitude of the duration:

    * one hour or more  -> ``HH:MM:SS`` (hours are not capped at 24; whole
      days are folded into the hour count, e.g. 1 day 5 s -> ``24:00:05``)
    * one minute or more -> ``MM:SS``
    * under one minute   -> seconds with two decimals, e.g. ``2.18``

    Args:
        duration: The time span to format.

    Returns:
        The formatted string. Negative durations are prefixed with ``-``.
    """
    total_seconds = duration.total_seconds()
    sign = '-' if total_seconds < 0 else ''

    # ``timedelta.seconds`` only holds the sub-day remainder, so the day
    # component must be added back explicitly. Working on the absolute value
    # avoids the normalised representation of negative deltas
    # (days=-1, seconds=86399 for "-1 second").
    magnitude = abs(duration)
    whole_seconds = magnitude.days * 86400 + magnitude.seconds

    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    if hours > 0:
        return f'{sign}{hours:02d}:{minutes:02d}:{seconds:02d}'
    if minutes > 0:
        return f'{sign}{minutes:02d}:{seconds:02d}'
    # Sub-minute durations keep their fractional part (and their own sign).
    return f'{total_seconds:4.2f}'

In [7]:
# Target entities to process. Each name is handed to the naming-convention
# helpers (``nc``) to derive the stored-procedure and stage-table names.
entity_names = [
    "Professional_Compensation",
    "Professional_Person",
    "Professional_BiographyPerson",
    "Professional_Professional",
    "Professional_ProfessionalCoverage",
    "Professional_ProToProFunction",
]



# Run the pull and merge stored procedures for every target entity and collect
# one row per entity:
#   [renamed entity, pull duration, merge duration, total duration,
#    stage-table row count, stage-table size, velocity]
# A step that fails contributes ``None`` so every row keeps the same length.
durations = []
with SQL_Communicator() as sc:
    for en in tqdm(entity_names):
        print(f'flow for target entity {en}..')
        en2 = nc.default_rename(en)
        sp_names = [nc.pull_sp_name(en2), nc.merge_sp_name(en2)]
        stage_tbl = nc.stg_table_name(en)
        ent_durs, flow_td = [en2], timedelta()
        # Reset per entity so a failed pull cannot reuse the previous
        # entity's figures (or hit an unbound name on the first entity).
        rows_count, table_size = None, None

        for i, sp_name in enumerate(sp_names):
            exec_stm = f"EXEC {sp_name}"
            print(f'- {exec_stm}...')
            try:
                td = sc.get_execution_metrics(exec_stm)
            except Exception as ex:
                # Best-effort: report the failure, keep the row aligned and
                # carry on with the next stored procedure.
                print(f'execution failed {ex}')
                ent_durs.append(None)
                continue

            ent_durs.append(format_time_delta(td))
            flow_td += td
            print(f'  - duration {format_time_delta(td)}')

            if i == 0:  # pull SP: measure the stage table it has just filled
                try:
                    ret = sc.get_table_size(stage_tbl)
                    rows_count, table_size = ret[0][0], ret[0][1]
                    print(f'rows {rows_count}, stage table size: {table_size}')
                except Exception as ex:
                    print(f'execution failed {ex}')

        # Velocity = stage-table size per second of total flow time.
        flow_seconds = flow_td.total_seconds()
        if table_size is None:
            velocity_str = None  # size unknown, so velocity cannot be computed
        else:
            velocity = table_size / Decimal(flow_seconds) if flow_seconds else 0
            velocity_str = f'{velocity:7.2f}'

        ent_durs.extend([format_time_delta(flow_td), rows_count, table_size, velocity_str])
        durations.append(ent_durs)


connecting...
Connection successful!


  0%|          | 0/6 [00:00<?, ?it/s]

flow for target entity Professional_Compensation..
- EXEC dbo.PullData_Professional_Compensation2_prc...
  - duration 02:52
rows 48222280, stage table size: 3795.203125
- EXEC dbo.MergeData_Professional_Compensation2_prc...


In [6]:
# Column labels, in the same order as the values of each row in `durations`.
columns = [
    'Entity name',
    'Pull SP',
    'Merge SP',
    'sum',
    'rows_count',
    'table_size, MB',
    'velocity',
]
df = pd.DataFrame(data=durations, columns=columns)
df

Unnamed: 0,Entity name,Pull SP,Merge SP,sum,rows_count,"table_size, MB",velocity
0,AddtlCompany_Address2,01:10,02:15,03:26,18858666,1319.015625,6.39
1,AddtlCompany_BusinessDescription2,01:04,35.54,01:39,6774311,1747.703125,17.49
2,AddtlCompany_LongBusinessDescription2,2.18,1.63,3.81,77450,230.59375,60.55
3,CompanyBiteSize_CompanyBiteSize2,9.46,0.48,9.94,39625,2.203125,0.22
4,CompanyGeosOfInt_CompanyGeosOfInterest2,31.59,4.08,35.67,374445,15.140625,0.42
5,CompanyIndOfInt_CompanyIndustriesOfInterest2,5.20,4.81,10.01,840892,33.703125,3.37
6,CompanyStagesOfInt_CompanyStagesOfInterest2,7.11,1.31,8.41,163283,6.765625,0.8
7,GICS_GICS2,1.43,0.26,1.69,49569,2.453125,1.45
8,KeyDev_FutureEvent2,01:49,44.57,02:33,11343570,1911.34375,12.44
9,KeyDev_FutureEventToObjectToEventType2,28.58,32.14,01:00,7419249,273.414062,4.5
