### Launching stored procedures (SPs)



In [1]:
from datetime import timedelta
from decimal import Decimal
from tqdm import tqdm
import pandas as pd

import pyodbc
from sql.config import SQL_SERVER, DB_NAME
import sql.naming_convention as nc
from sql.sql_requests import SQL_Communicator

In [2]:
def format_time_delta(duration: timedelta) -> str:
    """Render a duration as a compact, human-readable string.

    The layout depends on the magnitude of the duration:

    * one hour or more   -> ``HH:MM:SS`` (hours keep counting past 24,
      e.g. ``25:00:00`` for one day and one hour)
    * one minute or more -> ``MM:SS``
    * under a minute     -> seconds with two decimals, e.g. ``38.12``

    Fractions of a second are dropped in the ``HH:MM:SS`` and ``MM:SS`` forms.

    Args:
        duration: The elapsed time to format.

    Returns:
        The formatted duration. Negative durations carry a leading ``-``.
    """
    total_seconds = duration.total_seconds()

    # `timedelta.seconds` holds only the sub-day remainder (0..86399) and is
    # misleading for negative values, so build the whole-second count from the
    # absolute duration, days included.
    magnitude = abs(duration)
    whole_seconds = magnitude.days * 86400 + magnitude.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    sign = '-' if total_seconds < 0 else ''

    if hours > 0:
        return f'{sign}{hours:02d}:{minutes:02d}:{seconds:02d}'
    if minutes > 0:
        return f'{sign}{minutes:02d}:{seconds:02d}'
    # Sub-minute durations keep their fractional part (and their own sign).
    return f'{total_seconds:4.2f}'

In [4]:
# Target entities to process; the loop below runs the pull and merge stored
# procedures for each of them, in this order.
entity_names = [
    "Professional_Compensation",
    "Professional_Person",
    "Professional_BiographyPerson",
    "Professional_Professional",
    "Professional_ProfessionalCoverage",
    "Professional_ProToProFunction",
]



# Run the pull and merge stored procedures for every entity and collect one
# summary row per entity:
#   [renamed entity, pull time, merge time, total time,
#    staged rows / 1000, stage-table size, size per second of total time]
NOT_AVAILABLE = 'n/a'  # placeholder that keeps every row the same width after a failure

durations = []
with SQL_Communicator() as sc:
    for en in tqdm(entity_names):
        print(f'flow for target entity {en}..')
        en2 = nc.default_rename(en)
        sp_names = [nc.pull_sp_name(en2), nc.merge_sp_name(en2)]
        stage_tbl = nc.stg_table_name(en)
        ent_durs, flow_td = [en2], timedelta()
        # Reset per entity: otherwise a failed pull would raise NameError on the
        # first entity or silently report the previous entity's numbers.
        rows_count = table_size = None

        for i, sp_name in enumerate(sp_names):
            exec_stm = f"EXEC {sp_name}"
            print(f'- {exec_stm}...')
            try:
                td = sc.get_execution_metrics(exec_stm)
            except Exception as ex:
                # Best effort: report the failure and carry on with the next SP.
                # NOTE(review): the merge SP still runs after a failed pull, as
                # before - confirm that merging a stale stage table is acceptable.
                print(f'execution failed {ex}')
                ent_durs.append(NOT_AVAILABLE)
                continue

            ent_durs.append(format_time_delta(td))
            flow_td += td
            print(f'  - duration {format_time_delta(td)}')

            if i == 0:  # pull SP: the stage table has just been filled
                try:
                    rows_count, table_size = sc.get_table_size(stage_tbl)
                    print(f'rows {rows_count}, stage table size: {table_size}')
                except Exception as ex:
                    print(f'execution failed {ex}')

        if rows_count is None or table_size is None:
            stats = [NOT_AVAILABLE] * 3
        else:
            elapsed = flow_td.total_seconds()
            # Guard against division by zero when no SP reported any duration.
            velocity = table_size / Decimal(elapsed) if elapsed else 0
            stats = [
                f'{rows_count / 1000:7.3f}',
                f'{table_size:7.2f}',
                f'{velocity:7.2f}',
            ]
        ent_durs.extend([format_time_delta(flow_td), *stats])
        durations.append(ent_durs)


connecting...
Connection successful!


  0%|          | 0/6 [00:00<?, ?it/s]

flow for target entity Professional_Compensation..
- EXEC dbo.PullData_Professional_Compensation2_prc...
  - duration 03:10
rows 48222280, stage table size: 3795.203125
- EXEC dbo.MergeData_Professional_Compensation2_prc...


 17%|█▋        | 1/6 [10:15<51:17, 615.46s/it]

  - duration 07:04
flow for target entity Professional_Person..
- EXEC dbo.PullData_Professional_Person2_prc...
  - duration 38.12
rows 5959980, stage table size: 361.023437
- EXEC dbo.MergeData_Professional_Person2_prc...


 33%|███▎      | 2/6 [11:41<20:15, 303.96s/it]

  - duration 47.73
flow for target entity Professional_BiographyPerson..
- EXEC dbo.PullData_Professional_BiographyPerson2_prc...
  - duration 01:30
rows 5367678, stage table size: 3037.609375
- EXEC dbo.MergeData_Professional_BiographyPerson2_prc...


 50%|█████     | 3/6 [13:49<11:11, 223.86s/it]

  - duration 37.64
flow for target entity Professional_Professional..
- EXEC dbo.PullData_Professional_Professional2_prc...
  - duration 04:20
rows 10688109, stage table size: 1825.593750
- EXEC dbo.MergeData_Professional_Professional2_prc...


 67%|██████▋   | 4/6 [23:31<12:10, 365.27s/it]

  - duration 05:21
flow for target entity Professional_ProfessionalCoverage..
- EXEC dbo.PullData_Professional_ProfessionalCoverage2_prc...
  - duration 0.98
rows 215352, stage table size: 7.578125
- EXEC dbo.MergeData_Professional_ProfessionalCoverage2_prc...


 83%|████████▎ | 5/6 [23:34<03:54, 234.35s/it]

  - duration 1.17
flow for target entity Professional_ProToProFunction..
- EXEC dbo.PullData_Professional_ProToProFunction2_prc...
  - duration 50.88
rows 17431418, stage table size: 1109.140625
- EXEC dbo.MergeData_Professional_ProToProFunction2_prc...


100%|██████████| 6/6 [26:02<00:00, 260.37s/it]

  - duration 01:37





In [18]:
# Tabulate the collected per-entity rows; every row in `durations` must supply
# one value per column listed here, in the same order.
columns = [
    'Entity name',
    'Pull SP',
    'Merge SP',
    'sum',
    'rows_count, K',
    'table_size, MB',
    'velocity, MB/sec',
]
df = pd.DataFrame(data=durations, columns=columns)
df

Unnamed: 0,Entity name,Pull SP,Merge SP,sum,"rows_count, K","table_size, MB",velocity
0,Professional_Compensation2,03:10,07:04,10:15,48222.28,3795,6.17
1,Professional_Person2,38.12,47.73,01:25,5959.98,361,4.21
2,Professional_BiographyPerson2,01:30,37.64,02:08,5367.678,3037,23.65
3,Professional_Professional2,04:20,05:21,09:41,10688.109,1825,3.14
4,Professional_ProfessionalCoverage2,0.98,1.17,2.15,215.352,7,3.52
5,Professional_ProToProFunction2,50.88,01:37,02:28,17431.418,1109,7.49
