In [1]:
## 3.5 for ehr review 

In [4]:
import pandas as pd
df = pd.read_csv('results.csv')

In [5]:
df.shape

(3984, 46)

In [6]:
import os
import re
import json
import base64
import requests
import time
import urllib.parse


API_KEY = 'x'  ##### Paste your API key between the quotes #####
API_VERSION = '2024-06-01'  # For the most recent production release: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
RESOURCE_ENDPOINT = 'https://unified-api.ucsf.edu/general'  # no trailing slash--this is used by libraries as a partial URL
DEPLOYMENT_NAME = "gpt-35-turbo-16k" 

In [8]:
import pandas as pd
import requests
import time
import os
import json
from tqdm import tqdm  # For progress bar

# Function to get a summary from Azure OpenAI
def get_summary(chief_complaint, discharge_summary, age, sex):
    # Check if key fields are missing
    if pd.isna(chief_complaint) or pd.isna(discharge_summary):
        return None  # Skip if any key field is missing
    
    url = f"{RESOURCE_ENDPOINT}/openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version={API_VERSION}"
    
    headers = {
        "Content-Type": "application/json",
        "api-key": API_KEY
    }
    
    payload = {
        "messages": [
            {"role": "system", "content": "You are an experienced emergency department (ED) physician creating a one-liner for a NEW patient who has just arrived at the ED. The patient's past medical records are available to you. Your task is to summarize the patient's relevant PAST medical history and end with their CURRENT chief complaint that is given with no adjectives about the chief complaint as you can NOT assume anything about their current condition. All notes and medical records provided are from PAST encounters, not the current visit."},
            {"role": "user", "content": f"Create a concise one-liner summary for a patient who has just arrived at the Emergency Department. The one-liner must:\n\n"
                                      f"1. Start with demographic information (age, sex). Example of a one liner:  80 y.o. old male, with h/o of HFpEF (EF 55-60% 05/20/22), HTN, HLD, and bipolar disorder presenting with shortness of breath. \n"
                                      f"2. Include a concise summary of relevant PAST medical history from previous visits/notes\n"
                                      f"3. End with just CURRENT presenting chief complaint that is not capitilized in the summary and does have additional information regarding the chief complaint: '{chief_complaint}'\n\n"
                                      f"IMPORTANT: Everything in the notes is from PAST encounters. The patient is NOW presenting with a NEW complaint: '{chief_complaint}'.\n\n"
                                      f"Age: {age}\n"
                                      f"Sex: {sex}\n"
                                      f"PAST Medical Records:\n{discharge_summary}"}
        ],
        "temperature": 0.1,
        "max_tokens": 4096
    }
    
    retries = 0
    max_retries = 5  # Increased from 3 to 5
    backoff_factor = 2  # For exponential backoff
    
    while retries < max_retries:
        try:
            response = requests.post(url, headers=headers, json=payload)
            
            # Handle rate limiting (status code 429) or other 4xx errors
            if response.status_code == 429 or (response.status_code >= 400 and response.status_code < 500):
                wait_time = (backoff_factor ** retries) * 2  # Exponential backoff
                print(f"Rate limit hit or error {response.status_code}. Waiting for {wait_time} seconds before retry...")
                time.sleep(wait_time)
                retries += 1
                continue
                
            response.raise_for_status()  # Raise an error for other non-200 responses
            return response.json()["choices"][0]["message"]["content"].strip()
            
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}. Retrying {retries+1}/{max_retries}...")
            wait_time = (backoff_factor ** retries) * 2  # Exponential backoff
            time.sleep(wait_time)
            retries += 1
    
    return None  # Return None if all retries fail

# Function to process dataframe with checkpoint saving
def process_dataframe_with_checkpoints(df, checkpoint_file="35_processing_checkpoint.json", output_file="35_ehr_review.csv", batch_size=10):
    # Check if there's a checkpoint to resume from
    start_index = 0
    if os.path.exists(checkpoint_file):
        with open(checkpoint_file, 'r') as f:
            checkpoint_data = json.load(f)
            start_index = checkpoint_data.get('last_processed_index', 0) + 1
            print(f"Resuming from index {start_index}")
            
            # If there's a partially processed CSV, load it
            if os.path.exists(output_file):
                saved_df = pd.read_csv(output_file)
                # Ensure it has the Generated_Summary column
                if 'Generated_Summary' not in saved_df.columns:
                    saved_df['Generated_Summary'] = None
                # Transfer any already processed summaries
                for idx in range(start_index):
                    if idx < len(df) and idx < len(saved_df):
                        if not pd.isna(saved_df.loc[idx, 'Generated_Summary']):
                            df.loc[idx, 'Generated_Summary'] = saved_df.loc[idx, 'Generated_Summary']

    # Initialize Generated_Summary column if it doesn't exist
    if 'Generated_Summary' not in df.columns:
        df['Generated_Summary'] = None
    
    # Process in batches with progress bar
    total_rows = len(df)
    progress_bar = tqdm(total=total_rows, initial=start_index, desc="Processing records")
    
    for i in range(start_index, total_rows):
        row = df.iloc[i]
        
        # Process the current row
        summary = get_summary(
            row["primarychiefcomplaintname"], 
            row["Discharge_Summary_Text"],
            row["Age"],
            row["sex"]
        )
        
        # Update dataframe
        df.loc[i, 'Generated_Summary'] = summary
        
        # Update progress bar
        progress_bar.update(1)
        
        # Add delay between API calls to prevent rate limiting
        time.sleep(1)  # Wait 1 second between calls
        
        # Save checkpoint and intermediate results after each batch
        if (i + 1) % batch_size == 0 or i == total_rows - 1:
            # Save checkpoint
            with open(checkpoint_file, 'w') as f:
                json.dump({'last_processed_index': i}, f)
            
            # Save current results
            df.to_csv(output_file, index=False)
            print(f"\nCheckpoint saved at index {i}")
    
    progress_bar.close()
    print(f"Processing complete. Results saved to {output_file}")
    
    # Clean up checkpoint file when done
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)
    
    return df

# Apply the processing function to the dataframe
df = process_dataframe_with_checkpoints(df)

Processing records:   0%|          | 10/3984 [00:31<4:04:59,  3.70s/it]


Checkpoint saved at index 9


Processing records:   1%|          | 20/3984 [01:28<2:47:39,  2.54s/it] 


Checkpoint saved at index 19


Processing records:   1%|          | 30/3984 [02:00<2:40:02,  2.43s/it]


Checkpoint saved at index 29


Processing records:   1%|          | 40/3984 [03:07<2:53:35,  2.64s/it] 


Checkpoint saved at index 39


Processing records:   1%|▏         | 50/3984 [03:33<2:31:43,  2.31s/it]


Checkpoint saved at index 49


Processing records:   2%|▏         | 60/3984 [04:06<2:40:48,  2.46s/it]


Checkpoint saved at index 59


Processing records:   2%|▏         | 70/3984 [04:34<2:18:14,  2.12s/it]


Checkpoint saved at index 69


Processing records:   2%|▏         | 80/3984 [05:00<2:15:34,  2.08s/it]


Checkpoint saved at index 79


Processing records:   2%|▏         | 90/3984 [05:30<2:27:24,  2.27s/it]


Checkpoint saved at index 89


Processing records:   3%|▎         | 100/3984 [06:13<2:31:48,  2.35s/it]


Checkpoint saved at index 99


Processing records:   3%|▎         | 110/3984 [06:46<2:15:54,  2.10s/it]


Checkpoint saved at index 109


Processing records:   3%|▎         | 120/3984 [07:10<2:07:50,  1.99s/it]


Checkpoint saved at index 119


Processing records:   3%|▎         | 128/3984 [07:32<2:21:08,  2.20s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:   3%|▎         | 130/3984 [08:41<16:51:19, 15.74s/it]


Checkpoint saved at index 129


Processing records:   4%|▎         | 140/3984 [09:08<3:10:23,  2.97s/it] 


Checkpoint saved at index 139


Processing records:   4%|▍         | 150/3984 [09:37<2:20:49,  2.20s/it]


Checkpoint saved at index 149


Processing records:   4%|▍         | 160/3984 [10:02<2:06:23,  1.98s/it]


Checkpoint saved at index 159


Processing records:   4%|▍         | 170/3984 [10:31<2:30:42,  2.37s/it]


Checkpoint saved at index 169


Processing records:   5%|▍         | 180/3984 [10:57<2:13:34,  2.11s/it]


Checkpoint saved at index 179


Processing records:   5%|▍         | 190/3984 [11:23<2:15:29,  2.14s/it]


Checkpoint saved at index 189


Processing records:   5%|▌         | 200/3984 [11:50<2:29:04,  2.36s/it]


Checkpoint saved at index 199


Processing records:   5%|▌         | 210/3984 [12:16<2:20:10,  2.23s/it]


Checkpoint saved at index 209


Processing records:   6%|▌         | 220/3984 [12:42<2:08:57,  2.06s/it]


Checkpoint saved at index 219


Processing records:   6%|▌         | 230/3984 [13:10<2:18:18,  2.21s/it]


Checkpoint saved at index 229


Processing records:   6%|▌         | 240/3984 [13:35<2:08:55,  2.07s/it]


Checkpoint saved at index 239


Processing records:   6%|▋         | 250/3984 [14:01<2:08:46,  2.07s/it]


Checkpoint saved at index 249


Processing records:   7%|▋         | 260/3984 [14:26<2:05:00,  2.01s/it]


Checkpoint saved at index 259


Processing records:   7%|▋         | 270/3984 [14:53<2:22:35,  2.30s/it]


Checkpoint saved at index 269


Processing records:   7%|▋         | 280/3984 [15:18<2:09:09,  2.09s/it]


Checkpoint saved at index 279


Processing records:   7%|▋         | 290/3984 [15:47<2:38:11,  2.57s/it]


Checkpoint saved at index 289


Processing records:   8%|▊         | 300/3984 [16:14<2:14:04,  2.18s/it]


Checkpoint saved at index 299


Processing records:   8%|▊         | 310/3984 [16:40<2:07:12,  2.08s/it]


Checkpoint saved at index 309


Processing records:   8%|▊         | 320/3984 [17:08<2:22:02,  2.33s/it]


Checkpoint saved at index 319


Processing records:   8%|▊         | 330/3984 [17:35<2:15:42,  2.23s/it]


Checkpoint saved at index 329


Processing records:   9%|▊         | 340/3984 [18:02<2:10:00,  2.14s/it]


Checkpoint saved at index 339


Processing records:   9%|▉         | 350/3984 [18:28<2:06:44,  2.09s/it]


Checkpoint saved at index 349


Processing records:   9%|▉         | 360/3984 [18:53<2:02:19,  2.03s/it]


Checkpoint saved at index 359


Processing records:   9%|▉         | 370/3984 [19:23<2:29:04,  2.47s/it]


Checkpoint saved at index 369


Processing records:  10%|▉         | 380/3984 [19:49<2:09:35,  2.16s/it]


Checkpoint saved at index 379


Processing records:  10%|▉         | 390/3984 [20:15<2:23:12,  2.39s/it]


Checkpoint saved at index 389


Processing records:  10%|█         | 400/3984 [20:41<2:18:18,  2.32s/it]


Checkpoint saved at index 399


Processing records:  10%|█         | 410/3984 [21:07<2:10:59,  2.20s/it]


Checkpoint saved at index 409


Processing records:  11%|█         | 420/3984 [21:35<2:11:11,  2.21s/it]


Checkpoint saved at index 419


Processing records:  11%|█         | 430/3984 [22:06<2:30:02,  2.53s/it]


Checkpoint saved at index 429


Processing records:  11%|█         | 440/3984 [22:30<1:53:54,  1.93s/it]


Checkpoint saved at index 439


Processing records:  11%|█▏        | 450/3984 [22:56<2:10:34,  2.22s/it]


Checkpoint saved at index 449


Processing records:  12%|█▏        | 460/3984 [23:23<2:17:37,  2.34s/it]


Checkpoint saved at index 459


Processing records:  12%|█▏        | 470/3984 [23:49<2:07:07,  2.17s/it]


Checkpoint saved at index 469


Processing records:  12%|█▏        | 472/3984 [23:59<3:09:36,  3.24s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  12%|█▏        | 480/3984 [25:20<3:34:21,  3.67s/it] 


Checkpoint saved at index 479


Processing records:  12%|█▏        | 490/3984 [25:46<2:07:35,  2.19s/it]


Checkpoint saved at index 489


Processing records:  13%|█▎        | 500/3984 [26:12<2:08:26,  2.21s/it]


Checkpoint saved at index 499


Processing records:  13%|█▎        | 510/3984 [26:39<2:27:01,  2.54s/it]


Checkpoint saved at index 509


Processing records:  13%|█▎        | 520/3984 [27:04<1:55:13,  2.00s/it]


Checkpoint saved at index 519


Processing records:  13%|█▎        | 530/3984 [27:31<2:04:40,  2.17s/it]


Checkpoint saved at index 529


Processing records:  14%|█▎        | 540/3984 [27:56<2:04:05,  2.16s/it]


Checkpoint saved at index 539


Processing records:  14%|█▍        | 550/3984 [28:21<1:54:23,  2.00s/it]


Checkpoint saved at index 549


Processing records:  14%|█▍        | 560/3984 [28:47<1:58:00,  2.07s/it]


Checkpoint saved at index 559


Processing records:  14%|█▍        | 570/3984 [29:16<2:17:04,  2.41s/it]


Checkpoint saved at index 569


Processing records:  15%|█▍        | 580/3984 [29:44<2:08:15,  2.26s/it]


Checkpoint saved at index 579


Processing records:  15%|█▍        | 590/3984 [30:10<2:04:10,  2.20s/it]


Checkpoint saved at index 589


Processing records:  15%|█▌        | 600/3984 [30:36<1:58:29,  2.10s/it]


Checkpoint saved at index 599


Processing records:  15%|█▌        | 610/3984 [31:02<1:59:17,  2.12s/it]


Checkpoint saved at index 609


Processing records:  16%|█▌        | 620/3984 [31:28<2:00:43,  2.15s/it]


Checkpoint saved at index 619


Processing records:  16%|█▌        | 630/3984 [31:55<2:06:00,  2.25s/it]


Checkpoint saved at index 629


Processing records:  16%|█▌        | 640/3984 [32:21<1:54:42,  2.06s/it]


Checkpoint saved at index 639


Processing records:  16%|█▋        | 650/3984 [32:48<1:58:31,  2.13s/it]


Checkpoint saved at index 649


Processing records:  17%|█▋        | 660/3984 [33:15<1:59:28,  2.16s/it]


Checkpoint saved at index 659


Processing records:  17%|█▋        | 670/3984 [33:47<2:20:18,  2.54s/it]


Checkpoint saved at index 669


Processing records:  17%|█▋        | 680/3984 [34:15<2:05:03,  2.27s/it]


Checkpoint saved at index 679


Processing records:  17%|█▋        | 690/3984 [34:44<2:03:42,  2.25s/it]


Checkpoint saved at index 689


Processing records:  18%|█▊        | 700/3984 [35:13<2:03:14,  2.25s/it]


Checkpoint saved at index 699


Processing records:  18%|█▊        | 710/3984 [35:39<1:56:44,  2.14s/it]


Checkpoint saved at index 709


Processing records:  18%|█▊        | 720/3984 [36:04<1:48:52,  2.00s/it]


Checkpoint saved at index 719


Processing records:  18%|█▊        | 730/3984 [36:35<2:13:24,  2.46s/it]


Checkpoint saved at index 729


Processing records:  19%|█▊        | 740/3984 [37:02<1:55:37,  2.14s/it]


Checkpoint saved at index 739


Processing records:  19%|█▉        | 750/3984 [37:27<1:49:36,  2.03s/it]


Checkpoint saved at index 749


Processing records:  19%|█▉        | 760/3984 [37:53<1:52:58,  2.10s/it]


Checkpoint saved at index 759


Processing records:  19%|█▉        | 770/3984 [38:19<1:54:20,  2.13s/it]


Checkpoint saved at index 769


Processing records:  20%|█▉        | 780/3984 [38:44<1:50:57,  2.08s/it]


Checkpoint saved at index 779


Processing records:  20%|█▉        | 790/3984 [39:12<2:02:14,  2.30s/it]


Checkpoint saved at index 789


Processing records:  20%|██        | 800/3984 [39:41<2:03:35,  2.33s/it]


Checkpoint saved at index 799


Processing records:  20%|██        | 810/3984 [40:08<1:56:12,  2.20s/it]


Checkpoint saved at index 809


Processing records:  21%|██        | 820/3984 [40:37<2:06:19,  2.40s/it]


Checkpoint saved at index 819


Processing records:  21%|██        | 830/3984 [41:03<1:57:56,  2.24s/it]


Checkpoint saved at index 829


Processing records:  21%|██        | 840/3984 [41:29<1:54:51,  2.19s/it]


Checkpoint saved at index 839


Processing records:  21%|██▏       | 850/3984 [41:57<2:05:55,  2.41s/it]


Checkpoint saved at index 849


Processing records:  22%|██▏       | 860/3984 [42:24<2:00:16,  2.31s/it]


Checkpoint saved at index 859


Processing records:  22%|██▏       | 870/3984 [42:52<1:58:01,  2.27s/it]


Checkpoint saved at index 869


Processing records:  22%|██▏       | 880/3984 [43:18<1:47:16,  2.07s/it]


Checkpoint saved at index 879


Processing records:  22%|██▏       | 890/3984 [43:46<1:55:17,  2.24s/it]


Checkpoint saved at index 889


Processing records:  23%|██▎       | 900/3984 [44:12<1:46:55,  2.08s/it]


Checkpoint saved at index 899


Processing records:  23%|██▎       | 910/3984 [44:40<1:51:39,  2.18s/it]


Checkpoint saved at index 909


Processing records:  23%|██▎       | 920/3984 [45:07<1:58:11,  2.31s/it]


Checkpoint saved at index 919


Processing records:  23%|██▎       | 930/3984 [45:37<2:25:37,  2.86s/it]


Checkpoint saved at index 929


Processing records:  24%|██▎       | 940/3984 [46:03<1:49:09,  2.15s/it]


Checkpoint saved at index 939


Processing records:  24%|██▍       | 950/3984 [46:29<1:51:12,  2.20s/it]


Checkpoint saved at index 949


Processing records:  24%|██▍       | 960/3984 [46:56<1:54:10,  2.27s/it]


Checkpoint saved at index 959


Processing records:  24%|██▍       | 970/3984 [47:22<1:51:42,  2.22s/it]


Checkpoint saved at index 969


Processing records:  25%|██▍       | 980/3984 [47:49<1:52:58,  2.26s/it]


Checkpoint saved at index 979


Processing records:  25%|██▍       | 990/3984 [48:14<1:44:59,  2.10s/it]


Checkpoint saved at index 989


Processing records:  25%|██▌       | 1000/3984 [48:41<1:50:14,  2.22s/it]


Checkpoint saved at index 999


Processing records:  25%|██▌       | 1010/3984 [49:09<1:46:55,  2.16s/it]


Checkpoint saved at index 1009


Processing records:  26%|██▌       | 1020/3984 [49:35<1:45:47,  2.14s/it]


Checkpoint saved at index 1019


Processing records:  26%|██▌       | 1030/3984 [50:04<1:54:05,  2.32s/it]


Checkpoint saved at index 1029


Processing records:  26%|██▌       | 1040/3984 [50:31<1:47:23,  2.19s/it]


Checkpoint saved at index 1039


Processing records:  26%|██▋       | 1050/3984 [51:18<3:26:28,  4.22s/it]


Checkpoint saved at index 1049


Processing records:  27%|██▋       | 1060/3984 [52:43<2:29:15,  3.06s/it] 


Checkpoint saved at index 1059


Processing records:  27%|██▋       | 1070/3984 [53:11<1:44:45,  2.16s/it]


Checkpoint saved at index 1069


Processing records:  27%|██▋       | 1080/3984 [54:01<2:01:51,  2.52s/it]


Checkpoint saved at index 1079


Processing records:  27%|██▋       | 1090/3984 [54:28<1:47:14,  2.22s/it]


Checkpoint saved at index 1089


Processing records:  28%|██▊       | 1100/3984 [54:54<1:44:34,  2.18s/it]


Checkpoint saved at index 1099


Processing records:  28%|██▊       | 1110/3984 [55:24<1:54:03,  2.38s/it]


Checkpoint saved at index 1109


Processing records:  28%|██▊       | 1120/3984 [55:51<1:48:24,  2.27s/it]


Checkpoint saved at index 1119


Processing records:  28%|██▊       | 1130/3984 [56:17<1:43:24,  2.17s/it]


Checkpoint saved at index 1129


Processing records:  29%|██▊       | 1140/3984 [56:44<1:54:09,  2.41s/it]


Checkpoint saved at index 1139


Processing records:  29%|██▉       | 1150/3984 [57:11<1:44:43,  2.22s/it]


Checkpoint saved at index 1149


Processing records:  29%|██▉       | 1160/3984 [57:38<1:44:03,  2.21s/it]


Checkpoint saved at index 1159


Processing records:  29%|██▉       | 1170/3984 [58:05<1:38:40,  2.10s/it]


Checkpoint saved at index 1169


Processing records:  30%|██▉       | 1180/3984 [58:30<1:38:16,  2.10s/it]


Checkpoint saved at index 1179


Processing records:  30%|██▉       | 1190/3984 [58:59<1:38:40,  2.12s/it]


Checkpoint saved at index 1189


Processing records:  30%|███       | 1200/3984 [59:27<2:02:55,  2.65s/it]


Checkpoint saved at index 1199


Processing records:  30%|███       | 1210/3984 [59:54<1:50:24,  2.39s/it]


Checkpoint saved at index 1209


Processing records:  31%|███       | 1220/3984 [1:00:20<1:36:33,  2.10s/it]


Checkpoint saved at index 1219


Processing records:  31%|███       | 1230/3984 [1:00:46<1:40:59,  2.20s/it]


Checkpoint saved at index 1229


Processing records:  31%|███       | 1240/3984 [1:01:17<1:46:49,  2.34s/it]


Checkpoint saved at index 1239


Processing records:  31%|███▏      | 1250/3984 [1:01:43<1:37:38,  2.14s/it]


Checkpoint saved at index 1249


Processing records:  32%|███▏      | 1260/3984 [1:02:09<1:34:34,  2.08s/it]


Checkpoint saved at index 1259


Processing records:  32%|███▏      | 1270/3984 [1:02:35<1:33:27,  2.07s/it]


Checkpoint saved at index 1269


Processing records:  32%|███▏      | 1280/3984 [1:03:03<1:44:05,  2.31s/it]


Checkpoint saved at index 1279


Processing records:  32%|███▏      | 1290/3984 [1:03:30<1:37:15,  2.17s/it]


Checkpoint saved at index 1289


Processing records:  33%|███▎      | 1300/3984 [1:03:55<1:29:19,  2.00s/it]


Checkpoint saved at index 1299


Processing records:  33%|███▎      | 1310/3984 [1:04:22<1:41:24,  2.28s/it]


Checkpoint saved at index 1309


Processing records:  33%|███▎      | 1320/3984 [1:04:50<1:43:05,  2.32s/it]


Checkpoint saved at index 1319


Processing records:  33%|███▎      | 1330/3984 [1:05:16<1:38:59,  2.24s/it]


Checkpoint saved at index 1329


Processing records:  34%|███▎      | 1340/3984 [1:05:45<1:53:05,  2.57s/it]


Checkpoint saved at index 1339


Processing records:  34%|███▍      | 1350/3984 [1:06:10<1:34:28,  2.15s/it]


Checkpoint saved at index 1349


Processing records:  34%|███▍      | 1360/3984 [1:06:35<1:29:28,  2.05s/it]


Checkpoint saved at index 1359


Processing records:  34%|███▍      | 1370/3984 [1:07:01<1:29:07,  2.05s/it]


Checkpoint saved at index 1369


Processing records:  35%|███▍      | 1380/3984 [1:07:27<1:31:18,  2.10s/it]


Checkpoint saved at index 1379


Processing records:  35%|███▍      | 1390/3984 [1:07:52<1:29:12,  2.06s/it]


Checkpoint saved at index 1389


Processing records:  35%|███▌      | 1400/3984 [1:08:18<1:30:10,  2.09s/it]


Checkpoint saved at index 1399


Processing records:  35%|███▌      | 1410/3984 [1:08:44<1:29:21,  2.08s/it]


Checkpoint saved at index 1409


Processing records:  36%|███▌      | 1420/3984 [1:09:11<1:37:12,  2.27s/it]


Checkpoint saved at index 1419


Processing records:  36%|███▌      | 1430/3984 [1:09:36<1:28:31,  2.08s/it]


Checkpoint saved at index 1429


Processing records:  36%|███▌      | 1440/3984 [1:10:03<1:32:09,  2.17s/it]


Checkpoint saved at index 1439


Processing records:  36%|███▋      | 1450/3984 [1:10:30<1:25:42,  2.03s/it]


Checkpoint saved at index 1449


Processing records:  37%|███▋      | 1460/3984 [1:10:54<1:26:22,  2.05s/it]


Checkpoint saved at index 1459
Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  37%|███▋      | 1470/3984 [1:12:27<2:11:36,  3.14s/it] 


Checkpoint saved at index 1469


Processing records:  37%|███▋      | 1480/3984 [1:12:54<1:29:27,  2.14s/it]


Checkpoint saved at index 1479


Processing records:  37%|███▋      | 1490/3984 [1:13:20<1:27:21,  2.10s/it]


Checkpoint saved at index 1489


Processing records:  38%|███▊      | 1500/3984 [1:13:47<1:36:47,  2.34s/it]


Checkpoint saved at index 1499


Processing records:  38%|███▊      | 1510/3984 [1:14:16<1:34:44,  2.30s/it]


Checkpoint saved at index 1509


Processing records:  38%|███▊      | 1520/3984 [1:14:44<1:33:42,  2.28s/it]


Checkpoint saved at index 1519


Processing records:  38%|███▊      | 1530/3984 [1:15:15<1:42:30,  2.51s/it]


Checkpoint saved at index 1529


Processing records:  39%|███▊      | 1540/3984 [1:15:42<1:28:10,  2.16s/it]


Checkpoint saved at index 1539


Processing records:  39%|███▉      | 1550/3984 [1:16:10<1:29:51,  2.22s/it]


Checkpoint saved at index 1549
Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  39%|███▉      | 1560/3984 [1:17:41<1:57:20,  2.90s/it] 


Checkpoint saved at index 1559


Processing records:  39%|███▉      | 1570/3984 [1:18:07<1:21:35,  2.03s/it]


Checkpoint saved at index 1569


Processing records:  40%|███▉      | 1580/3984 [1:18:36<1:41:49,  2.54s/it]


Checkpoint saved at index 1579


Processing records:  40%|███▉      | 1590/3984 [1:19:03<1:26:19,  2.16s/it]


Checkpoint saved at index 1589


Processing records:  40%|████      | 1600/3984 [1:19:29<1:28:26,  2.23s/it]


Checkpoint saved at index 1599


Processing records:  40%|████      | 1610/3984 [1:19:57<1:34:08,  2.38s/it]


Checkpoint saved at index 1609
Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  41%|████      | 1620/3984 [1:21:34<2:03:14,  3.13s/it] 


Checkpoint saved at index 1619


Processing records:  41%|████      | 1630/3984 [1:22:00<1:22:22,  2.10s/it]


Checkpoint saved at index 1629


Processing records:  41%|████      | 1640/3984 [1:22:27<1:25:40,  2.19s/it]


Checkpoint saved at index 1639


Processing records:  41%|████▏     | 1650/3984 [1:22:54<1:36:08,  2.47s/it]


Checkpoint saved at index 1649


Processing records:  42%|████▏     | 1656/3984 [1:23:13<1:39:07,  2.55s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  42%|████▏     | 1660/3984 [1:24:28<5:54:16,  9.15s/it] 


Checkpoint saved at index 1659


Processing records:  42%|████▏     | 1670/3984 [1:24:56<1:36:38,  2.51s/it]


Checkpoint saved at index 1669


Processing records:  42%|████▏     | 1680/3984 [1:25:24<1:25:28,  2.23s/it]


Checkpoint saved at index 1679


Processing records:  42%|████▏     | 1690/3984 [1:25:50<1:21:02,  2.12s/it]


Checkpoint saved at index 1689


Processing records:  43%|████▎     | 1700/3984 [1:26:18<1:36:09,  2.53s/it]


Checkpoint saved at index 1699


Processing records:  43%|████▎     | 1710/3984 [1:26:44<1:18:26,  2.07s/it]


Checkpoint saved at index 1709


Processing records:  43%|████▎     | 1720/3984 [1:27:11<1:27:34,  2.32s/it]


Checkpoint saved at index 1719


Processing records:  43%|████▎     | 1730/3984 [1:27:38<1:21:55,  2.18s/it]


Checkpoint saved at index 1729


Processing records:  44%|████▎     | 1740/3984 [1:28:04<1:24:03,  2.25s/it]


Checkpoint saved at index 1739


Processing records:  44%|████▍     | 1750/3984 [1:28:33<1:25:47,  2.30s/it]


Checkpoint saved at index 1749


Processing records:  44%|████▍     | 1760/3984 [1:29:02<1:33:44,  2.53s/it]


Checkpoint saved at index 1759


Processing records:  44%|████▍     | 1770/3984 [1:29:33<1:49:32,  2.97s/it]


Checkpoint saved at index 1769


Processing records:  45%|████▍     | 1780/3984 [1:29:59<1:12:36,  1.98s/it]


Checkpoint saved at index 1779


Processing records:  45%|████▍     | 1790/3984 [1:30:25<1:16:22,  2.09s/it]


Checkpoint saved at index 1789


Processing records:  45%|████▌     | 1800/3984 [1:30:52<1:16:32,  2.10s/it]


Checkpoint saved at index 1799


Processing records:  45%|████▌     | 1810/3984 [1:31:20<1:28:41,  2.45s/it]


Checkpoint saved at index 1809


Processing records:  46%|████▌     | 1820/3984 [1:31:49<1:32:02,  2.55s/it]


Checkpoint saved at index 1819


Processing records:  46%|████▌     | 1830/3984 [1:32:15<1:16:03,  2.12s/it]


Checkpoint saved at index 1829


Processing records:  46%|████▌     | 1840/3984 [1:32:42<1:18:07,  2.19s/it]


Checkpoint saved at index 1839


Processing records:  46%|████▋     | 1850/3984 [1:33:08<1:13:31,  2.07s/it]


Checkpoint saved at index 1849


Processing records:  47%|████▋     | 1860/3984 [1:33:33<1:15:04,  2.12s/it]


Checkpoint saved at index 1859


Processing records:  47%|████▋     | 1870/3984 [1:33:59<1:11:30,  2.03s/it]


Checkpoint saved at index 1869


Processing records:  47%|████▋     | 1880/3984 [1:34:26<1:19:22,  2.26s/it]


Checkpoint saved at index 1879


Processing records:  47%|████▋     | 1890/3984 [1:34:52<1:11:27,  2.05s/it]


Checkpoint saved at index 1889


Processing records:  48%|████▊     | 1900/3984 [1:35:18<1:18:04,  2.25s/it]


Checkpoint saved at index 1899


Processing records:  48%|████▊     | 1910/3984 [1:35:44<1:13:11,  2.12s/it]


Checkpoint saved at index 1909


Processing records:  48%|████▊     | 1920/3984 [1:36:11<1:14:07,  2.15s/it]


Checkpoint saved at index 1919


Processing records:  48%|████▊     | 1930/3984 [1:36:39<1:19:48,  2.33s/it]


Checkpoint saved at index 1929


Processing records:  49%|████▊     | 1940/3984 [1:37:03<1:07:46,  1.99s/it]


Checkpoint saved at index 1939


Processing records:  49%|████▉     | 1950/3984 [1:37:29<1:11:37,  2.11s/it]


Checkpoint saved at index 1949


Processing records:  49%|████▉     | 1957/3984 [1:37:49<1:15:46,  2.24s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  49%|████▉     | 1960/3984 [1:38:59<6:27:15, 11.48s/it] 


Checkpoint saved at index 1959


Processing records:  49%|████▉     | 1969/3984 [1:39:25<1:31:54,  2.74s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  49%|████▉     | 1970/3984 [1:40:32<12:21:43, 22.10s/it]


Checkpoint saved at index 1969


Processing records:  50%|████▉     | 1980/3984 [1:41:00<1:40:30,  3.01s/it] 


Checkpoint saved at index 1979


Processing records:  50%|████▉     | 1990/3984 [1:41:26<1:10:21,  2.12s/it]


Checkpoint saved at index 1989


Processing records:  50%|█████     | 2000/3984 [1:41:51<1:10:13,  2.12s/it]


Checkpoint saved at index 1999


Processing records:  50%|█████     | 2010/3984 [1:42:15<1:05:07,  1.98s/it]


Checkpoint saved at index 2009


Processing records:  51%|█████     | 2020/3984 [1:42:45<1:25:26,  2.61s/it]


Checkpoint saved at index 2019


Processing records:  51%|█████     | 2023/3984 [1:42:57<1:41:53,  3.12s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  51%|█████     | 2030/3984 [1:44:17<2:28:04,  4.55s/it] 


Checkpoint saved at index 2029


Processing records:  51%|█████     | 2040/3984 [1:44:47<1:37:08,  3.00s/it]


Checkpoint saved at index 2039


Processing records:  51%|█████▏    | 2050/3984 [1:45:12<1:06:58,  2.08s/it]


Checkpoint saved at index 2049


Processing records:  52%|█████▏    | 2060/3984 [1:45:39<1:10:49,  2.21s/it]


Checkpoint saved at index 2059


Processing records:  52%|█████▏    | 2065/3984 [1:45:56<1:21:05,  2.54s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  52%|█████▏    | 2070/3984 [1:47:10<3:34:37,  6.73s/it] 


Checkpoint saved at index 2069


Processing records:  52%|█████▏    | 2080/3984 [1:47:37<1:11:23,  2.25s/it]


Checkpoint saved at index 2079


Processing records:  52%|█████▏    | 2090/3984 [1:48:03<1:09:26,  2.20s/it]


Checkpoint saved at index 2089


Processing records:  53%|█████▎    | 2100/3984 [1:48:30<1:10:22,  2.24s/it]


Checkpoint saved at index 2099


Processing records:  53%|█████▎    | 2110/3984 [1:48:56<1:06:20,  2.12s/it]


Checkpoint saved at index 2109


Processing records:  53%|█████▎    | 2120/3984 [1:49:22<1:04:09,  2.07s/it]


Checkpoint saved at index 2119


Processing records:  53%|█████▎    | 2130/3984 [1:49:49<1:07:11,  2.17s/it]


Checkpoint saved at index 2129


Processing records:  54%|█████▎    | 2140/3984 [1:50:15<1:07:32,  2.20s/it]


Checkpoint saved at index 2139


Processing records:  54%|█████▍    | 2150/3984 [1:50:40<1:06:10,  2.16s/it]


Checkpoint saved at index 2149


Processing records:  54%|█████▍    | 2160/3984 [1:51:06<1:06:54,  2.20s/it]


Checkpoint saved at index 2159


Processing records:  54%|█████▍    | 2170/3984 [1:51:33<1:08:59,  2.28s/it]


Checkpoint saved at index 2169


Processing records:  55%|█████▍    | 2180/3984 [1:51:58<1:07:12,  2.24s/it]


Checkpoint saved at index 2179


Processing records:  55%|█████▍    | 2190/3984 [1:52:24<1:03:28,  2.12s/it]


Checkpoint saved at index 2189


Processing records:  55%|█████▌    | 2200/3984 [1:52:51<1:03:15,  2.13s/it]


Checkpoint saved at index 2199


Processing records:  55%|█████▌    | 2210/3984 [1:53:16<59:48,  2.02s/it]  


Checkpoint saved at index 2209


Processing records:  56%|█████▌    | 2220/3984 [1:53:43<1:05:21,  2.22s/it]


Checkpoint saved at index 2219


Processing records:  56%|█████▌    | 2230/3984 [1:54:12<1:09:36,  2.38s/it]


Checkpoint saved at index 2229


Processing records:  56%|█████▌    | 2240/3984 [1:54:38<1:04:27,  2.22s/it]


Checkpoint saved at index 2239


Processing records:  56%|█████▋    | 2250/3984 [1:55:04<1:01:43,  2.14s/it]


Checkpoint saved at index 2249


Processing records:  57%|█████▋    | 2260/3984 [1:55:31<1:06:51,  2.33s/it]


Checkpoint saved at index 2259


Processing records:  57%|█████▋    | 2270/3984 [1:55:59<1:02:39,  2.19s/it]


Checkpoint saved at index 2269


Processing records:  57%|█████▋    | 2280/3984 [1:56:24<56:32,  1.99s/it]  


Checkpoint saved at index 2279


Processing records:  57%|█████▋    | 2290/3984 [1:56:49<1:00:25,  2.14s/it]


Checkpoint saved at index 2289


Processing records:  58%|█████▊    | 2300/3984 [1:57:15<58:26,  2.08s/it]  


Checkpoint saved at index 2299


Processing records:  58%|█████▊    | 2310/3984 [1:57:44<1:06:12,  2.37s/it]


Checkpoint saved at index 2309


Processing records:  58%|█████▊    | 2320/3984 [1:58:13<1:08:31,  2.47s/it]


Checkpoint saved at index 2319


Processing records:  58%|█████▊    | 2330/3984 [1:58:41<1:09:43,  2.53s/it]


Checkpoint saved at index 2329


Processing records:  59%|█████▊    | 2338/3984 [1:59:03<1:01:39,  2.25s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  59%|█████▊    | 2340/3984 [2:00:12<7:12:46, 15.79s/it]


Checkpoint saved at index 2339


Processing records:  59%|█████▉    | 2350/3984 [2:00:38<1:07:55,  2.49s/it]


Checkpoint saved at index 2349


Processing records:  59%|█████▉    | 2360/3984 [2:01:05<1:01:10,  2.26s/it]


Checkpoint saved at index 2359


Processing records:  59%|█████▉    | 2370/3984 [2:01:31<57:35,  2.14s/it]  


Checkpoint saved at index 2369


Processing records:  60%|█████▉    | 2380/3984 [2:01:58<57:26,  2.15s/it]  


Checkpoint saved at index 2379


Processing records:  60%|█████▉    | 2390/3984 [2:02:26<1:06:53,  2.52s/it]


Checkpoint saved at index 2389


Processing records:  60%|██████    | 2400/3984 [2:02:53<59:19,  2.25s/it]  


Checkpoint saved at index 2399


Processing records:  60%|██████    | 2410/3984 [2:03:18<55:01,  2.10s/it]  


Checkpoint saved at index 2409


Processing records:  61%|██████    | 2420/3984 [2:03:44<57:28,  2.20s/it]  


Checkpoint saved at index 2419


Processing records:  61%|██████    | 2425/3984 [2:03:59<59:48,  2.30s/it]  

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  61%|██████    | 2430/3984 [2:05:14<2:54:29,  6.74s/it]


Checkpoint saved at index 2429


Processing records:  61%|██████    | 2440/3984 [2:05:41<1:04:43,  2.51s/it]


Checkpoint saved at index 2439


Processing records:  61%|██████▏   | 2450/3984 [2:06:06<51:23,  2.01s/it]  


Checkpoint saved at index 2449


Processing records:  62%|██████▏   | 2460/3984 [2:06:32<59:13,  2.33s/it]  


Checkpoint saved at index 2459


Processing records:  62%|██████▏   | 2470/3984 [2:06:57<49:18,  1.95s/it]  


Checkpoint saved at index 2469


Processing records:  62%|██████▏   | 2480/3984 [2:07:22<52:31,  2.10s/it]  


Checkpoint saved at index 2479


Processing records:  62%|██████▎   | 2490/3984 [2:07:52<1:07:48,  2.72s/it]


Checkpoint saved at index 2489


Processing records:  63%|██████▎   | 2500/3984 [2:08:19<55:27,  2.24s/it]  


Checkpoint saved at index 2499


Processing records:  63%|██████▎   | 2510/3984 [2:08:44<49:29,  2.01s/it]  


Checkpoint saved at index 2509


Processing records:  63%|██████▎   | 2520/3984 [2:09:11<49:12,  2.02s/it]  


Checkpoint saved at index 2519


Processing records:  64%|██████▎   | 2530/3984 [2:09:39<55:25,  2.29s/it]  


Checkpoint saved at index 2529


Processing records:  64%|██████▍   | 2540/3984 [2:10:11<1:16:35,  3.18s/it]


Checkpoint saved at index 2539


Processing records:  64%|██████▍   | 2550/3984 [2:10:41<54:03,  2.26s/it]  


Checkpoint saved at index 2549


Processing records:  64%|██████▍   | 2560/3984 [2:11:07<49:25,  2.08s/it]  


Checkpoint saved at index 2559


Processing records:  65%|██████▍   | 2570/3984 [2:11:33<50:57,  2.16s/it]  


Checkpoint saved at index 2569


Processing records:  65%|██████▍   | 2580/3984 [2:11:59<47:59,  2.05s/it]  


Checkpoint saved at index 2579


Processing records:  65%|██████▌   | 2590/3984 [2:12:26<52:04,  2.24s/it]  


Checkpoint saved at index 2589


Processing records:  65%|██████▌   | 2600/3984 [2:12:51<48:02,  2.08s/it]  


Checkpoint saved at index 2599


Processing records:  66%|██████▌   | 2610/3984 [2:13:17<50:26,  2.20s/it]  


Checkpoint saved at index 2609


Processing records:  66%|██████▌   | 2620/3984 [2:13:44<54:04,  2.38s/it]  


Checkpoint saved at index 2619


Processing records:  66%|██████▌   | 2630/3984 [2:14:10<50:02,  2.22s/it]  


Checkpoint saved at index 2629


Processing records:  66%|██████▌   | 2632/3984 [2:14:19<1:14:27,  3.30s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  66%|██████▌   | 2637/3984 [2:15:35<2:35:44,  6.94s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  66%|██████▋   | 2640/3984 [2:16:45<4:53:16, 13.09s/it]


Checkpoint saved at index 2639


Processing records:  67%|██████▋   | 2650/3984 [2:17:11<57:34,  2.59s/it]  


Checkpoint saved at index 2649


Processing records:  67%|██████▋   | 2660/3984 [2:17:36<48:06,  2.18s/it]  


Checkpoint saved at index 2659


Processing records:  67%|██████▋   | 2670/3984 [2:18:04<52:06,  2.38s/it]  


Checkpoint saved at index 2669


Processing records:  67%|██████▋   | 2680/3984 [2:18:31<49:19,  2.27s/it]  


Checkpoint saved at index 2679


Processing records:  68%|██████▊   | 2690/3984 [2:18:58<48:45,  2.26s/it]  


Checkpoint saved at index 2689


Processing records:  68%|██████▊   | 2700/3984 [2:19:26<48:48,  2.28s/it]  


Checkpoint saved at index 2699


Processing records:  68%|██████▊   | 2710/3984 [2:19:53<44:47,  2.11s/it]  


Checkpoint saved at index 2709


Processing records:  68%|██████▊   | 2720/3984 [2:20:18<46:10,  2.19s/it]  


Checkpoint saved at index 2719


Processing records:  69%|██████▊   | 2730/3984 [2:20:43<44:14,  2.12s/it]  


Checkpoint saved at index 2729


Processing records:  69%|██████▉   | 2740/3984 [2:21:11<46:34,  2.25s/it]  


Checkpoint saved at index 2739


Processing records:  69%|██████▉   | 2750/3984 [2:21:38<49:52,  2.42s/it]  


Checkpoint saved at index 2749


Processing records:  69%|██████▉   | 2760/3984 [2:22:04<46:59,  2.30s/it]  


Checkpoint saved at index 2759


Processing records:  70%|██████▉   | 2770/3984 [2:22:30<45:16,  2.24s/it]  


Checkpoint saved at index 2769


Processing records:  70%|██████▉   | 2780/3984 [2:23:03<46:16,  2.31s/it]  


Checkpoint saved at index 2779


Processing records:  70%|███████   | 2790/3984 [2:23:28<39:27,  1.98s/it]  


Checkpoint saved at index 2789


Processing records:  70%|███████   | 2800/3984 [2:23:54<40:49,  2.07s/it]  


Checkpoint saved at index 2799


Processing records:  71%|███████   | 2810/3984 [2:24:22<42:36,  2.18s/it]  


Checkpoint saved at index 2809


Processing records:  71%|███████   | 2820/3984 [2:24:48<37:55,  1.95s/it]  


Checkpoint saved at index 2819


Processing records:  71%|███████   | 2830/3984 [2:25:20<53:59,  2.81s/it]  


Checkpoint saved at index 2829


Processing records:  71%|███████▏  | 2840/3984 [2:25:46<42:48,  2.24s/it]  


Checkpoint saved at index 2839


Processing records:  72%|███████▏  | 2850/3984 [2:26:12<40:54,  2.16s/it]  


Checkpoint saved at index 2849


Processing records:  72%|███████▏  | 2860/3984 [2:26:39<41:06,  2.19s/it]  


Checkpoint saved at index 2859


Processing records:  72%|███████▏  | 2870/3984 [2:27:07<44:56,  2.42s/it]  


Checkpoint saved at index 2869


Processing records:  72%|███████▏  | 2880/3984 [2:27:33<38:50,  2.11s/it]  


Checkpoint saved at index 2879


Processing records:  73%|███████▎  | 2890/3984 [2:28:00<42:34,  2.34s/it]  


Checkpoint saved at index 2889


Processing records:  73%|███████▎  | 2900/3984 [2:28:26<39:45,  2.20s/it]  


Checkpoint saved at index 2899


Processing records:  73%|███████▎  | 2910/3984 [2:28:53<40:36,  2.27s/it]  


Checkpoint saved at index 2909


Processing records:  73%|███████▎  | 2920/3984 [2:29:18<35:12,  1.99s/it]  


Checkpoint saved at index 2919


Processing records:  74%|███████▎  | 2930/3984 [2:29:41<32:49,  1.87s/it]  


Checkpoint saved at index 2929


Processing records:  74%|███████▍  | 2940/3984 [2:30:08<44:00,  2.53s/it]


Checkpoint saved at index 2939


Processing records:  74%|███████▍  | 2950/3984 [2:30:35<36:43,  2.13s/it]  


Checkpoint saved at index 2949


Processing records:  74%|███████▍  | 2960/3984 [2:31:01<36:34,  2.14s/it]  


Checkpoint saved at index 2959


Processing records:  75%|███████▍  | 2970/3984 [2:31:27<35:27,  2.10s/it]  


Checkpoint saved at index 2969


Processing records:  75%|███████▍  | 2977/3984 [2:31:47<44:24,  2.65s/it]  

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  75%|███████▍  | 2980/3984 [2:32:57<3:15:27, 11.68s/it]


Checkpoint saved at index 2979


Processing records:  75%|███████▌  | 2990/3984 [2:33:23<38:07,  2.30s/it]  


Checkpoint saved at index 2989


Processing records:  75%|███████▌  | 3000/3984 [2:33:48<32:53,  2.01s/it]  


Checkpoint saved at index 2999


Processing records:  76%|███████▌  | 3010/3984 [2:34:14<33:11,  2.04s/it]


Checkpoint saved at index 3009


Processing records:  76%|███████▌  | 3020/3984 [2:34:38<30:57,  1.93s/it]


Checkpoint saved at index 3019


Processing records:  76%|███████▌  | 3030/3984 [2:35:04<34:14,  2.15s/it]


Checkpoint saved at index 3029


Processing records:  76%|███████▋  | 3040/3984 [2:35:29<31:23,  2.00s/it]


Checkpoint saved at index 3039


Processing records:  77%|███████▋  | 3050/3984 [2:35:54<32:23,  2.08s/it]


Checkpoint saved at index 3049


Processing records:  77%|███████▋  | 3060/3984 [2:36:21<33:31,  2.18s/it]


Checkpoint saved at index 3059


Processing records:  77%|███████▋  | 3070/3984 [2:36:47<30:48,  2.02s/it]


Checkpoint saved at index 3069


Processing records:  77%|███████▋  | 3080/3984 [2:37:14<36:25,  2.42s/it]


Checkpoint saved at index 3079


Processing records:  78%|███████▊  | 3090/3984 [2:37:40<30:41,  2.06s/it]


Checkpoint saved at index 3089


Processing records:  78%|███████▊  | 3100/3984 [2:38:06<33:19,  2.26s/it]


Checkpoint saved at index 3099


Processing records:  78%|███████▊  | 3110/3984 [2:38:33<32:17,  2.22s/it]


Checkpoint saved at index 3109


Processing records:  78%|███████▊  | 3120/3984 [2:38:59<30:47,  2.14s/it]


Checkpoint saved at index 3119


Processing records:  79%|███████▊  | 3130/3984 [2:39:26<30:46,  2.16s/it]


Checkpoint saved at index 3129


Processing records:  79%|███████▉  | 3140/3984 [2:39:51<28:14,  2.01s/it]


Checkpoint saved at index 3139


Processing records:  79%|███████▉  | 3150/3984 [2:40:17<28:44,  2.07s/it]


Checkpoint saved at index 3149


Processing records:  79%|███████▉  | 3160/3984 [2:40:41<28:10,  2.05s/it]


Checkpoint saved at index 3159


Processing records:  80%|███████▉  | 3170/3984 [2:41:06<26:36,  1.96s/it]


Checkpoint saved at index 3169


Processing records:  80%|███████▉  | 3180/3984 [2:41:31<26:33,  1.98s/it]


Checkpoint saved at index 3179


Processing records:  80%|████████  | 3190/3984 [2:41:56<27:12,  2.06s/it]


Checkpoint saved at index 3189


Processing records:  80%|████████  | 3200/3984 [2:42:21<26:16,  2.01s/it]


Checkpoint saved at index 3199


Processing records:  81%|████████  | 3210/3984 [2:42:49<31:59,  2.48s/it]


Checkpoint saved at index 3209


Processing records:  81%|████████  | 3220/3984 [2:43:16<27:40,  2.17s/it]


Checkpoint saved at index 3219


Processing records:  81%|████████  | 3230/3984 [2:43:43<25:55,  2.06s/it]


Checkpoint saved at index 3229


Processing records:  81%|████████▏ | 3240/3984 [2:44:08<26:31,  2.14s/it]


Checkpoint saved at index 3239


Processing records:  82%|████████▏ | 3250/3984 [2:44:35<28:06,  2.30s/it]


Checkpoint saved at index 3249


Processing records:  82%|████████▏ | 3260/3984 [2:45:01<26:18,  2.18s/it]


Checkpoint saved at index 3259


Processing records:  82%|████████▏ | 3270/3984 [2:45:27<25:12,  2.12s/it]


Checkpoint saved at index 3269


Processing records:  82%|████████▏ | 3280/3984 [2:45:53<24:20,  2.08s/it]


Checkpoint saved at index 3279


Processing records:  83%|████████▎ | 3290/3984 [2:46:20<24:58,  2.16s/it]


Checkpoint saved at index 3289


Processing records:  83%|████████▎ | 3297/3984 [2:46:40<26:30,  2.32s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  83%|████████▎ | 3300/3984 [2:47:52<2:15:37, 11.90s/it]


Checkpoint saved at index 3299


Processing records:  83%|████████▎ | 3310/3984 [2:48:31<53:40,  4.78s/it]  


Checkpoint saved at index 3309


Processing records:  83%|████████▎ | 3320/3984 [2:48:58<25:19,  2.29s/it]  


Checkpoint saved at index 3319


Processing records:  84%|████████▎ | 3330/3984 [2:49:25<24:23,  2.24s/it]


Checkpoint saved at index 3329


Processing records:  84%|████████▍ | 3340/3984 [2:49:51<22:51,  2.13s/it]


Checkpoint saved at index 3339


Processing records:  84%|████████▍ | 3350/3984 [2:50:18<21:36,  2.04s/it]


Checkpoint saved at index 3349


Processing records:  84%|████████▍ | 3360/3984 [2:50:44<24:00,  2.31s/it]


Checkpoint saved at index 3359


Processing records:  85%|████████▍ | 3370/3984 [2:51:10<21:13,  2.07s/it]


Checkpoint saved at index 3369


Processing records:  85%|████████▍ | 3380/3984 [2:51:35<20:38,  2.05s/it]


Checkpoint saved at index 3379


Processing records:  85%|████████▌ | 3390/3984 [2:52:01<21:28,  2.17s/it]


Checkpoint saved at index 3389


Processing records:  85%|████████▌ | 3400/3984 [2:52:26<21:12,  2.18s/it]


Checkpoint saved at index 3399


Processing records:  86%|████████▌ | 3410/3984 [2:52:53<21:41,  2.27s/it]


Checkpoint saved at index 3409


Processing records:  86%|████████▌ | 3420/3984 [2:53:18<18:20,  1.95s/it]


Checkpoint saved at index 3419


Processing records:  86%|████████▌ | 3430/3984 [2:53:44<19:13,  2.08s/it]


Checkpoint saved at index 3429


Processing records:  86%|████████▋ | 3440/3984 [2:54:11<20:45,  2.29s/it]


Checkpoint saved at index 3439


Processing records:  87%|████████▋ | 3450/3984 [2:54:38<20:46,  2.33s/it]


Checkpoint saved at index 3449


Processing records:  87%|████████▋ | 3460/3984 [2:55:05<20:56,  2.40s/it]


Checkpoint saved at index 3459


Processing records:  87%|████████▋ | 3470/3984 [2:55:31<16:52,  1.97s/it]


Checkpoint saved at index 3469


Processing records:  87%|████████▋ | 3480/3984 [2:55:59<18:38,  2.22s/it]


Checkpoint saved at index 3479


Processing records:  88%|████████▊ | 3490/3984 [2:56:25<17:00,  2.07s/it]


Checkpoint saved at index 3489


Processing records:  88%|████████▊ | 3500/3984 [2:56:49<16:14,  2.01s/it]


Checkpoint saved at index 3499


Processing records:  88%|████████▊ | 3510/3984 [2:57:14<15:12,  1.93s/it]


Checkpoint saved at index 3509


Processing records:  88%|████████▊ | 3520/3984 [2:57:40<16:24,  2.12s/it]


Checkpoint saved at index 3519


Processing records:  89%|████████▊ | 3530/3984 [2:58:04<15:44,  2.08s/it]


Checkpoint saved at index 3529


Processing records:  89%|████████▉ | 3540/3984 [2:58:30<15:39,  2.12s/it]


Checkpoint saved at index 3539


Processing records:  89%|████████▉ | 3550/3984 [2:58:56<16:11,  2.24s/it]


Checkpoint saved at index 3549


Processing records:  89%|████████▉ | 3560/3984 [2:59:22<15:40,  2.22s/it]


Checkpoint saved at index 3559


Processing records:  90%|████████▉ | 3570/3984 [2:59:48<15:19,  2.22s/it]


Checkpoint saved at index 3569


Processing records:  90%|████████▉ | 3580/3984 [3:00:14<14:48,  2.20s/it]


Checkpoint saved at index 3579


Processing records:  90%|█████████ | 3590/3984 [3:00:41<15:36,  2.38s/it]


Checkpoint saved at index 3589


Processing records:  90%|█████████ | 3600/3984 [3:01:05<13:12,  2.06s/it]


Checkpoint saved at index 3599


Processing records:  91%|█████████ | 3610/3984 [3:01:34<14:50,  2.38s/it]


Checkpoint saved at index 3609


Processing records:  91%|█████████ | 3620/3984 [3:02:09<13:22,  2.20s/it]


Checkpoint saved at index 3619


Processing records:  91%|█████████ | 3630/3984 [3:02:36<14:17,  2.42s/it]


Checkpoint saved at index 3629


Processing records:  91%|█████████▏| 3640/3984 [3:03:01<12:04,  2.10s/it]


Checkpoint saved at index 3639


Processing records:  92%|█████████▏| 3650/3984 [3:03:31<19:17,  3.46s/it]


Checkpoint saved at index 3649


Processing records:  92%|█████████▏| 3660/3984 [3:03:58<13:47,  2.55s/it]


Checkpoint saved at index 3659


Processing records:  92%|█████████▏| 3670/3984 [3:04:23<11:08,  2.13s/it]


Checkpoint saved at index 3669


Processing records:  92%|█████████▏| 3680/3984 [3:04:53<10:44,  2.12s/it]


Checkpoint saved at index 3679


Processing records:  93%|█████████▎| 3690/3984 [3:05:19<10:35,  2.16s/it]


Checkpoint saved at index 3689


Processing records:  93%|█████████▎| 3700/3984 [3:05:46<11:15,  2.38s/it]


Checkpoint saved at index 3699


Processing records:  93%|█████████▎| 3710/3984 [3:06:12<09:16,  2.03s/it]


Checkpoint saved at index 3709


Processing records:  93%|█████████▎| 3720/3984 [3:06:38<09:57,  2.26s/it]


Checkpoint saved at index 3719


Processing records:  94%|█████████▎| 3730/3984 [3:07:04<08:57,  2.11s/it]


Checkpoint saved at index 3729


Processing records:  94%|█████████▍| 3740/3984 [3:07:27<07:40,  1.89s/it]


Checkpoint saved at index 3739


Processing records:  94%|█████████▍| 3750/3984 [3:07:55<08:55,  2.29s/it]


Checkpoint saved at index 3749


Processing records:  94%|█████████▍| 3760/3984 [3:08:22<07:50,  2.10s/it]


Checkpoint saved at index 3759


Processing records:  95%|█████████▍| 3770/3984 [3:08:50<07:08,  2.00s/it]


Checkpoint saved at index 3769


Processing records:  95%|█████████▍| 3780/3984 [3:09:16<07:10,  2.11s/it]


Checkpoint saved at index 3779


Processing records:  95%|█████████▌| 3790/3984 [3:09:41<06:28,  2.00s/it]


Checkpoint saved at index 3789


Processing records:  95%|█████████▌| 3800/3984 [3:10:08<07:15,  2.37s/it]


Checkpoint saved at index 3799


Processing records:  96%|█████████▌| 3810/3984 [3:10:35<06:24,  2.21s/it]


Checkpoint saved at index 3809


Processing records:  96%|█████████▌| 3820/3984 [3:10:59<05:14,  1.92s/it]


Checkpoint saved at index 3819


Processing records:  96%|█████████▌| 3830/3984 [3:11:25<05:32,  2.16s/it]


Checkpoint saved at index 3829


Processing records:  96%|█████████▋| 3840/3984 [3:11:50<04:59,  2.08s/it]


Checkpoint saved at index 3839


Processing records:  97%|█████████▋| 3850/3984 [3:12:15<04:34,  2.05s/it]


Checkpoint saved at index 3849


Processing records:  97%|█████████▋| 3860/3984 [3:12:40<04:05,  1.98s/it]


Checkpoint saved at index 3859


Processing records:  97%|█████████▋| 3870/3984 [3:13:05<03:59,  2.10s/it]


Checkpoint saved at index 3869


Processing records:  97%|█████████▋| 3871/3984 [3:13:13<07:03,  3.74s/it]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...
Rate limit hit or error 400. Waiting for 32 seconds before retry...


Processing records:  97%|█████████▋| 3880/3984 [3:14:38<05:32,  3.20s/it]


Checkpoint saved at index 3879


Processing records:  98%|█████████▊| 3890/3984 [3:15:03<03:25,  2.19s/it]


Checkpoint saved at index 3889


Processing records:  98%|█████████▊| 3900/3984 [3:15:29<02:58,  2.13s/it]


Checkpoint saved at index 3899


Processing records:  98%|█████████▊| 3910/3984 [3:15:53<02:25,  1.97s/it]


Checkpoint saved at index 3909


Processing records:  98%|█████████▊| 3920/3984 [3:16:19<02:12,  2.07s/it]


Checkpoint saved at index 3919


Processing records:  99%|█████████▊| 3930/3984 [3:16:47<02:01,  2.24s/it]


Checkpoint saved at index 3929


Processing records:  99%|█████████▉| 3940/3984 [3:17:12<01:35,  2.17s/it]


Checkpoint saved at index 3939


Processing records:  99%|█████████▉| 3950/3984 [3:17:37<01:07,  1.99s/it]


Checkpoint saved at index 3949


Processing records:  99%|█████████▉| 3960/3984 [3:18:03<00:52,  2.20s/it]


Checkpoint saved at index 3959


Processing records: 100%|█████████▉| 3970/3984 [3:18:27<00:27,  1.94s/it]


Checkpoint saved at index 3969


Processing records: 100%|█████████▉| 3980/3984 [3:18:52<00:08,  2.02s/it]


Checkpoint saved at index 3979


Processing records: 100%|██████████| 3984/3984 [3:19:11<00:00,  3.00s/it]


Checkpoint saved at index 3983
Processing complete. Results saved to 35_ehr_review.csv





In [9]:
df.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'patientdurablekey',
       'encounterkey', 'ArrivalDateKey', 'DepartureDateKeyValue',
       'DepartureDateKey', 'DispositionDateKeyValue',
       'primarychiefcomplaintname', 'primaryeddiagnosisname', 'sex',
       'birthdate', 'firstrace', 'preferredlanguage',
       'highestlevelofeducation', 'maritalstatus', 'Age',
       'Discharge_Summary_Date', 'Discharge_Summary_Note_Key',
       'Progress_Note_Date', 'Progress_Note_Key', 'HP_Note_Date',
       'HP_Note_Key', 'Echo_Date', 'Echo_Key', 'Imaging_Date', 'Imaging_Key',
       'Consult_Date', 'Consult_Key', 'ED_Provider_Notes_Date',
       'ED_Provider_Notes_Key', 'ECG_Date', 'ECG_Key',
       'Discharge_Summary_Text', 'Progress_Note_Text', 'HP_Note_Text',
       'Echo_Text', 'Imaging_Text', 'Consult_Text', 'ECG_Text',
       'ED_Provider_Notes_Text', 'One_Sentence_Extracted', 'note_count',
       'acuitylevel', 'eddisposition', 'Generated_Summary'],
      dtype='object')

In [10]:
df.shape

(3984, 47)

In [13]:
import pandas as pd
import requests
import time
import os
import json
from tqdm import tqdm  # For progress bar

# Function to get a summary from Azure OpenAI
def get_summary(chief_complaint, discharge_summary, age, sex):
    # Check if key fields are missing
    if pd.isna(chief_complaint) or pd.isna(discharge_summary):
        return None  # Skip if any key field is missing
    
    url = f"{RESOURCE_ENDPOINT}/openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version={API_VERSION}"
    
    headers = {
        "Content-Type": "application/json",
        "api-key": API_KEY
    }
    
    payload = {
        "messages": [
            {"role": "system", "content": "You are an experienced emergency department (ED) physician creating a one-liner for a NEW patient who has just arrived at the ED. The patient's past medical records are available to you. Your task is to summarize the patient's relevant PAST medical history and end with their CURRENT chief complaint that is given with no adjectives about the chief complaint as you can NOT assume anything about their current condition. All notes and medical records provided are from PAST encounters, not the current visit."},
            {"role": "user", "content": f"Create a concise one-liner summary for a patient who has just arrived at the Emergency Department. The one-liner must:\n\n"
                                      f"1. Start with demographic information (age, sex). Example of a one liner:  80 y.o. old male, with h/o of HFpEF (EF 55-60% 05/20/22), HTN, HLD, and bipolar disorder presenting with shortness of breath. \n"
                                      f"2. Include a concise summary of relevant PAST medical history from previous visits/notes\n"
                                      f"3. End with just CURRENT presenting chief complaint that is not capitilized in the summary and does have additional information regarding the chief complaint: '{chief_complaint}'\n\n"
                                      f"IMPORTANT: Everything in the notes is from PAST encounters. The patient is NOW presenting with a NEW complaint: '{chief_complaint}'.\n\n"
                                      f"Age: {age}\n"
                                      f"Sex: {sex}\n"
                                      f"PAST Medical Records:\n{discharge_summary}"}
        ],
        "temperature": 0.1,
        "max_tokens": 4096
    }
    
    retries = 0
    max_retries = 5
    backoff_factor = 2
    
    while retries < max_retries:
        try:
            response = requests.post(url, headers=headers, json=payload)
            
            # Handle rate limiting (status code 429) or other 4xx errors
            if response.status_code == 429 or (response.status_code >= 400 and response.status_code < 500):
                wait_time = (backoff_factor ** retries) * 2  # Exponential backoff
                print(f"Rate limit hit or error {response.status_code}. Waiting for {wait_time} seconds before retry...")
                time.sleep(wait_time)
                retries += 1
                continue
                
            response.raise_for_status()  # Raise an error for other non-200 responses
            return response.json()["choices"][0]["message"]["content"].strip()
            
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}. Retrying {retries+1}/{max_retries}...")
            wait_time = (backoff_factor ** retries) * 2  # Exponential backoff
            time.sleep(wait_time)
            retries += 1
    
    return None  # Return None if all retries fail

# Function to process dataframe with checkpoint saving - only process null values
def process_dataframe_with_checkpoints(df, checkpoint_file="final_35_processing_checkpoint.json", output_file="35_ehr_review.csv", batch_size=10):
    # Ensure Generated_Summary column exists
    if 'Generated_Summary' not in df.columns:
        df['Generated_Summary'] = None
    
    # Load existing data if available
    if os.path.exists(output_file):
        saved_df = pd.read_csv(output_file)
        if 'Generated_Summary' in saved_df.columns:
            # Copy over existing summaries to avoid reprocessing
            for idx in range(len(saved_df)):
                if idx < len(df) and not pd.isna(saved_df.loc[idx, 'Generated_Summary']):
                    df.loc[idx, 'Generated_Summary'] = saved_df.loc[idx, 'Generated_Summary']
    
    # Create a list of indices that need processing (where Generated_Summary is null)
    indices_to_process = [i for i in range(len(df)) if pd.isna(df.loc[i, 'Generated_Summary'])]
    
    # Load checkpoint if it exists
    start_idx = 0
    if os.path.exists(checkpoint_file):
        with open(checkpoint_file, 'r') as f:
            checkpoint_data = json.load(f)
            last_processed_index = checkpoint_data.get('last_processed_index', 0)
            # Find the position in our indices list
            for pos, idx in enumerate(indices_to_process):
                if idx > last_processed_index:
                    start_idx = pos
                    break
    
    # Skip if all rows are already processed
    if not indices_to_process:
        print("All rows already have summaries. No processing needed.")
        return df
    
    # Process rows with null values
    total_to_process = len(indices_to_process)
    print(f"Found {total_to_process} rows with null summaries to process")
    
    progress_bar = tqdm(total=total_to_process, initial=start_idx, desc="Processing records")
    
    # Process only rows with null values
    for pos in range(start_idx, len(indices_to_process)):
        i = indices_to_process[pos]
        row = df.iloc[i]
        
        # Process the current row
        summary = get_summary(
            row["primarychiefcomplaintname"], 
            row["Discharge_Summary_Text"],
            row["Age"],
            row["sex"]
        )
        
        # Update dataframe
        df.loc[i, 'Generated_Summary'] = summary
        
        # Update progress bar
        progress_bar.update(1)
        
        # Add delay between API calls to prevent rate limiting
        time.sleep(1)  # Wait 1 second between calls
        
        # Save checkpoint and intermediate results after each batch
        if (pos + 1) % batch_size == 0 or pos == len(indices_to_process) - 1:
            # Save checkpoint
            with open(checkpoint_file, 'w') as f:
                json.dump({'last_processed_index': i}, f)
            
            # Save current results
            df.to_csv(output_file, index=False)
            print(f"\nCheckpoint saved at index {i}, processed {pos+1}/{total_to_process} null values")
    
    progress_bar.close()
    print(f"Processing complete. Results saved to {output_file}")
    
    # Clean up checkpoint file when done
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)
    
    return df

# Apply the processing function to the dataframe
df = process_dataframe_with_checkpoints(df)

Found 17 rows with null summaries to process


Processing records:   0%|          | 0/17 [00:00<?, ?it/s]

Rate limit hit or error 400. Waiting for 2 seconds before retry...
Rate limit hit or error 400. Waiting for 4 seconds before retry...
Rate limit hit or error 400. Waiting for 8 seconds before retry...
Rate limit hit or error 400. Waiting for 16 seconds before retry...


KeyboardInterrupt: 

In [12]:
df.shape

(3984, 47)