In [1]:
import pandas as pd
import numpy as np

from transformers import AutoTokenizer, AutoModel
import torch
from pytorch_tabular.models.tab_transformer import TabTransformerModel
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Step 1: Load Dataset
def load_data(file_path):
    """Loads the dataset and provides an initial overview."""
    data = pd.read_csv(file_path)
    print("Initial Dataset Info:\n", data.info())
    print("\nSample Data:\n", data.head())
    return data
data = load_data("C:\\Users\\sriha\\Music\\Case Comps\\NEST\\Data\\category_updated.csv")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 257577 entries, 0 to 257576
Data columns (total 22 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Unnamed: 0                  257577 non-null  int64  
 1   NCT Number                  257577 non-null  object 
 2   Study Title                 257577 non-null  object 
 3   Study Status                257577 non-null  object 
 4   Brief Summary               257577 non-null  object 
 5   Conditions                  257577 non-null  object 
 6   Primary Outcome Measures    247086 non-null  object 
 7   Secondary Outcome Measures  185779 non-null  object 
 8   Other Outcome Measures      18272 non-null   object 
 9   Sponsor                     257577 non-null  object 
 10  Collaborators               83679 non-null   object 
 11  Sex                         257317 non-null  object 
 12  Age                         257577 non-null  object 
 13  Phases        

In [3]:
data = data.head()
data

Unnamed: 0.1,Unnamed: 0,NCT Number,Study Title,Study Status,Brief Summary,Conditions,Primary Outcome Measures,Secondary Outcome Measures,Other Outcome Measures,Sponsor,...,Age,Phases,Enrollment,Funder Type,Study Type,Study Design,Start Month,Start Quarter,Condition Category,Conditions_Category
0,0,NCT00559130,Efficacy Study of CytoSorb Hemoperfusion Devic...,Completed,The hypothesis of this study is use of CytoSor...,Acute Respiratory Distress Syndrome|Acute Lung...,Relative IL-6 levels as a percent (%) of basel...,"Ventilator Free Days, Reduction cytokines TNF-...",,"MedaSorb Technologies, Inc",...,"ADULT, OLDER_ADULT",,100.0,INDUSTRY,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: PAR...,11,4,Other Rare or Unclassified,Other Rare or Unclassified
1,1,NCT00937664,Safety and Tolerability Study of AZD7762 in Co...,Not_Completed,The primary purpose of this study is to find o...,Cancer|Solid Tumors|Advanced Solid Malignancies,Assessment of adverse events (based on CTCAE v...,Pharmacokinetic effect of AZD7762 when adminis...,,AstraZeneca,...,"ADULT, OLDER_ADULT",PHASE1,24.0,INDUSTRY,INTERVENTIONAL,Allocation: NON_RANDOMIZED|Intervention Model:...,7,3,Oncology,Oncology
2,2,NCT00441597,Does Atorvastatin Reduce Ischemia-Reperfusion ...,Completed,To study the impact of 3 day exposure to atorv...,Ischemia Reperfusion Injury|Cardiovascular Dis...,Annexin A 5 targeting in the non dominant then...,"workload during ischemic exercise, workload du...",,Radboud University Medical Center,...,ADULT,PHASE4,30.0,OTHER,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: CRO...,2,1,Other Rare or Unclassified,Other Rare or Unclassified
3,3,NCT03296228,Comparison of Dynamic Radiographs in Determini...,Completed,The purpose of this study is to identify the f...,Adolescent Idiopathic Scoliosis,Investigate the flexibility equivalence of dif...,Incorporate these findings into the Lenke Clas...,,The University of Hong Kong,...,"CHILD, ADULT",,134.0,OTHER,OBSERVATIONAL,Observational Model: |Time Perspective: p,-1,-1,Other Rare or Unclassified,Non-Oncology
4,4,NCT00421603,A Placebo-Controlled Study of Mixed Amphetamin...,Completed,"The proposed protocol is a double-blind, place...",Cocaine Dependence,Three Weeks of Continuous Cocaine Abstinence a...,,,New York State Psychiatric Institute,...,ADULT,PHASE2,81.0,OTHER,INTERVENTIONAL,Allocation: RANDOMIZED|Intervention Model: PAR...,2,1,Other Rare or Unclassified,Non-Oncology


In [4]:
clinicaltokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
clinicalmodel = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

In [3]:
# Combine all text attributes into a single column 'Unstructured'
data["Unstructured"] = data[
    ["Brief Summary", "Study Title", "Primary Outcome Measures", "Secondary Outcome Measures"]
].astype(str).agg(" [SEP] ".join, axis=1)

# Drop the original text columns
data = data.drop(columns=["Brief Summary", "Study Title", "Primary Outcome Measures", "Secondary Outcome Measures"])

# Display updated DataFrame
print(data)

        Unnamed: 0   NCT Number   Study Status  \
0                0  NCT00559130      Completed   
1                1  NCT00937664  Not_Completed   
2                2  NCT00441597      Completed   
3                3  NCT03296228      Completed   
4                4  NCT00421603      Completed   
...            ...          ...            ...   
257572      257572  NCT02360800      Completed   
257573      257573  NCT02352506      Completed   
257574      257574  NCT04996381      Completed   
257575      257575  NCT00380640      Completed   
257576      257576  NCT01844336      Completed   

                                               Conditions  \
0       Acute Respiratory Distress Syndrome|Acute Lung...   
1         Cancer|Solid Tumors|Advanced Solid Malignancies   
2       Ischemia Reperfusion Injury|Cardiovascular Dis...   
3                         Adolescent Idiopathic Scoliosis   
4                                      Cocaine Dependence   
...                              

In [13]:
data['Unstructured']

0         The hypothesis of this study is use of CytoSor...
1         The primary purpose of this study is to find o...
2         To study the impact of 3 day exposure to atorv...
3         The purpose of this study is to identify the f...
4         The proposed protocol is a double-blind, place...
                                ...                        
257572    Bleeding after redo cardiac surgery is a commo...
257573    Acute kidney injury (AKI) is a common complica...
257574    The investigators will develop an artificial i...
257575    The purpose of this study is to assess the eff...
257576    The purpose of this study is to evaluate the p...
Name: Unstructured, Length: 257577, dtype: object

In [7]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Function to extract ClinicalBERT embeddings in GPU-friendly batches
def clinical_extract_text_embeddings(text_list, batch_size=512):
    """Extracts embeddings for text data using ClinicalBERT with batch processing on GPU."""
    embeddings = []
    
    dataloader = DataLoader(text_list, batch_size=batch_size, shuffle=False, pin_memory=True)

    for batch_texts in tqdm(dataloader, desc="Processing Batches", unit="batch"):
        try:
            # Tokenization and moving inputs to GPU
            inputs = clinicaltokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True, max_length=512)

            # Ensure all tensors are moved to GPU
            inputs = {key: val.to(device, non_blocking=True) for key, val in inputs.items()}  

            with torch.no_grad():  # No gradient tracking for inference
                outputs = clinicalmodel(**inputs)

            # Compute mean pooling of token embeddings (Sentence Representation)
            batch_embeddings = outputs.last_hidden_state.mean(dim=1).to(device)  # Ensure GPU tensor

            embeddings.append(batch_embeddings)

            # Free GPU memory after every batch
            del inputs, outputs
            torch.cuda.empty_cache()

        except torch.cuda.OutOfMemoryError:
            print("\n⚠️ CUDA OOM Error: Reducing batch size and retrying...\n")
            torch.cuda.empty_cache()
            return clinical_extract_text_embeddings(text_list, batch_size=max(batch_size // 2, 1))  # Reduce batch size & retry

    if not embeddings:
        print("⚠️ No embeddings extracted for this chunk! Skipping...")
        return None  # Return None if extraction failed

    return torch.cat([e.to(device) for e in embeddings], dim=0).cpu().numpy()  # Ensure all tensors are on GPU before conversion

# Split dataset into 100 chunks
num_chunks = 1
chunk_size = len(data) // num_chunks

for i in range(num_chunks):
    print(f"\n🚀 Processing chunk {i+1}/{num_chunks}...")

    # Select subset of data for this chunk
    start_idx = i * chunk_size
    end_idx = (i + 1) * chunk_size if i != num_chunks - 1 else len(data)  # Last chunk gets remaining data
    chunk = data.iloc[start_idx:end_idx].copy()  # Use .copy() to avoid warnings

    # Convert text column to list
    clinical_text_data = chunk["Unstructured"].astype(str).tolist()

    # Extract embeddings using batch processing on GPU
    embeddings_cpu = clinical_extract_text_embeddings(clinical_text_data, batch_size=512)  # Returns NumPy array

    # **Fix: Skip saving if embeddings is None (means extraction failed)**
    if embeddings_cpu is None:
        print(f"⚠️ Skipping chunk {i+1} due to failed embedding extraction.")
        continue  # Skip to the next chunk

    # Create DataFrame with embedding columns
    clinical_embedding_df = pd.DataFrame(embeddings_cpu, index=chunk.index)
    clinical_embedding_df.columns = [f"Unstructured_embed_{j}" for j in range(embeddings_cpu.shape[1])]

    # Merge embeddings with original text in the chunk
    chunk = pd.concat([chunk, clinical_embedding_df], axis=1)

    # Save the chunk with both text and embeddings
    output_filename = f"NEST_chunk_clinical_{i+1}.csv"
    chunk.to_csv(output_filename, index=True)
    
    print(f"✅ Saved chunk {i+1} to {output_filename}")

    # Free GPU memory after each chunk
    del embeddings_cpu, clinical_embedding_df, chunk
    torch.cuda.empty_cache()

print("\n🎉 All chunks successfully saved as separate CSV files!")


Using device: cuda

🚀 Processing chunk 1/1...


Processing Batches:   0%|          | 0/1 [00:00<?, ?batch/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)

In [15]:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader

# Load BioBERT model & tokenizer
biotokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.1")
biomodel = AutoModel.from_pretrained("dmis-lab/biobert-base-cased-v1.1").to("cuda")  # Move model to GPU

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Function to extract BioBERT embeddings in GPU-friendly batches
def bio_extract_text_embeddings(text_list, batch_size=512):
    """Extracts embeddings for text data using BioBERT with batch processing on GPU."""
    embeddings = []
    
    dataloader = DataLoader(text_list, batch_size=batch_size, shuffle=False)

    for batch_texts in tqdm(dataloader, desc="Processing Batches", unit="batch"):
        try:
            # Tokenization and moving inputs to GPU
            inputs = biotokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
            inputs = {key: val.to(device) for key, val in inputs.items()}  # Ensure all tensors are on GPU
            
            with torch.no_grad():  # No gradient tracking for inference
                outputs = biomodel(**inputs)

            # Compute mean pooling of token embeddings (Sentence Representation)
            batch_embeddings = outputs.last_hidden_state.mean(dim=1)  # Stays on GPU

            embeddings.append(batch_embeddings)

            # Free GPU memory after every batch
            del inputs, outputs
            torch.cuda.empty_cache()

        except torch.cuda.OutOfMemoryError:
            print("\n⚠️ CUDA OOM Error: Reducing batch size and retrying...\n")
            torch.cuda.empty_cache()
            return bio_extract_text_embeddings(text_list, batch_size=max(batch_size // 2, 1))  # Reduce batch size & retry

    return torch.cat(embeddings, dim=0).cpu().numpy()  # Move embeddings to CPU & convert to NumPy

# Split dataset into 100 chunks
num_chunks = 100
chunk_size = len(data) // num_chunks

for i in range(num_chunks):
    print(f"\n🚀 Processing chunk {i+1}/{num_chunks}...")

    # Select subset of data for this chunk
    start_idx = i * chunk_size
    end_idx = (i + 1) * chunk_size if i != num_chunks - 1 else len(data)  # Last chunk gets remaining data
    chunk = data.iloc[start_idx:end_idx].copy()  # Use .copy() to avoid warnings

    # Convert text column to list
    bio_text_data = chunk["Unstructured"].astype(str).tolist()

    # Extract embeddings using batch processing on GPU
    embeddings_cpu = bio_extract_text_embeddings(bio_text_data, batch_size=512)  # Returns NumPy array

    # Create DataFrame with embedding columns
    bioembedding_df = pd.DataFrame(embeddings_cpu, index=chunk.index)
    bioembedding_df.columns = [f"Unstructured_embed_{j}" for j in range(embeddings_cpu.shape[1])]

    # Merge embeddings with original text in the chunk
    chunk = pd.concat([chunk, bioembedding_df], axis=1)

    # Save the chunk with both text and embeddings
    output_filename = f"NEST_chunk_{i+1}.csv"
    chunk.to_csv(output_filename, index=True)
    
    print(f"✅ Saved chunk {i+1} to {output_filename}")

    # Free GPU memory after each chunk
    del embeddings_cpu, bioembedding_df, chunk
    torch.cuda.empty_cache()

print("\n🎉 All chunks successfully saved as separate CSV files!")


Using device: cuda

🚀 Processing chunk 1/100...


Processing Batches:  17%|█▋        | 1/6 [00:40<03:21, 40.29s/batch]


RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasLtMatmul with transpose_mat1 0 transpose_mat2 0 m 768 n 262144 k 768 mat1_ld 768 mat2_ld 768 result_ld 768 abcType 0 computeType 68 scaleType 0

In [5]:
import glob

# Load all chunk files and concatenate
files = glob.glob("NEST_chunk_clinical*.csv")
df_list = [pd.read_csv(f) for f in files]
final_df = pd.concat(df_list, axis=0)
final_df.head(10)

# Save final combined dataset
final_df.to_csv("NEST_clinical_embeddings_full.csv", index=False)
print("🎯 Merged all chunks into NEST_final_embeddings.csv")


🎯 Merged all chunks into NEST_final_embeddings.csv


In [4]:
# Load BioBERT model & tokenizer
clinicaltokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
clinicalmodel = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT").to("cuda")  # Move model to GPU

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Function to extract BioBERT embeddings in GPU-friendly batches
def clinical_extract_text_embeddings(text_list, batch_size=512):
    """Extracts embeddings for text data using BioBERT with batch processing on GPU."""
    embeddings = []
    
    dataloader = DataLoader(text_list, batch_size=batch_size, shuffle=False)

    for batch_texts in tqdm(dataloader, desc="Processing Batches", unit="batch"):
        try:
            # Tokenization and moving inputs to GPU
            inputs = clinicaltokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
            inputs = {key: val.to(device) for key, val in inputs.items()}  # Ensure all tensors are on GPU
            
            with torch.no_grad():  # No gradient tracking for inference
                outputs = clinicalmodel(**inputs)

            # Compute mean pooling of token embeddings (Sentence Representation)
            batch_embeddings = outputs.last_hidden_state.mean(dim=1)  # Stays on GPU

            embeddings.append(batch_embeddings)

            # Free GPU memory after every batch
            del inputs, outputs
            torch.cuda.empty_cache()

        except torch.cuda.OutOfMemoryError:
            print("\n⚠️ CUDA OOM Error: Reducing batch size and retrying...\n")
            torch.cuda.empty_cache()
            return clinical_extract_text_embeddings(text_list, batch_size=max(batch_size // 2, 1))  # Reduce batch size & retry

    return torch.cat(embeddings, dim=0).cpu().numpy()  # Move embeddings to CPU & convert to NumPy

# Split dataset into 100 chunks
num_chunks = 100
chunk_size = len(data) // num_chunks

for i in range(num_chunks):
    print(f"\n🚀 Processing chunk {i+1}/{num_chunks}...")

    # Select subset of data for this chunk
    start_idx = i * chunk_size
    end_idx = (i + 1) * chunk_size if i != num_chunks - 1 else len(data)  # Last chunk gets remaining data
    chunk = data.iloc[start_idx:end_idx].copy()  # Use .copy() to avoid warnings

    # Convert text column to list
    clinical_text_data = chunk["Unstructured"].astype(str).tolist()

    # Extract embeddings using batch processing on GPU
    embeddings_cpu = clinical_extract_text_embeddings(clinical_text_data, batch_size=512)  # Returns NumPy array

    # Create DataFrame with embedding columns
    clinicalembedding_df = pd.DataFrame(embeddings_cpu, index=chunk.index)
    clinicalembedding_df.columns = [f"Unstructured_embed_{j}" for j in range(embeddings_cpu.shape[1])]

    # Merge embeddings with original text in the chunk
    chunk = pd.concat([chunk, clinicalembedding_df], axis=1)

    # Save the chunk with both text and embeddings
    output_filename = f"NEST_chunk_clinical{i+1}.csv"
    chunk.to_csv(output_filename, index=True)
    
    print(f"✅ Saved chunk {i+1} to {output_filename}")

    # Free GPU memory after each chunk
    del embeddings_cpu, clinicalembedding_df, chunk
    torch.cuda.empty_cache()

print("\n🎉 All chunks successfully saved as separate CSV files!")


Using device: cuda

🚀 Processing chunk 1/100...


Processing Batches: 100%|██████████| 6/6 [02:22<00:00, 23.83s/batch]


✅ Saved chunk 1 to NEST_chunk_clinical1.csv

🚀 Processing chunk 2/100...


Processing Batches: 100%|██████████| 6/6 [02:23<00:00, 23.86s/batch]


✅ Saved chunk 2 to NEST_chunk_clinical2.csv

🚀 Processing chunk 3/100...


Processing Batches: 100%|██████████| 6/6 [02:24<00:00, 24.16s/batch]


✅ Saved chunk 3 to NEST_chunk_clinical3.csv

🚀 Processing chunk 4/100...


Processing Batches: 100%|██████████| 6/6 [02:48<00:00, 28.01s/batch]


✅ Saved chunk 4 to NEST_chunk_clinical4.csv

🚀 Processing chunk 5/100...


Processing Batches: 100%|██████████| 6/6 [03:01<00:00, 30.27s/batch]


✅ Saved chunk 5 to NEST_chunk_clinical5.csv

🚀 Processing chunk 6/100...


Processing Batches: 100%|██████████| 6/6 [02:51<00:00, 28.57s/batch]


✅ Saved chunk 6 to NEST_chunk_clinical6.csv

🚀 Processing chunk 7/100...


Processing Batches: 100%|██████████| 6/6 [02:57<00:00, 29.59s/batch]


✅ Saved chunk 7 to NEST_chunk_clinical7.csv

🚀 Processing chunk 8/100...


Processing Batches: 100%|██████████| 6/6 [03:12<00:00, 32.07s/batch]


✅ Saved chunk 8 to NEST_chunk_clinical8.csv

🚀 Processing chunk 9/100...


Processing Batches: 100%|██████████| 6/6 [02:57<00:00, 29.54s/batch]


✅ Saved chunk 9 to NEST_chunk_clinical9.csv

🚀 Processing chunk 10/100...


Processing Batches: 100%|██████████| 6/6 [02:22<00:00, 23.68s/batch]


✅ Saved chunk 10 to NEST_chunk_clinical10.csv

🚀 Processing chunk 11/100...


Processing Batches: 100%|██████████| 6/6 [02:22<00:00, 23.78s/batch]


✅ Saved chunk 11 to NEST_chunk_clinical11.csv

🚀 Processing chunk 12/100...


Processing Batches: 100%|██████████| 6/6 [02:35<00:00, 26.00s/batch]


✅ Saved chunk 12 to NEST_chunk_clinical12.csv

🚀 Processing chunk 13/100...


Processing Batches: 100%|██████████| 6/6 [02:26<00:00, 24.35s/batch]


✅ Saved chunk 13 to NEST_chunk_clinical13.csv

🚀 Processing chunk 14/100...


Processing Batches: 100%|██████████| 6/6 [02:21<00:00, 23.63s/batch]


✅ Saved chunk 14 to NEST_chunk_clinical14.csv

🚀 Processing chunk 15/100...


Processing Batches: 100%|██████████| 6/6 [02:26<00:00, 24.49s/batch]


✅ Saved chunk 15 to NEST_chunk_clinical15.csv

🚀 Processing chunk 16/100...


Processing Batches: 100%|██████████| 6/6 [02:30<00:00, 25.08s/batch]


✅ Saved chunk 16 to NEST_chunk_clinical16.csv

🚀 Processing chunk 17/100...


Processing Batches: 100%|██████████| 6/6 [02:32<00:00, 25.45s/batch]


✅ Saved chunk 17 to NEST_chunk_clinical17.csv

🚀 Processing chunk 18/100...


Processing Batches: 100%|██████████| 6/6 [02:30<00:00, 25.02s/batch]


✅ Saved chunk 18 to NEST_chunk_clinical18.csv

🚀 Processing chunk 19/100...


Processing Batches: 100%|██████████| 6/6 [02:30<00:00, 25.04s/batch]


✅ Saved chunk 19 to NEST_chunk_clinical19.csv

🚀 Processing chunk 20/100...


Processing Batches: 100%|██████████| 6/6 [02:29<00:00, 24.96s/batch]


✅ Saved chunk 20 to NEST_chunk_clinical20.csv

🚀 Processing chunk 21/100...


Processing Batches: 100%|██████████| 6/6 [02:29<00:00, 24.96s/batch]


✅ Saved chunk 21 to NEST_chunk_clinical21.csv

🚀 Processing chunk 22/100...


Processing Batches: 100%|██████████| 6/6 [02:29<00:00, 24.90s/batch]


✅ Saved chunk 22 to NEST_chunk_clinical22.csv

🚀 Processing chunk 23/100...


Processing Batches: 100%|██████████| 6/6 [02:29<00:00, 24.97s/batch]


✅ Saved chunk 23 to NEST_chunk_clinical23.csv

🚀 Processing chunk 24/100...


Processing Batches: 100%|██████████| 6/6 [02:29<00:00, 24.86s/batch]


✅ Saved chunk 24 to NEST_chunk_clinical24.csv

🚀 Processing chunk 25/100...


Processing Batches: 100%|██████████| 6/6 [02:30<00:00, 25.00s/batch]


✅ Saved chunk 25 to NEST_chunk_clinical25.csv

🚀 Processing chunk 26/100...


Processing Batches: 100%|██████████| 6/6 [02:30<00:00, 25.05s/batch]


✅ Saved chunk 26 to NEST_chunk_clinical26.csv

🚀 Processing chunk 27/100...


Processing Batches: 100%|██████████| 6/6 [02:23<00:00, 23.94s/batch]


✅ Saved chunk 27 to NEST_chunk_clinical27.csv

🚀 Processing chunk 28/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.67s/batch]


✅ Saved chunk 28 to NEST_chunk_clinical28.csv

🚀 Processing chunk 29/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.68s/batch]


✅ Saved chunk 29 to NEST_chunk_clinical29.csv

🚀 Processing chunk 30/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.65s/batch]


✅ Saved chunk 30 to NEST_chunk_clinical30.csv

🚀 Processing chunk 31/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.71s/batch]


✅ Saved chunk 31 to NEST_chunk_clinical31.csv

🚀 Processing chunk 32/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.66s/batch]


✅ Saved chunk 32 to NEST_chunk_clinical32.csv

🚀 Processing chunk 33/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 33 to NEST_chunk_clinical33.csv

🚀 Processing chunk 34/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.68s/batch]


✅ Saved chunk 34 to NEST_chunk_clinical34.csv

🚀 Processing chunk 35/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 35 to NEST_chunk_clinical35.csv

🚀 Processing chunk 36/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 36 to NEST_chunk_clinical36.csv

🚀 Processing chunk 37/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.67s/batch]


✅ Saved chunk 37 to NEST_chunk_clinical37.csv

🚀 Processing chunk 38/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.66s/batch]


✅ Saved chunk 38 to NEST_chunk_clinical38.csv

🚀 Processing chunk 39/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 39 to NEST_chunk_clinical39.csv

🚀 Processing chunk 40/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 40 to NEST_chunk_clinical40.csv

🚀 Processing chunk 41/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 41 to NEST_chunk_clinical41.csv

🚀 Processing chunk 42/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 42 to NEST_chunk_clinical42.csv

🚀 Processing chunk 43/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.64s/batch]


✅ Saved chunk 43 to NEST_chunk_clinical43.csv

🚀 Processing chunk 44/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 44 to NEST_chunk_clinical44.csv

🚀 Processing chunk 45/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 45 to NEST_chunk_clinical45.csv

🚀 Processing chunk 46/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.64s/batch]


✅ Saved chunk 46 to NEST_chunk_clinical46.csv

🚀 Processing chunk 47/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.58s/batch]


✅ Saved chunk 47 to NEST_chunk_clinical47.csv

🚀 Processing chunk 48/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.64s/batch]


✅ Saved chunk 48 to NEST_chunk_clinical48.csv

🚀 Processing chunk 49/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.73s/batch]


✅ Saved chunk 49 to NEST_chunk_clinical49.csv

🚀 Processing chunk 50/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 50 to NEST_chunk_clinical50.csv

🚀 Processing chunk 51/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.79s/batch]


✅ Saved chunk 51 to NEST_chunk_clinical51.csv

🚀 Processing chunk 52/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 52 to NEST_chunk_clinical52.csv

🚀 Processing chunk 53/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 53 to NEST_chunk_clinical53.csv

🚀 Processing chunk 54/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.67s/batch]


✅ Saved chunk 54 to NEST_chunk_clinical54.csv

🚀 Processing chunk 55/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.65s/batch]


✅ Saved chunk 55 to NEST_chunk_clinical55.csv

🚀 Processing chunk 56/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 56 to NEST_chunk_clinical56.csv

🚀 Processing chunk 57/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.64s/batch]


✅ Saved chunk 57 to NEST_chunk_clinical57.csv

🚀 Processing chunk 58/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 58 to NEST_chunk_clinical58.csv

🚀 Processing chunk 59/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 59 to NEST_chunk_clinical59.csv

🚀 Processing chunk 60/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.55s/batch]


✅ Saved chunk 60 to NEST_chunk_clinical60.csv

🚀 Processing chunk 61/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 61 to NEST_chunk_clinical61.csv

🚀 Processing chunk 62/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.60s/batch]


✅ Saved chunk 62 to NEST_chunk_clinical62.csv

🚀 Processing chunk 63/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.69s/batch]


✅ Saved chunk 63 to NEST_chunk_clinical63.csv

🚀 Processing chunk 64/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 64 to NEST_chunk_clinical64.csv

🚀 Processing chunk 65/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 65 to NEST_chunk_clinical65.csv

🚀 Processing chunk 66/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.57s/batch]


✅ Saved chunk 66 to NEST_chunk_clinical66.csv

🚀 Processing chunk 67/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.64s/batch]


✅ Saved chunk 67 to NEST_chunk_clinical67.csv

🚀 Processing chunk 68/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 68 to NEST_chunk_clinical68.csv

🚀 Processing chunk 69/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.59s/batch]


✅ Saved chunk 69 to NEST_chunk_clinical69.csv

🚀 Processing chunk 70/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.59s/batch]


✅ Saved chunk 70 to NEST_chunk_clinical70.csv

🚀 Processing chunk 71/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.61s/batch]


✅ Saved chunk 71 to NEST_chunk_clinical71.csv

🚀 Processing chunk 72/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.72s/batch]


✅ Saved chunk 72 to NEST_chunk_clinical72.csv

🚀 Processing chunk 73/100...


Processing Batches: 100%|██████████| 6/6 [02:26<00:00, 24.40s/batch]


✅ Saved chunk 73 to NEST_chunk_clinical73.csv

🚀 Processing chunk 74/100...


Processing Batches: 100%|██████████| 6/6 [02:51<00:00, 28.66s/batch]


✅ Saved chunk 74 to NEST_chunk_clinical74.csv

🚀 Processing chunk 75/100...


Processing Batches: 100%|██████████| 6/6 [03:07<00:00, 31.32s/batch]


✅ Saved chunk 75 to NEST_chunk_clinical75.csv

🚀 Processing chunk 76/100...


Processing Batches: 100%|██████████| 6/6 [03:08<00:00, 31.50s/batch]


✅ Saved chunk 76 to NEST_chunk_clinical76.csv

🚀 Processing chunk 77/100...


Processing Batches: 100%|██████████| 6/6 [03:08<00:00, 31.36s/batch]


✅ Saved chunk 77 to NEST_chunk_clinical77.csv

🚀 Processing chunk 78/100...


Processing Batches: 100%|██████████| 6/6 [03:06<00:00, 31.08s/batch]


✅ Saved chunk 78 to NEST_chunk_clinical78.csv

🚀 Processing chunk 79/100...


Processing Batches: 100%|██████████| 6/6 [03:07<00:00, 31.31s/batch]


✅ Saved chunk 79 to NEST_chunk_clinical79.csv

🚀 Processing chunk 80/100...


Processing Batches: 100%|██████████| 6/6 [03:07<00:00, 31.26s/batch]


✅ Saved chunk 80 to NEST_chunk_clinical80.csv

🚀 Processing chunk 81/100...


Processing Batches: 100%|██████████| 6/6 [03:08<00:00, 31.39s/batch]


✅ Saved chunk 81 to NEST_chunk_clinical81.csv

🚀 Processing chunk 82/100...


Processing Batches: 100%|██████████| 6/6 [03:08<00:00, 31.49s/batch]


✅ Saved chunk 82 to NEST_chunk_clinical82.csv

🚀 Processing chunk 83/100...


Processing Batches: 100%|██████████| 6/6 [03:09<00:00, 31.57s/batch]


✅ Saved chunk 83 to NEST_chunk_clinical83.csv

🚀 Processing chunk 84/100...


Processing Batches: 100%|██████████| 6/6 [03:09<00:00, 31.61s/batch]


✅ Saved chunk 84 to NEST_chunk_clinical84.csv

🚀 Processing chunk 85/100...


Processing Batches: 100%|██████████| 6/6 [03:08<00:00, 31.41s/batch]


✅ Saved chunk 85 to NEST_chunk_clinical85.csv

🚀 Processing chunk 86/100...


Processing Batches: 100%|██████████| 6/6 [02:40<00:00, 26.68s/batch]


✅ Saved chunk 86 to NEST_chunk_clinical86.csv

🚀 Processing chunk 87/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 87 to NEST_chunk_clinical87.csv

🚀 Processing chunk 88/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 88 to NEST_chunk_clinical88.csv

🚀 Processing chunk 89/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.66s/batch]


✅ Saved chunk 89 to NEST_chunk_clinical89.csv

🚀 Processing chunk 90/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.64s/batch]


✅ Saved chunk 90 to NEST_chunk_clinical90.csv

🚀 Processing chunk 91/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.58s/batch]


✅ Saved chunk 91 to NEST_chunk_clinical91.csv

🚀 Processing chunk 92/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 92 to NEST_chunk_clinical92.csv

🚀 Processing chunk 93/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 93 to NEST_chunk_clinical93.csv

🚀 Processing chunk 94/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 94 to NEST_chunk_clinical94.csv

🚀 Processing chunk 95/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.65s/batch]


✅ Saved chunk 95 to NEST_chunk_clinical95.csv

🚀 Processing chunk 96/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.62s/batch]


✅ Saved chunk 96 to NEST_chunk_clinical96.csv

🚀 Processing chunk 97/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.59s/batch]


✅ Saved chunk 97 to NEST_chunk_clinical97.csv

🚀 Processing chunk 98/100...


Processing Batches: 100%|██████████| 6/6 [02:15<00:00, 22.63s/batch]


✅ Saved chunk 98 to NEST_chunk_clinical98.csv

🚀 Processing chunk 99/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.69s/batch]


✅ Saved chunk 99 to NEST_chunk_clinical99.csv

🚀 Processing chunk 100/100...


Processing Batches: 100%|██████████| 6/6 [02:16<00:00, 22.82s/batch]


✅ Saved chunk 100 to NEST_chunk_clinical100.csv

🎉 All chunks successfully saved as separate CSV files!
