
# Customer Text Summarization — Concurrency + Batching

- **Python concurrency**: `ThreadPoolExecutor` processes multiple batches in parallel
- **Batch processing**: `llm.batch(prompts)` per chunk
- **Single, simple prompt** (no complex fallbacks)


In [1]:
!pip install OpenAI
!pip install langchain
!pip install langchain_community
!pip install Cohere
!pip install langchain-openai langchain-cohere python-dotenv

Collecting langchain_community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting requests<3,>=2.32.5 (from langchain_community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.6.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.6.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.6.7->langchain_community)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.3.29-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import warnings
warnings.filterwarnings('ignore')
from google.colab import userdata

In [3]:
openai_key = userdata.get("OPENAI_API_KEY")
os.environ["OPENAI_API_KEY"] = openai_key

In [4]:
import os
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
from langchain_openai import ChatOpenAI

In [5]:

CSV_PATH     = "https://raw.githubusercontent.com/giridhar276/genai/refs/heads/main/datasets/Bank_Customer_conversations.csv"   # <- set your CSV path
TEXT_COL     = "customer_text"
MODEL        = "gpt-4o-mini"
TEMPERATURE  = 0
TIMEOUT      = 60

BATCH_SIZE   = 40      # rows per batch call
MAX_WORKERS  = 4       # parallel batches

OUTPUT_PATH  = CSV_PATH.replace(".csv", "_with_summary_concurrent.csv")




In [6]:

# Single summarization prompt (simple)
PROMPT_TMPL = (
    "Summarize the customer's message in ONE clear sentence focusing on intent/issue and requested action. "
    "Do not add details that are not present. Return only the summary text on a single line.\n\n"
    "CUSTOMER TEXT:\n"
    '\"\"\"{text}\"\"\"\n'
    "Summary:"
)


In [7]:

# Load
df = pd.read_csv(CSV_PATH)
if TEXT_COL not in df.columns:
    raise ValueError(f"Column '{TEXT_COL}' not found in CSV.")
texts = df[TEXT_COL].astype(str).tolist()
len(texts), df.head(3)


(3,
              call_id                 topic        tone  \
 0  CALL20250915-0001    card_block_unblock       harsh   
 1  CALL20250915-0002  loan_approval_status       harsh   
 2  CALL20250915-0003  loan_approval_status  frustrated   
 
                                        customer_text  
 0  Ms. Patel: My card ending 0530 got blocked aft...  
 1  Ms. Patel: I filed a loan application 27-Aug-2...  
 2  Mr. Singh: I filed a loan application 30-Aug-2...  )

In [8]:

# Model
llm = ChatOpenAI(model=MODEL, temperature=TEMPERATURE, timeout=TIMEOUT)

def chunk_list(seq, size):
    for i in range(0, len(seq), size):
        yield i, seq[i:i+size]




In [9]:
def summarize_batch(texts):
    prompts = [PROMPT_TMPL.format(text=t) for t in texts]
    responses = llm.batch(prompts)  # keeps order
    return [r.content.strip() for r in responses]

In [10]:

# Run batches concurrently
chunks = list(chunk_list(texts, BATCH_SIZE))
summaries_out = [None] * len(texts)

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    fut_map = {ex.submit(summarize_batch, batch): (start, batch) for (start, batch) in chunks}
    for fut in as_completed(fut_map):
        start, batch = fut_map[fut]
        result = fut.result()
        summaries_out[start:start+len(batch)] = result
        print(f"Completed rows {start}–{start+len(batch)-1}")

len(summaries_out)


Completed rows 0–2


3

In [11]:

# Save
df["summary"] = summaries_out
df.to_csv("batchprocessing.csv", index=False)
print(f"Saved: {OUTPUT_PATH}")
df[[TEXT_COL, "summary"]].head(10)


Saved: https://raw.githubusercontent.com/giridhar276/genai/refs/heads/main/datasets/Bank_Customer_conversations_with_summary_concurrent.csv


Unnamed: 0,customer_text,summary
0,Ms. Patel: My card ending 0530 got blocked aft...,The customer requests assistance to unblock th...
1,Ms. Patel: I filed a loan application 27-Aug-2...,The customer is requesting the current approva...
2,Mr. Singh: I filed a loan application 30-Aug-2...,The customer is inquiring about the approval s...
