<a href="https://colab.research.google.com/github/Akshayaa1010/GenAI-Customer-Support-Quality-Auditor/blob/main/transcribe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Whisper with %pip so the package lands in the environment of the running kernel.
%pip install git+https://github.com/openai/whisper.git
# ffmpeg is installed for Whisper's audio decoding (per the Whisper README).
# -y keeps apt non-interactive so the cell cannot stall on a confirmation prompt.
!sudo apt-get update && sudo apt-get install -y ffmpeg

In [None]:
# Transcribe the call recording with the Whisper CLI using the "medium.en" model.
# NOTE(review): later cells open "1755884171.51632.vtt", so this command is presumably
# expected to write a .vtt file into the working directory; confirm the CLI's default outputs.
!whisper "1755884171.51632.mp3" --model medium.en

In [None]:
import re

def clean_subtitle(text):
    """Strip subtitle markup from a WebVTT/SRT transcript and return plain text.

    Removes, in order: the ``WEBVTT`` header line, cue timing lines
    (``[HH:]MM:SS.mmm --> ...``), cue counter lines (lines holding only
    digits), HTML-like tags, a fixed list of filler words, and redundant
    whitespace.

    Args:
        text: Raw subtitle file content.

    Returns:
        The transcript as a single whitespace-normalised string.
    """
    # Drop the "WEBVTT" header line (optionally preceded by a BOM).
    text = re.sub(r"\A\ufeff?WEBVTT[^\n]*\n?", "", text)

    # Remove cue timing lines. The hours field is optional because short
    # recordings are written as MM:SS.mmm; "," is accepted for SRT files.
    text = re.sub(r"(?:\d+:)?\d+:\d+[.,]\d+[ \t]*-->.*", "", text)

    # Remove cue counters: only lines made up entirely of digits, so that a
    # number ending a spoken line ("order 12345") is preserved.
    text = re.sub(r"^[ \t]*\d+[ \t\r]*\n", "", text, flags=re.MULTILINE)

    # remove html tags if any
    text = re.sub(r"<.*?>", "", text)

    # remove filler words
    # NOTE(review): "like", "really" and "probably" can carry meaning
    # ("I like this plan"); kept as-is to preserve the existing word list.
    text = re.sub(
        r"\b(uh|um|ah|er|hmm|mmm|you know|like|really|probably)\b",
        "",
        text,
        flags=re.IGNORECASE,
    )

    # remove extra spaces
    text = re.sub(r"\s+", " ", text)

    return text.strip()


In [None]:
# Read the WebVTT transcript file as UTF-8 text.
with open("1755884171.51632.vtt","r",encoding="utf-8") as f:
    raw = f.read()

# Run the subtitle cleaner and show the resulting plain transcript.
# NOTE: a later cell defines a function that is also named `clean_transcript`,
# which rebinds this name; re-run this cell before re-running cells that
# expect the transcript string.
clean_transcript = clean_subtitle(raw)
print(clean_transcript)


In [None]:
import re

def split_turns(text):
    """Heuristically split a transcript into conversational turns.

    A new turn starts at each whole-word occurrence of one of the discourse
    markers (Yeah, Okay, So, Listen, Well, Thanks, Bye). The marker stays at
    the start of the turn it opens, so it is neither lost nor emitted as a
    separate pseudo-turn, and words that merely begin with a marker
    ("Sorry", "Something") are not cut apart.

    Args:
        text: Cleaned transcript text.

    Returns:
        List of stripped turn strings; fragments of 3 characters or fewer
        are discarded.
    """
    # Zero-width lookahead: split *before* the marker instead of consuming it.
    # \b on both sides restricts matches to whole words.
    turns = re.split(r"(?=\b(?:Yeah|Okay|So|Listen|Well|Thanks|Bye)\b)", text)
    return [t.strip() for t in turns if len(t.strip()) > 3]

# Split the cleaned transcript into turns, then display the turn count and the first ten turns.
turns = split_turns(clean_transcript)
len(turns), turns[:10]

In [None]:
# Label turns by strict alternation, assuming the agent speaks first:
# even positions are "Agent", odd positions are "Customer".
conversation = [
    {"speaker": "Agent" if position % 2 == 0 else "Customer", "text": turn_text}
    for position, turn_text in enumerate(turns)
]

# Preview the first six labelled turns.
conversation[:6]


In [None]:
import json

# Persist the labelled turns as pretty-printed JSON (4-space indent).
with open("structured_conversation.json", "w") as out_file:
    out_file.write(json.dumps(conversation, indent=4))


In [None]:
import json
import re
import pandas as pd

# Load the speaker-labelled turns from the JSON file.
with open("structured_conversation.json", "r", encoding="utf-8") as f:
    data = json.load(f)

cleaned_data = []

for entry in data:
    text = entry["text"]

    # Strip leftover WebVTT markup. The "." before the milliseconds is escaped
    # so it matches only a literal dot; unescaped it matched any character and
    # could swallow ordinary text such as "10:30 45".
    text = re.sub(r"WEBVTT", "", text)
    text = re.sub(r"\d{2}:\d{2}\.\d+\s-->\s\d{2}:\d{2}\.\d+", "", text)
    text = re.sub(r"\d{2}:\d{2}\.\d+", "", text)
    # Collapse a doubled comma into one.
    text = re.sub(r",\s*,", ",", text)
    text = re.sub(r"\s+", " ", text).strip()

    # Skip turns that are empty once the markup is gone.
    if text:
        cleaned_data.append({
            "speaker": entry["speaker"],
            "text": text
        })

# Explicit columns keep the CSV header present even when no turn survives,
# so a later pd.read_csv does not fail on a completely empty file.
df = pd.DataFrame(cleaned_data, columns=["speaker", "text"])

df.to_csv("cleaned_calls.csv", index=False)

print("Cleaning Done! Saved as cleaned_calls.csv")


In [None]:
import re

# Read the raw WebVTT transcript.
with open("1755884171.51632.vtt","r",encoding="utf-8") as f:
    text = f.read()

# Strip the header and all cue timestamps. The dot before the milliseconds is
# escaped (unescaped it matched any character, including ":"), and an optional
# leading hours field is accepted so HH:MM:SS.mmm cues are removed whole
# instead of leaving fragments behind.
text = re.sub(r"WEBVTT","", text)
text = re.sub(r"(?:\d{2}:)?\d{2}:\d{2}\.\d+\s-->\s(?:\d{2}:)?\d{2}:\d{2}\.\d+","", text)
text = re.sub(r"(?:\d{2}:)?\d{2}:\d{2}\.\d+","", text)
text = re.sub(r"\s+"," ", text).strip()

# Save the plain transcript.
with open("cleaned_call.txt","w",encoding="utf-8") as f:
    f.write(text)

print("Clean file saved as cleaned_call.txt")

In [None]:
import pandas as pd
import re

# Load the per-turn CSV and report its (rows, columns) shape before any further cleaning.
df = pd.read_csv("cleaned_calls.csv")

print("Before Cleaning:", df.shape)

def clean_transcript(text):
    """Remove residual timestamp fragments and stray punctuation from one turn.

    Args:
        text: Turn text; missing values (NaN/None) are accepted.

    Returns:
        The cleaned string, or "" for missing input.
    """
    if pd.isna(text):
        return ""

    # Remove --> 00:13. / --> 09:10. etc
    text = re.sub(r'-->\s*\d{2}:\d{2}\.?\d*', '', text)

    # Remove standalone timestamps like 00:13 , 01:04 etc
    text = re.sub(r'\d{2}:\d{2}\.?\d*', '', text)

    # Remove leftover arrows
    text = re.sub(r'-->', '', text)

    # Remove leading punctuation like ", " or ". ". Whitespace is part of the
    # class because deleting a timestamp typically leaves " , text"; anchoring
    # on punctuation alone missed that case and kept the stray comma.
    text = re.sub(r'^[\s,\.]+', '', text)

    # Collapse runs of commas/periods (",,", "...") into a single period
    text = re.sub(r'[,\.]{2,}', '.', text)

    # Normalize spaces
    text = re.sub(r'\s+', ' ', text).strip()

    return text

# This cell overwrites cleaned_calls.csv with a 'clean_text' column in place of
# 'text'. Pick whichever column is present so that re-running the cell on its
# own output does not fail with KeyError: 'text'.
source_column = 'text' if 'text' in df.columns else 'clean_text'
df['clean_text'] = df[source_column].apply(clean_transcript)

# Drop empty rows
df = df[df['clean_text'] != ""]

print("After Cleaning:", df.shape)

# Save cleaned dataset
df[['speaker','clean_text']].to_csv("cleaned_calls.csv", index=False)

print("Cleaning completed! Saved as cleaned_calls.csv")


In [None]:
# %pip installs into the environment of the running kernel.
%pip install -q transformers accelerate torch


In [None]:
import pandas as pd

# Reload the cleaned turns and flatten them into one "speaker: text" line per row.
df = pd.read_csv("cleaned_calls.csv")

conversation_text = "".join(
    f"{speaker_label}: {turn_text}\n"
    for speaker_label, turn_text in zip(df["speaker"], df["clean_text"])
)

print(conversation_text[:1000])  # preview only


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face checkpoint used for scoring.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Half-precision weights, with device placement delegated to device_map="auto".
# NOTE(review): float16 is presumably chosen for GPU runtimes; confirm behaviour on CPU-only kernels.
load_options = {"device_map": "auto", "torch_dtype": torch.float16}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

In [None]:
import pandas as pd

# Reload the cleaned per-turn CSV from disk.
df = pd.read_csv("cleaned_calls.csv")

# Display the first rows as a quick sanity check.
df.head()


In [None]:
# Flatten the turns into one "speaker: text" line per row.
conversation_text = "".join(
    f"{speaker_label}: {turn_text}\n"
    for speaker_label, turn_text in zip(df["speaker"], df["clean_text"])
)

print(conversation_text[:500])


In [None]:
def chunk_text(text, chunk_size=1200):
    """Split ``text`` into consecutive, non-overlapping character slices.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk (default 1200).

    Returns:
        List of slices in order; the last one may be shorter. An empty
        string yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Previously such a
            value made the loop run forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# Chunk the whole conversation with the default chunk size and print how many chunks resulted.
conversation_chunks = chunk_text(conversation_text)
print(len(conversation_chunks))


In [None]:
def score_chunk(conversation_chunk):
    """Ask the loaded language model to score the agent in one conversation chunk.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        conversation_chunk: Text of the conversation excerpt to evaluate.

    Returns:
        The text generated by the model (the prompt is not included). It is
        expected, but not guaranteed, to follow the "Empathy Score / 
        Professionalism Score / Compliance Status" format requested in the prompt.
    """
    prompt = f"""
<|system|>
You are a customer support quality auditor.

<|user|>
Evaluate the AGENT in the following conversation.

1. Give scores from 1 to 100 for:
   - Empathy
   - Professionalism

2. For Compliance, classify the agent behavior as one of:
   - PASS (fully compliant)
   - WARN (minor issues or risky statements)
   - FAIL (clear policy or ethical violations)

Respond STRICTLY in this format:
Empathy Score: <number>
Professionalism Score: <number>
Compliance Status: <PASS / WARN / FAIL>

Conversation:
{conversation_chunk}

<|assistant|>
"""

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True
    ).to(model.device)

    # Greedy decoding: `temperature` only takes effect when sampling, so it is
    # not passed alongside do_sample=False.
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id
    )

    # The output sequence starts with the prompt tokens; decode only the newly
    # generated tail so the prompt's own "Empathy Score: <number>" template and
    # the conversation text cannot be mistaken for the model's answer.
    prompt_token_count = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_token_count:],
        skip_special_tokens=True
    )

    return response.strip()

In [None]:
# Smoke test: score only the first chunk and print the raw model output.
test_output = score_chunk(conversation_chunks[0])
print(test_output)


In [None]:
import pandas as pd
import json
import re

# --- Start of code to regenerate 'cleaned_calls.csv' ---
# This section combines logic from previous cells (M1hSz1FYuJrK, 7D89xjW52nS9, S3JWj1bh4-_o)
# to ensure 'cleaned_calls.csv' is available.

# NOTE: This assumes 'structured_conversation.json' exists. If not, further prior cells would need to be run.
# Load 'structured_conversation.json'
with open("structured_conversation.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Process data as in cell 7D89xjW52nS9
cleaned_data_step1 = []
for entry in data:
    text = entry["text"]

    # The "." before the milliseconds is escaped so it matches only a literal
    # dot; unescaped it matched any character and could remove ordinary text
    # such as "10:30 45".
    text = re.sub(r"WEBVTT", "", text)
    text = re.sub(r"\d{2}:\d{2}\.\d+\s-->\s\d{2}:\d{2}\.\d+", "", text)
    text = re.sub(r"\d{2}:\d{2}\.\d+", "", text)
    text = re.sub(r",\s*,", ",", text)
    text = re.sub(r"\s+", " ", text).strip()

    # Skip turns that are empty once the markup is gone.
    if text:
        cleaned_data_step1.append({
            "speaker": entry["speaker"],
            "text": text
        })

# Explicit columns keep 'speaker' and 'text' available even when no turn
# survives, so the column lookups further down cannot raise KeyError.
df_intermediate = pd.DataFrame(cleaned_data_step1, columns=["speaker", "text"])

# Define clean_transcript function as in cell S3JWj1bh4-_o
def clean_transcript(text):
    """Remove residual timestamp fragments and stray punctuation from one turn.

    Args:
        text: Turn text; missing values (NaN/None) are accepted.

    Returns:
        The cleaned string, or "" for missing input.
    """
    if pd.isna(text):
        return ""

    # Remove --> 00:13. / --> 09:10. etc
    text = re.sub(r'-->\s*\d{2}:\d{2}\.?\d*', '', text)

    # Remove standalone timestamps like 00:13 , 01:04 etc
    text = re.sub(r'\d{2}:\d{2}\.?\d*', '', text)

    # Remove leftover arrows
    text = re.sub(r'-->', '', text)

    # Remove leading punctuation like ", " or ". ". Whitespace is part of the
    # class because deleting a timestamp typically leaves " , text"; anchoring
    # on punctuation alone missed that case and kept the stray comma.
    text = re.sub(r'^[\s\.,]+', '', text)

    # Collapse runs of commas/periods (",,", "...") into a single period
    text = re.sub(r'[\.,]{2,}', '.', text)

    # Normalize spaces
    text = re.sub(r'\s+', ' ', text).strip()

    return text

# Second cleaning pass: derive 'clean_text' from 'text' on a fresh frame,
# keep only rows whose cleaned text is non-empty, and write the result to
# 'cleaned_calls.csv' (replacing any earlier version of the file).
df_final = df_intermediate.assign(
    clean_text=df_intermediate['text'].apply(clean_transcript)
)
df_final = df_final.loc[df_final['clean_text'] != ""]
df_final[['speaker','clean_text']].to_csv("cleaned_calls.csv", index=False)

# --- End of code to regenerate 'cleaned_calls.csv' ---

# Read the file back and flatten it into one "speaker: text" line per turn.
df = pd.read_csv("cleaned_calls.csv")

conversation_text = "".join(
    f"{speaker_label}: {turn_text}\n"
    for speaker_label, turn_text in zip(df["speaker"], df["clean_text"])
)

def chunk_text(text, chunk_size=1200):
    """Split ``text`` into consecutive, non-overlapping character slices.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk (default 1200).

    Returns:
        List of slices in order; the last one may be shorter. An empty
        string yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Previously such a
            value made the loop run forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# Chunk the conversation, then score every chunk in order, logging progress.
conversation_chunks = chunk_text(conversation_text)

total_chunks = len(conversation_chunks)
all_outputs = []

for chunk_number, chunk in enumerate(conversation_chunks, start=1):
    print(f"Scoring chunk {chunk_number}/{total_chunks}")
    all_outputs.append(score_chunk(chunk))

In [None]:
import re

def parse_scores(text):
    """Extract empathy, professionalism and compliance values from model output.

    Matching is case-insensitive, so small-model variations such as
    "empathy score: 80" or "Compliance Status: pass" are still recognised.
    Numeric scores outside the 1-100 range requested by the prompt are
    treated as unparsable rather than being averaged into the result.

    Args:
        text: Raw text produced by the scoring model.

    Returns:
        Dict with keys "empathy" and "professionalism" (int or None) and
        "compliance" ("PASS", "WARN", "FAIL" or None).
    """
    def _score(label):
        # First "<label> Score: <digits>" occurrence, validated against 1-100.
        match = re.search(rf"{label} Score:\s*(\d+)", text, flags=re.IGNORECASE)
        if not match:
            return None
        value = int(match.group(1))
        return value if 1 <= value <= 100 else None

    compliance = re.search(
        r"Compliance Status:\s*(PASS|WARN|FAIL)", text, flags=re.IGNORECASE
    )

    return {
        "empathy": _score("Empathy"),
        "professionalism": _score("Professionalism"),
        # Normalise case so downstream comparisons against "FAIL"/"WARN" work.
        "compliance": compliance.group(1).upper() if compliance else None
    }

In [None]:
# Parse every raw model output into a dict with "empathy", "professionalism" and "compliance" keys.
scores = [parse_scores(o) for o in all_outputs]

# Display the parsed results for inspection.
scores

In [None]:
import numpy as np

# Keep only the chunks for which a score could be parsed.
empathy_scores = [s['empathy'] for s in scores if s['empathy'] is not None]
professionalism_scores = [s['professionalism'] for s in scores if s['professionalism'] is not None]

# np.mean([]) returns nan and emits a RuntimeWarning; report None instead when
# no chunk produced a parsable score, so the final report is explicit about it.
avg_empathy = round(np.mean(empathy_scores), 2) if empathy_scores else None
avg_professionalism = round(np.mean(professionalism_scores), 2) if professionalism_scores else None

In [None]:
compliance_values = [s['compliance'] for s in scores]

# Worst status wins. A PASS is only reported when at least one chunk actually
# produced a parsable status; previously a run where every chunk failed to
# parse (all None) was silently reported as PASS.
if "FAIL" in compliance_values:
    final_compliance = "FAIL"
elif "WARN" in compliance_values:
    final_compliance = "WARN"
elif "PASS" in compliance_values:
    final_compliance = "PASS"
else:
    final_compliance = "UNKNOWN"

In [None]:
# Assemble the summary first, then emit it in a single write.
report_lines = [
    "FINAL AUDIT RESULT",
    "-------------------",
    f"Average Empathy Score: {avg_empathy}",
    f"Average Professionalism Score: {avg_professionalism}",
    f"Overall Compliance Status: {final_compliance}",
]
print("\n".join(report_lines))

In [None]:
# %pip installs into the environment of the running kernel.
# NOTE(review): a later cell in this notebook installs the same packages with pinned versions;
# this cell looks redundant once that one is run.
%pip install langchain faiss-cpu sentence-transformers


In [None]:
# %pip installs into the environment of the running kernel.
# NOTE(review): a later cell in this notebook installs the same packages with pinned versions;
# this cell looks redundant once that one is run.
%pip install -U langchain langchain-community langchain-core faiss-cpu sentence-transformers


In [None]:
# %pip installs into the environment of the running kernel.
# NOTE(review): a later cell in this notebook installs the same packages with pinned versions;
# this cell looks redundant once that one is run.
%pip install -U langchain langchain-community langchain-core langchain-text-splitters faiss-cpu sentence-transformers


In [None]:
# Pinned LangChain stack, installed with %pip so it targets the running kernel.
# NOTE(review): torch/transformers are imported by earlier cells; upgrading them
# here presumably requires a kernel restart before the new versions take effect.
%pip install -U \
  torch torchvision torchaudio \
  transformers \
  sentence-transformers \
  langchain==1.2.9 \
  langchain-community==0.4.1 \
  langchain-core==1.2.9 \
  langchain-text-splitters==1.1.0 \
  langchain-huggingface==1.2.0 \
  faiss-cpu

### Creating `policies.txt`

To resolve the `FileNotFoundError`, I'm creating a `policies.txt` file with some example policy content. In a real scenario, this file would contain your actual company policies relevant to customer service interactions.

In [None]:
# Example policy corpus: eight policies, each a "**Company Policy N: Title**"
# heading line followed by one paragraph, with blank lines between policies.
policies_content = """
**Company Policy 1: Customer Empathy**
Agents must demonstrate active listening and empathy towards customers. Acknowledge customer feelings and show understanding of their situation. Use phrases like "I understand this is frustrating" or "I apologize for the inconvenience you've experienced." Strive to connect with the customer on an emotional level while maintaining professionalism.

**Company Policy 2: Professionalism in Communication**
All agent interactions must be professional, courteous, and respectful. Avoid slang, jargon, or overly casual language. Maintain a calm and composed demeanor, even when customers are distressed. Ensure clear and concise communication.

**Company Policy 3: Data Privacy and Confidentiality**
Agents must never share sensitive customer information with unauthorized parties. Always verify customer identity before discussing account-specific details. Adhere to all data protection regulations (e.g., GDPR, CCPA). Do not ask for passwords or other highly sensitive credentials.

**Company Policy 4: Accurate Information Provision**
Agents must provide accurate and up-to-date information regarding products, services, and company procedures. If unsure, consult reliable internal resources or escalate the query to a supervisor. Misinformation can lead to compliance violations and customer dissatisfaction.

**Company Policy 5: Resolution and Follow-up**
Agents should aim for first-contact resolution whenever possible. Clearly communicate next steps and timelines if an issue cannot be resolved immediately. Follow up with customers as promised to ensure satisfaction and provide updates.

**Company Policy 6: Ethical Conduct**
Agents must act with integrity and honesty in all interactions. Do not make false promises or misleading statements. Report any suspicious activity or potential breaches of conduct to management.

**Company Policy 7: Prohibited Language**
Agents are strictly prohibited from using offensive, discriminatory, or aggressive language. Avoid political or personal opinions during customer interactions.

**Company Policy 8: Compliance with Legal Requirements**
All conversations and actions must comply with relevant legal and regulatory frameworks, including consumer protection laws and industry-specific regulations.
"""

# Write the corpus to disk, overwriting any existing policies.txt.
with open("policies.txt", "w") as f:
    f.write(policies_content)

print("policies.txt created successfully.")

In [None]:
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Load policy text
with open("policies.txt", "r") as f:
    policy_text = f.read()

# Split policy text
# NOTE(review): CharacterTextSplitter presumably splits on its default
# separator only, so a chunk may exceed chunk_size when a single policy
# paragraph is longer than 300 characters; confirm against the splitter docs.
text_splitter = CharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50
)

docs = text_splitter.create_documents([policy_text])

# Create embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)

# Create FAISS vector store
vectorstore = FAISS.from_documents(docs, embeddings)

# The status message previously contained a mis-encoded check mark ("âœ…").
print("✅ RAG policy vector store ready")

In [None]:
def retrieve_policy(conversation_chunk, k=2):
    """Return the policy passages most similar to a conversation chunk.

    Queries the notebook-level ``vectorstore`` with the chunk text.

    Args:
        conversation_chunk: Conversation excerpt used as the search query.
        k: Number of passages to retrieve. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        The retrieved passages' text joined with newlines.
    """
    retrieved_docs = vectorstore.similarity_search(conversation_chunk, k=k)
    return "\n".join(doc.page_content for doc in retrieved_docs)


In [None]:
import json # Ensure json is imported
import re # Import re for regex parsing

def contextual_audit(conversation_chunk):
    """Audit one conversation chunk against the most relevant company policies.

    Retrieves policy passages with ``retrieve_policy``, asks the notebook-level
    ``model`` for a JSON verdict, and tries to parse that verdict.

    Args:
        conversation_chunk: Conversation excerpt to evaluate.

    Returns:
        A pretty-printed JSON string when a JSON value could be parsed from
        the model output; otherwise a diagnostic string that starts with
        "No JSON block found." or "JSON parsing failed after attempting regex."
        and contains the full generated text.
    """
    policy_context = retrieve_policy(conversation_chunk)

    # Simplified and more direct prompt, removing system/user/assistant tags
    prompt = f"""
As a customer support quality auditor, your task is to analyze the agent's performance in the conversation below, considering the provided company policies.

Company Policies:
{policy_context}

Conversation:
{conversation_chunk}

Evaluate the AGENT and provide scores for Empathy (1-100) and Professionalism (1-100), along with a Compliance status (PASS, WARN, or FAIL).

Your response MUST be a JSON object ONLY, with no other text, explanations, or formatting. Generate only the JSON. Do not include any other text.

{{
  "Empathy": <score 1-100>,
  "Professionalism": <score 1-100>,
  "Compliance": "<PASS/WARN/FAIL>"
}}
"""

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True
    ).to(model.device)

    # Greedy decoding: `temperature` only takes effect when sampling, so it is
    # not passed alongside do_sample=False.
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id
    )

    # Separate the continuation from the prompt at the token level. Slicing
    # the decoded string by the decoded prompt's character length is fragile:
    # decoding the prompt alone and decoding it as a prefix of the full
    # sequence need not yield strings of identical length.
    prompt_token_count = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(
        outputs[0][prompt_token_count:],
        skip_special_tokens=True
    ).strip()

    # Attempt to parse the generated text directly as JSON first.
    try:
        return json.dumps(json.loads(generated_text), indent=2)
    except json.JSONDecodeError:
        pass

    # Fallback: decode the first well-formed JSON value that starts at a "{".
    # raw_decode stops at the end of that value, so trailing chatter (even if
    # it contains further braces) no longer breaks parsing the way a greedy
    # "{...}" regex match did.
    decoder = json.JSONDecoder()
    brace_at = generated_text.find("{")
    while brace_at != -1:
        try:
            parsed_json, _end = decoder.raw_decode(generated_text, brace_at)
            return json.dumps(parsed_json, indent=2)
        except json.JSONDecodeError:
            brace_at = generated_text.find("{", brace_at + 1)

    # Nothing parsed: keep the two original diagnostics, chosen by whether a
    # brace-delimited block was present at all.
    if re.search(r'\{.*\}', generated_text, re.DOTALL):
        return f"JSON parsing failed after attempting regex. Full generated text: {generated_text}"
    return f"No JSON block found. Full generated text: {generated_text}"


In [None]:
import pandas as pd

# Reload the cleaned turns and flatten them into one "speaker: text" line per row.
df = pd.read_csv("cleaned_calls.csv")

conversation_text = "".join(
    f"{speaker_label}: {turn_text}\n"
    for speaker_label, turn_text in zip(df["speaker"], df["clean_text"])
)

def chunk_text(text, chunk_size=1200):
    """Split ``text`` into consecutive, non-overlapping character slices.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk (default 1200).

    Returns:
        List of slices in order; the last one may be shorter. An empty
        string yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Previously such a
            value made the loop run forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# Rebuild the chunk list from the freshly assembled conversation text (default chunk size).
conversation_chunks = chunk_text(conversation_text)
print("conversation_chunks regenerated.")

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face checkpoint used for scoring (same checkpoint as the earlier load cell).
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Half-precision weights, with device placement delegated to device_map="auto".
# NOTE(review): float16 is presumably chosen for GPU runtimes; confirm behaviour on CPU-only kernels.
load_options = {"device_map": "auto", "torch_dtype": torch.float16}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

In [None]:
# Smoke test: run the policy-aware audit on the first chunk and print the returned string.
test_contextual_audit_output = contextual_audit(conversation_chunks[0])
print(test_contextual_audit_output)