# **GNCIPL Week 5**

## **Topic Name: (18) AI-Powered Speech Analytics for Call Centers**

### **Objective: Analyze voice data to extract customer sentiment and issue types.**


### **Application: Call center operations**

### **Presented by: Ramkrishna Ghosh**

In [2]:
# Mount Google Drive at /content/drive; every data path used later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


 **Installing API libraries**



In [3]:
!pip install assemblyai==0.17.0

Collecting assemblyai==0.17.0
  Downloading assemblyai-0.17.0-py3-none-any.whl.metadata (22 kB)
Downloading assemblyai-0.17.0-py3-none-any.whl (63 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.3/63.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: assemblyai
Successfully installed assemblyai-0.17.0


In [4]:
# The analysis cell uses `from google import genai` and `genai.Client`, which
# are provided by the google-genai package. google-generativeai is the older
# SDK with a different import path (`google.generativeai`) and no Client class.
!pip install -q google-genai

**transcribe.py**

In [8]:
# Transcribing audio files using the AssemblyAI API

import assemblyai as aai
import os
import json

# AssemblyAI API key.
# SECURITY: an API key is committed here in plain text. The key is now read
# from the ASSEMBLYAI_API_KEY environment variable first; the literal is kept
# only as a fallback so existing runs keep working. Rotate this key and delete
# the fallback before sharing the notebook.
aai.settings.api_key = (
    os.environ.get("ASSEMBLYAI_API_KEY")
    or "9c2654ce0f7e48429d4c755ce78c084e"
)

# Audio files to transcribe
AUDIO_FILES = [
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1.m4a",
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2.m4a",
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call3.m4a",
]

# Folder that receives one <name>.json transcript per audio file
OUTPUT_FOLDER = "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/"

def transcribe_audio(audio_path: str) -> None:
    """Transcribe one audio file with AssemblyAI and save the result as JSON.

    The JSON file is written to ``OUTPUT_FOLDER`` and is named after the audio
    file with its last extension replaced by ``.json``. It stores the
    transcript's ``text``, ``id``, ``status`` and ``audio_duration``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Raises:
        RuntimeError: If AssemblyAI reports that the transcription failed.
    """
    transcriber = aai.Transcriber()

    print(f"\n>> Transcribing: {audio_path}")

    result = transcriber.transcribe(audio_path)

    # A failed job is returned as a transcript with an error status rather
    # than raised; stop here instead of saving it and printing ">> Saved".
    if result.status == aai.TranscriptStatus.error:
        raise RuntimeError(
            f"Transcription failed for {audio_path}: {result.error}"
        )

    data = {
        "text": result.text,
        "id": result.id,
        "status": result.status,
        "audio_duration": result.audio_duration
    }

    # splitext removes only the final extension, so "call.v2.m4a" becomes
    # "call.v2.json" instead of colliding with "call.json".
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    output_path = os.path.join(OUTPUT_FOLDER, f"{base_name}.json")

    # Save JSON
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

    print(">> Saved:", output_path)


# Entry point: transcribe every file listed in AUDIO_FILES, one after another.
if __name__ == "__main__":
    print("---- Starting transcription for all files... -----")

    for audio in AUDIO_FILES:
        transcribe_audio(audio)

    # Reached only if none of the transcribe_audio calls above raised.
    print("\n ---All audio files transcribed successfully!---")


---- Starting transcription for all files... -----

>> Transcribing: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1.m4a
>> Saved: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1.json

>> Transcribing: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2.m4a
>> Saved: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2.json

>> Transcribing: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call3.m4a
>> Saved: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call3.json

 ---All audio files transcribed successfully!---


**analyze.py**

In [9]:
# Analyze the JSON transcripts produced from the raw audio files


from google import genai
import json
import re


import os

# 2. Gemini API key
# SECURITY: an API key is committed here in plain text. The key is now read
# from the GEMINI_API_KEY environment variable first; the literal is kept only
# as a fallback so existing runs keep working. Rotate this key and delete the
# fallback before sharing the notebook.
API_KEY = (
    os.environ.get("GEMINI_API_KEY")
    or "AIzaSyCbkB9BtIzhbBu-lrTvMvyF2wz7-NvSRMs"
)
client = genai.Client(api_key=API_KEY)


# 3. Transcript JSON files to analyze
INPUT_FILES = [
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1.json",
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2.json",
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call3.json"
]

def _analysis_output_path(input_path):
    """Return ``<input without its last extension>_analysis.json``."""
    import os  # local import: this cell does not import os at the top

    # Only the final extension is swapped. A plain str.replace would also
    # rewrite ".json" inside directory names, and would return the input path
    # unchanged (so the transcript gets overwritten) when it has no ".json".
    root, _ext = os.path.splitext(input_path)
    return root + "_analysis.json"


def _extract_json_object(raw_output):
    """Return the first JSON object found in *raw_output* as a dict."""
    decoder = json.JSONDecoder()
    start = raw_output.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and ignores
            # whatever follows it (closing markdown fence, extra commentary).
            parsed, _end = decoder.raw_decode(raw_output, start)
        except json.JSONDecodeError:
            start = raw_output.find("{", start + 1)
        else:
            return parsed
    raise ValueError("Gemini did NOT return JSON:\n" + raw_output)


def analyze_single_file(input_path):
    """Analyze one transcript JSON with Gemini and save the extracted fields.

    Reads the transcript from *input_path*, asks the model for a JSON object
    with ``customer_name``, ``agent_name``, ``sentiment``, ``issue_type`` and
    ``summary``, and writes that object next to the input as
    ``<name>_analysis.json``.

    Args:
        input_path: Path of a transcript JSON file holding the transcript
            under the ``text`` (or ``transcript``) key.

    Raises:
        ValueError: If the transcript is missing or empty, or if the model
            reply contains no parseable JSON object.
    """
    output_path = _analysis_output_path(input_path)

    print("** Analyzing:", input_path)
    print("** Output:", output_path)
    print("...................................................................\n")

    # Load transcript JSON
    with open(input_path, "r", encoding="utf-8") as f:
        transcript_data = json.load(f)

    transcript_text = transcript_data.get("text") or transcript_data.get("transcript")
    if not transcript_text:
        raise ValueError(f"Transcript JSON missing 'text' key → {input_path}")

    # Strict JSON prompt
    prompt = f"""
You are an AI that returns ONLY valid JSON. NO extra text. NO explanation. NO markdown.

Analyze this call transcript:

{transcript_text}

Return valid JSON with exactly these keys:
{{
  "customer_name": "",
  "agent_name": "",
  "sentiment": "",
  "issue_type": "",
  "summary": ""
}}
"""

    # Call Gemini API
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt
    )

    # response.text can be None when the reply carries no text part; treat
    # that as an empty reply so it surfaces as the ValueError below.
    raw_output = (response.text or "").strip()

    analysis = _extract_json_object(raw_output)

    # Save output analysis file
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(analysis, f, indent=4)

    print(">> Saved:", output_path)
    print(">>> Extracted:", analysis)



# 4. Run analysis for every transcript listed in INPUT_FILES.
# The count shown in the messages is taken from the list, so it stays correct
# when files are added or removed (output is unchanged for three files).
print(f"---- Starting analysis for all {len(INPUT_FILES)} transcript files... ---- \n")

for path in INPUT_FILES:
    analyze_single_file(path)

print(f"\n--- All {len(INPUT_FILES)} transcripts analyzed successfully! ---")


---- Starting analysis for all 3 transcript files... ---- 

** Analyzing: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1.json
** Output: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1_analysis.json
...................................................................

>> Saved: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1_analysis.json
>>> Extracted: {'customer_name': 'Cora Vinda', 'agent_name': 'Emma', 'sentiment': 'Neutral', 'issue_type': 'Billing Inquiry - Unknown Charge', 'summary': 'Cora Vinda contacted Customer Support regarding an additional, unidentified charge on her billing statement dated October 15th. Agent Emma is assisting by looking into the details of the charge.'}
** Analyzing: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2.json
** Output: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2_analysis.json
...................................................................

>> Saved: /content

 **generate_csv.py**

In [11]:


import json
import csv


# 1. Analysis JSON files that go into the report
ANALYSIS_FILES = [
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call1_analysis.json",
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call2_analysis.json",
    "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/call3_analysis.json"
]

# 2. Where the combined CSV report is written
OUTPUT_CSV = "/content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/final_report.csv"

# 3. CSV columns, in output order
fieldnames = [
    "file_name",
    "customer_name",
    "agent_name",
    "sentiment",
    "issue_type",
    "summary"
]

# 4. Build one row per analysis file
rows = []

for file_path in ANALYSIS_FILES:
    with open(file_path, "r") as f:
        data = json.load(f)

    # The file name identifies the call; every other column is copied from the
    # analysis JSON, with "" standing in for a key that is absent.
    record = {"file_name": file_path.rsplit("/", 1)[-1]}
    record.update((column, data.get(column, "")) for column in fieldnames[1:])
    rows.append(record)

# 5. Write the header followed by one line per row
with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for record in rows:
        writer.writerow(record)

print(">> CSV Report Created Successfully!")
print(">> Saved at:", OUTPUT_CSV)


>> CSV Report Created Successfully!
>> Saved at: /content/drive/MyDrive/Colab Notebooks/GNCIPL Week 5/data/final_report.csv


# **-------------------------------------------------------------------------------**