<a href="https://colab.research.google.com/github/Anirudho747/Edrk_Google-Collab/blob/main/Assignment_CrewAI_MultiAgents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
"""
Crew feedback processing pipeline (single-file).


- Runtime inputs: OpenAI API key (via getpass) and the feedback CSV (uploaded through the Colab file picker, google.colab files.upload())
- Agents / Tasks included:
1) Feedback Analyst (analysis)
2) Feedback Categorizer (bug/feature/praise/complaint/spam)
3) Technical Extractor (steps, platform, severity, reproducibility, logs, suggested_fix)
4) Feature Identifier (feature_summary, demand_score, priority, aggregated_features)
5) Ticket Generator (structured tickets + tickets_csv string)
6) Ticket Reviewer (reviews generated tickets for completeness & accuracy)


- All agents are configured to return JSON only. Nothing is written to disk.
- The script runs Crew.kickoff(), then attempts to extract agent outputs (JSON) from the crew result text.
- If the ticket reviewer output isn't easily found inside the crew output, the script attempts a best-effort local review using the same LLM instance.


Notes:
- Do NOT hardcode API keys. Use getpass().
- This script attempts robust extraction of JSON-like structures from the crew output; depending on your crewai version output shape, you may need to inspect `result` manually.


"""
# Install optional dependency so CrewAI's RAG import doesn't explode
!pip install -q --upgrade \
  "qdrant-client>=1.7.0" \
  "crewai>=0.51.0" \
  "crewai-tools>=0.11.0" \
  "openai>=1.0.0" \
  pandas duckdb


# ============================================
# Installation
# ============================================
import warnings

# Silence every Python warning for the rest of the session so the notebook
# output stays readable. This only needs to be configured once.
warnings.filterwarnings('ignore')

print("Installation complete!")

# ============================================
# Import Libraries
# ============================================

from getpass import getpass
import os
from datetime import datetime, timezone
from crewai import Agent, Task, Crew, LLM
from crewai_tools import CSVSearchTool
import json
import re

print("Libraries imported!")

# ============================================
# Setup API Keys
# ============================================

# Get OpenAI API Key
print("Enter your OpenAI API Key")
print("Get one at: https://platform.openai.com/api-keys")
# getpass keeps the key out of the notebook output; strip() removes the stray
# whitespace/newline that frequently comes along when a key is pasted.
openai_api_key = getpass("OpenAI API Key: ").strip()
if not openai_api_key:
    # Fail here with a clear message instead of much later with an opaque
    # authentication error raised from inside an agent run.
    raise ValueError("No OpenAI API key was entered; re-run this cell and paste a valid key.")

# Set environment variables
os.environ["OPENAI_API_KEY"] = openai_api_key

print("API keys configured!")

# ============================================
# Configure OpenAI LLM
# ============================================

# Initialize OpenAI
# Every task in this pipeline asks for machine-parseable JSON, so sampling is
# kept close to deterministic: a low temperature reduces the chance of the
# model wrapping its answer in prose or drifting from the requested schema.
llm = LLM(
    model="gpt-3.5-turbo",
    api_key=openai_api_key,
    temperature=0.2
)

print("OpenAI LLM initialized!")

# ============================================
# Define Utility Helpers
# ============================================

def find_json_with_key(text, key):
    """Find the first JSON object in `text` that contains `key` at top-level.

    The text is scanned for ``{`` characters and a JSON value is decoded
    starting at each one, so objects that contain nested objects or arrays are
    parsed as a whole. When strict decoding fails at a position, a second
    attempt is made with every single quote replaced by a double quote
    (a best-effort repair for Python-repr style output).

    Args:
        text: Text that may embed one or more JSON objects. Any non-string
            value yields ``None``.
        key: Key that must be present at the top level of the object.

    Returns:
        The parsed ``dict`` of the first matching object, or ``None`` when no
        such object is found.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    # Quote-for-quote replacement keeps the length unchanged, so an offset
    # found in `text` points at the same brace in `lenient`.
    lenient = text.replace("'", '"')

    start = text.find('{')
    while start != -1:
        resume_at = start + 1
        for variant in (text, lenient):
            try:
                parsed, end = decoder.raw_decode(variant, start)
            except ValueError:
                continue
            if isinstance(parsed, dict) and key in parsed:
                return parsed
            # A complete object without the key: skip past it so that its
            # nested objects are not mistaken for top-level candidates.
            resume_at = end
            break
        start = text.find('{', resume_at)
    return None


def find_first_json_array(text):
    """Find the first JSON array in text and return parsed list or None.

    The text is scanned for ``[`` characters and a JSON value is decoded
    starting at each one, so arrays whose items contain nested arrays or
    objects are parsed as a whole. When strict decoding fails at a position,
    a second attempt is made with every single quote replaced by a double
    quote (a best-effort repair for Python-repr style output).

    Args:
        text: Text that may embed a JSON array. Any non-string value yields
            ``None``.

    Returns:
        The parsed ``list`` of the first decodable array, or ``None`` when the
        text contains none.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    # Quote-for-quote replacement keeps the length unchanged, so an offset
    # found in `text` points at the same bracket in `lenient`.
    lenient = text.replace("'", '"')

    start = text.find('[')
    while start != -1:
        for variant in (text, lenient):
            try:
                parsed, _ = decoder.raw_decode(variant, start)
            except ValueError:
                continue
            if isinstance(parsed, list):
                return parsed
        # Not a decodable array here (e.g. "[note]" in prose): try the next bracket.
        start = text.find('[', start + 1)
    return None


def safe_print_json(obj, max_items=3):
    """Pretty-print `obj` as indented JSON, falling back to a plain print.

    Lists are truncated to their first `max_items` entries before being
    rendered; any other value is rendered whole. If anything goes wrong while
    rendering (for example the value is not JSON-serializable), the original
    object is printed as-is instead.
    """
    try:
        preview = obj[:max_items] if isinstance(obj, list) else obj
        print(json.dumps(preview, indent=2))
    except Exception:
        print(obj)

# ============================================
# Get CSV File
# ============================================

# Colab upload
from google.colab import files

# files.upload() shows the browser file picker; the result is keyed by the
# name each file was saved under (Colab may rename duplicates, as the
# "name (N).csv" example below shows).
uploaded = files.upload()

# Nothing selected / picker dismissed: stop with a clear message instead of
# the bare StopIteration that next() would raise on an empty result.
csv_path = next(iter(uploaded), None)
if csv_path is None:
    raise RuntimeError("No CSV file was uploaded; re-run this cell and choose a feedback CSV.")
print("CSV uploaded! Path:", csv_path)  # e.g., 'telecom_feedback_sample (3).csv'

# Robust import for CSVSearchTool (different versions export it differently)
try:
    from crewai_tools import CSVSearchTool
except ImportError:
    # ImportError also covers "package present but the name is not exported";
    # ModuleNotFoundError is only the subclass raised for a missing module.
    # NOTE(review): the fallback module path is kept as-is — confirm it
    # against the installed crewai_tools version.
    from crewai_tools.tools.csv_search import CSVSearchTool  # fallback

# Handle minor constructor differences across versions
def make_csv_tool(path: str):
    """Build a CSVSearchTool for `path`, trying the known keyword spellings.

    The constructor is attempted with ``csv=``, then ``file_path=``, and
    finally ``path=``. A ``TypeError`` from one of the first two spellings
    moves on to the next; a ``TypeError`` from the last one propagates to the
    caller.
    """
    for keyword in ("csv", "file_path"):   # common spelling first, then the variant
        try:
            return CSVSearchTool(**{keyword: path})
        except TypeError:
            continue
    # Last resort (rare spelling); any error from here is left to surface.
    return CSVSearchTool(path=path)

csv_tool = make_csv_tool(csv_path)
print("CSV tool ready:", type(csv_tool).__name__)

# -------------------------
# Agents
# -------------------------


def _build_agent(role, goal, backstory):
    """Create a verbose Agent that uses the shared LLM and the CSV search tool."""
    return Agent(
        role=role,
        goal=goal,
        backstory=backstory,
        tools=[csv_tool],
        llm=llm,
        verbose=True
    )


# One agent per pipeline stage; they differ only in role, goal and backstory.
feedback_analyst = _build_agent(
    'Feedback Analyst',
    'Analyze user feedback and extract high-level themes and sentiment.',
    'Expert at summarizing user sentiment and surfacing product-level insights.',
)

feedback_categorizer = _build_agent(
    'Feedback Categorizer',
    'Classify feedback rows into bug/feature request/praise/complaint/spam and return structured JSON.',
    'NLP classification specialist.',
)

tech_extractor = _build_agent(
    'Technical Extractor',
    'Extract reproducible steps, platform info, severity, reproducibility, logs, and suggested fixes.',
    'Engineer-minded extractor.',
)

feature_identifier = _build_agent(
    'Feature Identifier',
    'Detect feature requests, summarize them, and estimate demand and impact.',
    'Product-minded agent for feature discovery.',
)

ticket_generator = _build_agent(
    'Ticket Generator',
    'Generate structured tickets for each feedback row and produce CSV text for downstream logging.',
    'Agent experienced in converting user reports into tracker tickets with sensible defaults for priority/assignee.',
)

ticket_reviewer = _build_agent(
    'Ticket Reviewer',
    'Review generated tickets for completeness and accuracy and suggest corrections.',
    'QA/product-minded reviewer who validates ticket quality for engineering triage.',
)

# ============================================
# Tasks
# ============================================

# Task 1 - high-level analysis. Unlike the later tasks, its prompt accepts
# either a short JSON report or a plain-text summary.
analyze_feedback_task = Task(
    description=(
        "Read the provided feedback CSV file, identify common themes, and summarize overall sentiment. "
        "Return a short JSON report or text summary."
    ),
    expected_output="High-level summary JSON or short text.",
    agent=feedback_analyst,
)


# Task 2 - per-row classification into one of five categories; the prompt
# asks for a JSON array with one object per CSV row.
categorize_feedback_task = Task(
    description=(
        "For every feedback row in the CSV, classify the entry into exactly one of: bug, feature request, praise, complaint, spam. "
        "Return a JSON array (one object per row) with fields: row_index, category, confidence (0.0-1.0), notes. JSON only."
    ),
    expected_output='[{"row_index":0,"category":"bug","confidence":0.95,"notes":""}, ...]',
    agent=feedback_categorizer,          # handled by the categorizer agent
)


# Task 3 - per-row technical triage details (steps, platform, severity,
# reproducibility, logs, suggested fix); the prompt asks for a JSON array.
extract_technical_task = Task(
    description=(
        "For every feedback row, extract technical details for triage. Return a JSON array one object per row with: "
        "row_index, steps_to_reproduce (list or null), platform_info (object or null), severity (critical/high/medium/low/unknown), "
        "reproducibility (always/sometimes/rarely/unknown), logs_snippet (string or null), suggested_fix (string or null), confidence (0.0-1.0), notes. JSON only."
    ),
    expected_output='[...]',
    agent=tech_extractor
)


# Task 4 - feature-request detection. The prompt asks for a JSON object
# (not an array) with 'per_row' details plus an 'aggregated_features' roll-up.
feature_identify_task = Task(
    description=(
        "For every feedback row determine if it is a feature request. Return a JSON object with two keys: 'per_row' (array per input row) and 'aggregated_features'. "
        "Per-row objects should include: row_index, is_feature_request (true/false), feature_summary (one-line or null), user_impact (high/medium/low/unknown), demand_score (0-100), priority (P0/P1/P2/P3/unknown), rationale, confidence, notes. JSON only."
    ),
    expected_output='{"per_row":[...],"aggregated_features":[...]}',
    agent=feature_identifier,            # handled by the feature identifier agent
)


# Ticket generation task — returns tickets array and tickets_csv string
# The prompt requires a YYYYMMDD ticket id and a UTC created_at timestamp, but
# the model has no clock of its own. The run's UTC time is captured once here
# and spelled out in the prompt so the generated ids/timestamps are real.
_run_started_at = datetime.now(timezone.utc)

ticket_generation_task = Task(
    description=(
        "Generate structured tickets for each feedback row. Return a JSON object with keys 'tickets' (array) and 'tickets_csv' (CSV text string). "
        "Each ticket must contain: ticket_id (TICKET-{row_index}-{YYYYMMDD}), title, description, reporter_row_index, category, severity, priority, suggested_assignee, tags (array), created_at (UTC ISO), technical_details (object or null), feature_summary (or null), confidence, notes. JSON only. "
        # Only this fragment is an f-string, so the literal {row_index} /
        # {YYYYMMDD} placeholders above reach the model unchanged.
        f"The current UTC time is {_run_started_at.isoformat(timespec='seconds')}; use its date ({_run_started_at:%Y%m%d}) "
        "as the YYYYMMDD part of every ticket_id and use that timestamp for created_at."
    ),
    expected_output='{"tickets":[...],"tickets_csv":"ticket_id,title,...\\n..."}',
    agent=ticket_generator,              # handled by the ticket generator agent
)


# Ticket review task — verifies tickets for completeness & accuracy
# The output is requested as an object keyed by 'reviews' (mirroring the
# 'tickets' object of the generation task) because the extraction code later
# in this file searches the crew output for a JSON object with that key.
ticket_review_task = Task(
    description=(
        "Review the tickets generated and assess them for completeness and accuracy. Return a JSON object with a single key 'reviews' whose value is an array (one object per ticket) with fields: "
        " - ticket_id, "
        " - issues (array of strings describing missing/incorrect fields), "
        " - suggested_corrections (object with corrected fields or guidance), "
        " - overall_completeness (0.0-1.0), "
        " - overall_confidence (0.0-1.0), "
        " - reviewer_notes (short string). "
        "Rules: check that required fields exist and are consistent (e.g., severity & priority mapping), ensure description includes original feedback and technical summary when applicable, and flag low-confidence tickets (<0.5) for human review. JSON only."
    ),
    expected_output='{"reviews":[{"ticket_id":"TICKET-0-20251021","issues":["missing severity"],"suggested_corrections":{},"overall_completeness":0.6,"overall_confidence":0.45,"reviewer_notes":"..."}, ...]}',
    agent=ticket_reviewer,               # handled by the ticket reviewer agent
    # Hand the reviewer the generated tickets explicitly instead of relying on
    # whatever context the installed CrewAI version passes by default.
    context=[ticket_generation_task],
)



# -------------------------
# Crew orchestration & kickoff
# -------------------------

# Each stage pairs an agent with the task it owns. The list order is the
# order in which agents and tasks are handed to the Crew.
_pipeline_stages = [
    (feedback_analyst, analyze_feedback_task),
    (feedback_categorizer, categorize_feedback_task),
    (tech_extractor, extract_technical_task),
    (feature_identifier, feature_identify_task),
    (ticket_generator, ticket_generation_task),
    (ticket_reviewer, ticket_review_task),
]

feedback_crew = Crew(
    agents=[stage_agent for stage_agent, _ in _pipeline_stages],
    tasks=[stage_task for _, stage_task in _pipeline_stages],
    verbose=True
)

def _collect_output_texts(crew_result):
    """Return the raw text outputs carried by a Crew result, in task order.

    A plain string is returned as a one-item list. For result objects, the
    `raw` text of every entry in `tasks_output` is collected, followed by the
    result's own `raw` text when it is not already present. Attributes are
    read defensively because the result shape differs between crewai
    versions; if nothing usable is found, `str(crew_result)` is used.
    """
    if isinstance(crew_result, str):
        return [crew_result]
    texts = []
    for task_output in getattr(crew_result, 'tasks_output', None) or []:
        raw = getattr(task_output, 'raw', None)
        if isinstance(raw, str) and raw:
            texts.append(raw)
    final_raw = getattr(crew_result, 'raw', None)
    if isinstance(final_raw, str) and final_raw and final_raw not in texts:
        texts.append(final_raw)
    if not texts:
        texts.append(str(crew_result))
    return texts


def _looks_like_reviews(candidate):
    """Return True for a non-empty list of dicts that each carry a 'ticket_id'."""
    return (
        isinstance(candidate, list)
        and bool(candidate)
        and all(isinstance(item, dict) and 'ticket_id' in item for item in candidate)
    )


print("Kicking off Crew... this will call all agents/tasks (they are instructed to return JSON only).\n")
result = feedback_crew.kickoff()

# kickoff() may hand back a result object rather than a str/dict, so gather
# every text output up front; the JSON helpers only accept strings.
output_texts = _collect_output_texts(result)

print("\n--- CREW RAW OUTPUT (truncated preview) ---\n")
# print a short preview so user knows something ran
if isinstance(result, str):
    print(result[:1500])
else:
    try:
        print(type(result))
        if hasattr(result, 'keys'):
            print(list(result.keys())[:10])
        else:
            print(output_texts[-1][:1500])
    except Exception:
        print(str(result)[:1000])

# -------------------------
# Attempt to extract outputs
# -------------------------

print("\nAttempting to extract generated artifacts from the Crew output...\n")

# Try to find tickets object (preferred)
tickets_obj = None
if isinstance(result, dict) and 'tickets' in result:
    tickets_obj = result
else:
    for output_text in output_texts:
        tickets_obj = find_json_with_key(output_text, 'tickets')
        if tickets_obj:
            break

if tickets_obj:
    print("Found tickets object. Sample ticket:")
    try:
        print(json.dumps(tickets_obj['tickets'][0], indent=2))
    except Exception:
        safe_print_json(tickets_obj)
else:
    print("Could not auto-locate a 'tickets' object in the Crew output. You may need to inspect `result` manually.")

# Try to find ticket reviews. `reviews` always ends up as the review list
# itself (or None), never the wrapping object.
reviews = None
if isinstance(result, dict) and 'reviews' in result:
    reviews = result['reviews']
else:
    for output_text in output_texts:
        reviews_wrapper = find_json_with_key(output_text, 'reviews')
        if reviews_wrapper:
            reviews = reviews_wrapper['reviews']
            break
    if not reviews:
        # The reviewer may answer with a bare array. It is the last task, so
        # only the last text is checked, and the array must look like reviews
        # so that another task's array is not picked up by mistake.
        bare_array = find_first_json_array(output_texts[-1])
        if _looks_like_reviews(bare_array):
            reviews = bare_array

if reviews:
    print("Found ticket reviews. Sample:")
    safe_print_json(reviews)
else:
    print("No ticket reviews found automatically. Attempting a best-effort local review if tickets were found...")

# If tickets were found but no reviews, do a local review by prompting the LLM directly (best-effort)
if tickets_obj and not reviews:
    try:
        tickets = tickets_obj.get('tickets') if isinstance(tickets_obj, dict) else None
        if not (isinstance(tickets, list) and tickets):
            print("Tickets object present but empty or not a list; skipping local review.")
        elif not hasattr(llm, 'call'):
            print("Could not call LLM for a local review: 'llm.call' not available on LLM wrapper.\nYou can inspect Crew result manually to find ticket reviewer output.")
        else:
            review_instruction = (
                "You are a ticket reviewer. Given an array of ticket objects (fields like ticket_id,title,description,category,severity,priority,technical_details,confidence,notes), "
                "produce a JSON array 'reviews' (one object per ticket) with fields: ticket_id, issues (array), suggested_corrections (object), overall_completeness (0.0-1.0), overall_confidence (0.0-1.0), reviewer_notes. "
                "Be conservative: if confidence<0.5, mark for human review. Return JSON only."
            )
            # include a small sample to keep prompt size small
            tickets_sample = tickets[:5]
            llm_prompt = (
                f"INSTRUCTION:\n{review_instruction}\n\nTICKETS_SAMPLE:\n"
                f"{json.dumps(tickets_sample, indent=2)}"
            )
            # Use the same LLM instance to run a quick review
            llm_response = llm.call(llm_prompt)

            # Try to parse the response
            if isinstance(llm_response, str):
                parsed = find_json_with_key(llm_response, 'reviews') or find_first_json_array(llm_response)
                if parsed:
                    print("Local LLM review parsed. Sample:")
                    safe_print_json(parsed)
                else:
                    print("Local LLM returned text but we couldn't parse a JSON reviews object/array from it. Inspect the raw response below:")
                    print(llm_response[:2000])
            else:
                print("Local LLM review did not return text; inspect the Crew result manually to find ticket reviewer output.")
    except Exception as e:
        # Best-effort step: report the problem and let the cell finish.
        print("Error while attempting local review:", str(e))

print("\nPipeline finished. Inspect the printed outputs above.")

Installation complete!
Libraries imported!
Enter your OpenAI API Key
Get one at: https://platform.openai.com/api-keys
OpenAI API Key: ··········
API keys configured!
OpenAI LLM initialized!


Saving telecom_feedback_sample.csv to telecom_feedback_sample (6).csv
CSV uploaded! Path: telecom_feedback_sample (6).csv
CSV tool ready: CSVSearchTool
Kicking off Crew... this will call all agents/tasks (they are instructed to return JSON only).



Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()