I am going to test the system by powering it with two large language models, each selected through `self.MODEL_ID`: Falcon-7B (`tiiuae/falcon-7b-instruct`) and xlm-roberta-large-xnli (`joeddav/xlm-roberta-large-xnli`). I will then compare and contrast their results to see which one powers the more efficient Project Manager AI system.

In [None]:
import pytest
from unittest.mock import MagicMock, patch
import sys

#  CORE AGENT
class ProjectManagementAgent:
    """Keyword-driven project management assistant.

    A free-text command is mapped to one of four intents and dispatched to
    the handler registered for that intent in ``self.actions``.
    """

    def __init__(self, project_id):
        """Record the project id, register handlers and create the clients."""
        self.project_id = project_id
        # Intent name -> bound handler, consulted by execute_command.
        self.actions = dict(
            update_timeline=self.adjust_schedule,
            assign_task=self.delegate_work,
            flag_risk=self.identify_risks,
            generate_report=self.create_status_report,
        )
        # External service integrations
        self.jira = JiraClient()
        self.ms_project = MSProjectClient()
        self.slack = SlackClient()

    def execute_command(self, user_input: str):
        """Run the handler for the intent detected in *user_input*.

        Returns the handler's response string, or "Action not recognized"
        when the detected intent has no registered handler.
        """
        intent = self._detect_intent(user_input)
        handler = self.actions.get(intent)
        if handler is None:
            return "Action not recognized"
        # The report handler is the only one that takes no argument.
        if intent == 'generate_report':
            return handler()
        return handler(user_input)

    def _detect_intent(self, text: str) -> str:
        """Return the first intent whose keywords occur in *text*.

        Matching is a case-insensitive substring test; rules are tried in
        the order listed and 'unknown' is returned when none matches.
        """
        lowered = text.lower()
        keyword_rules = (
            ('update_timeline', ('schedule', 'timeline', 'deadline')),
            ('assign_task', ('assign', 'delegate', 'task')),
            ('flag_risk', ('risk', 'issue', 'problem')),
            ('generate_report', ('report', 'status', 'update')),
        )
        for intent, keywords in keyword_rules:
            if any(word in lowered for word in keywords):
                return intent
        return 'unknown'

    def adjust_schedule(self, params: str):
        """Return a confirmation that echoes the rescheduling request."""
        confirmation = f"Rescheduled tasks based on: {params}"
        return confirmation

    def delegate_work(self, request: str):
        """Return a confirmation that echoes the assignment request."""
        confirmation = f"Task assigned to team member based on: {request}"
        return confirmation

    def identify_risks(self, metrics: str):
        """Return a fixed risk-analysis message; *metrics* is not used."""
        return "Risk analysis completed with high priority items flagged"

    def create_status_report(self):
        """Return a fixed status-report message."""
        return "Generated project status report with key metrics"

# ====================== EXTERNAL SERVICES ======================
class JiraClient:
    """Stand-in Jira integration that only fabricates a task identifier."""

    def create_task(self, title, assignee):
        """Build an id from the upper-cased first five chars of *title* and *assignee*."""
        title_code = title[:5].upper()
        return f"JIRA-TASK-{title_code}-{assignee}"

class MSProjectClient:
    """Stand-in MS Project integration that only reports a count."""

    def update_deadlines(self, new_dates):
        """Return a message stating how many entries *new_dates* holds."""
        deadline_count = len(new_dates)
        return f"Updated {deadline_count} deadlines"

class SlackClient:
    """Stand-in Slack integration that only echoes the message."""

    def notify_team(self, message):
        """Return a confirmation string that embeds *message*."""
        confirmation = f"Slack notification sent: {message}"
        return confirmation

# ====================== TESTING FRAMEWORK ======================
class TestProjectManagementAgent:
    """Checks for the rule-based agent (pytest-style method names)."""

    def setup_method(self):
        """Build the agent the test methods operate on."""
        self.agent = ProjectManagementAgent(project_id="TEST-123")

    def test_intent_detection(self):
        """Each sample command maps to its expected intent."""
        detect = self.agent._detect_intent
        expectations = (
            ("Move deadline to next month", "update_timeline"),
            ("Assign bug fix to John", "assign_task"),
            ("Potential security risk found", "flag_risk"),
            ("Create weekly status report", "generate_report"),
        )
        for command, intent in expectations:
            assert detect(command) == intent

    def test_command_execution(self):
        """Responses contain the marker text of the handler that ran."""
        run = self.agent.execute_command
        expectations = (
            ("Rescheduled", "Delay feature X deadline"),
            ("Task assigned", "Assign documentation task"),
            ("Generated", "Create status report"),
        )
        for marker, command in expectations:
            assert marker in run(command)

    def test_generate_report_without_parameters(self):
        """Test that status report generation works without parameters"""
        response = self.agent.execute_command("Generate Q3 status report")
        assert "Generated project status report" in response

# ====================== ENVIRONMENT-SAFE EXECUTION ======================
def run_tests(test_classes=None):
    """Run every ``test_*`` method of the given classes and print a summary.

    Each test gets its own instance and its own ``setup_method`` call, so
    state set up or mutated by one test cannot leak into the next. A
    failure inside setup is reported against the test it was preparing
    instead of aborting the whole run.

    Args:
        test_classes: Iterable of test classes to run. When omitted, the
            module's ``TestProjectManagementAgent`` is used.

    Returns:
        A ``(passed, failed)`` tuple; errors count as failures.
    """
    if test_classes is None:
        test_classes = [TestProjectManagementAgent]
    passed = failed = 0

    for test_class in test_classes:
        for method in dir(test_class):
            # Skip non-test names and data attributes that merely start with "test_"
            if not method.startswith('test_'):
                continue
            if not callable(getattr(test_class, method)):
                continue
            try:
                # Fresh instance + setup per test, mirroring pytest's setup_method contract
                test_instance = test_class()
                setup = getattr(test_instance, 'setup_method', None)
                if setup is not None:
                    setup()
                getattr(test_instance, method)()
            except AssertionError as e:
                print(f"✗ FAILED: {method} - {str(e)}")
                failed += 1
            except Exception as e:
                print(f"⚠ ERROR in {method}: {str(e)}")
                failed += 1
            else:
                print(f"✓ PASSED: {method}")
                passed += 1

    print(f"\nTest Results: {passed} passed, {failed} failed")
    return passed, failed

def run_demo():
    """Print the agent's response to a fixed set of sample commands."""
    divider = "=" * 50
    print("\n" + divider)
    print("PROJECT MANAGEMENT AGENT DEMO")
    print(divider)

    demo_agent = ProjectManagementAgent("PROJ-456")
    sample_commands = (
        "Push deadline for login feature to next sprint",
        "Assign documentation task to technical writers",
        "Flag resource shortage risk in design team",
        "Generate Q3 status report for stakeholders",
    )

    for command in sample_commands:
        # Echo the command before executing it so output order matches execution order.
        print(f"\nCommand: {command}")
        reply = demo_agent.execute_command(command)
        print(f"Response: {reply}")

if __name__ == "__main__":
    # The custom runner goes first; the demo is gated on a clean run.
    print("RUNNING TESTS...")
    passed, failed = run_tests()

    if failed:
        print("\nSkipping demo due to test failures")
    else:
        run_demo()

RUNNING TESTS...
✓ PASSED: test_command_execution
✓ PASSED: test_generate_report_without_parameters
✓ PASSED: test_intent_detection

Test Results: 3 passed, 0 failed

PROJECT MANAGEMENT AGENT DEMO

Command: Push deadline for login feature to next sprint
Response: Rescheduled tasks based on: Push deadline for login feature to next sprint

Command: Assign documentation task to technical writers
Response: Task assigned to team member based on: Assign documentation task to technical writers

Command: Flag resource shortage risk in design team
Response: Risk analysis completed with high priority items flagged

Command: Generate Q3 status report for stakeholders
Response: Generated project status report with key metrics


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from unittest.mock import MagicMock, patch
import sys

# ADVANCED INTENT DETECTION
class FalconIntentDetector:
    """Intent classifier that prompts Falcon-7B-Instruct for a category name.

    The model is loaded lazily on the first ``detect_intent`` call. Every
    failure path (load error, generation error, no category in the output)
    yields ``'unknown'``.
    """

    # Categories the generated text is searched for.
    _CATEGORIES = ('update_timeline', 'assign_task', 'flag_risk', 'generate_report')

    def __init__(self):
        """Record the model id; nothing is loaded until first use."""
        self.MODEL_ID = "tiiuae/falcon-7b-instruct"
        self.tokenizer = None
        self.model = None
        self.pipeline = None

    def initialize_model(self):
        """Lazy initialization to only load when needed.

        Returns:
            True when the generation pipeline is ready, False when loading
            failed. On failure no attribute is modified, so a later call
            retries the complete load instead of reporting a half-built
            detector as ready.
        """
        # The pipeline is the last object built, so it is the reliable "ready" marker.
        if self.pipeline is not None:
            return True

        print("Loading Falcon-7B model...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(self.MODEL_ID, use_fast=False)
            # SECURITY NOTE: trust_remote_code=True executes Python code shipped
            # with the model repository; only use it with a MODEL_ID you trust.
            model = AutoModelForCausalLM.from_pretrained(
                self.MODEL_ID,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
            generator = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device_map="auto"
            )
        except Exception as e:
            print(f"Error loading model: {str(e)}")
            return False

        # Commit all three together only after every step succeeded.
        self.tokenizer = tokenizer
        self.model = model
        self.pipeline = generator
        print("Model loaded successfully!")
        return True

    def detect_intent(self, text: str) -> str:
        """Use Falcon-7B for advanced intent classification.

        Args:
            text: The user command to classify.

        Returns:
            The category that appears earliest in the generated text, or
            'unknown' when the model is unavailable, generation fails, or
            no category name is present.
        """
        if not self.initialize_model():
            return "unknown"

        prompt = f"""
        Classify the following project management command into one of these categories:
        [update_timeline, assign_task, flag_risk, generate_report, unknown].
        Return only the category name.

        Command: "{text}"

        Category:
        """

        try:
            sequences = self.pipeline(
                prompt,
                max_new_tokens=10,
                do_sample=False,
                return_full_text=False,
                pad_token_id=self.tokenizer.eos_token_id
            )
            result = sequences[0]['generated_text'].strip().lower()
        except Exception as e:
            print(f"Prediction error: {str(e)}")
            return 'unknown'

        # Choose by position in the output, not by order in _CATEGORIES, so
        # "assign_task (not update_timeline)" resolves to assign_task.
        hits = [(result.find(category), category)
                for category in self._CATEGORIES if category in result]
        if not hits:
            return 'unknown'
        return min(hits)[1]

#  CORE AGENT
class ProjectManagementAgent:
    """Project management assistant with a selectable intent detector.

    Commands are classified either by keyword rules or, when
    ``use_advanced_nlp`` is true, by a ``FalconIntentDetector``; the
    resulting intent is dispatched through ``self.actions``.
    """

    def __init__(self, project_id, use_advanced_nlp=False):
        """Record settings, register handlers and create the clients."""
        self.project_id = project_id
        # Intent name -> bound handler, consulted by execute_command.
        self.actions = dict(
            update_timeline=self.adjust_schedule,
            assign_task=self.delegate_work,
            flag_risk=self.identify_risks,
            generate_report=self.create_status_report,
        )
        # External service integrations
        self.jira = JiraClient()
        self.ms_project = MSProjectClient()
        self.slack = SlackClient()

        # NLP configuration: the detector object exists only in advanced mode.
        self.use_advanced_nlp = use_advanced_nlp
        if use_advanced_nlp:
            self.nlp_engine = FalconIntentDetector()

    def execute_command(self, user_input: str):
        """Run the handler for the intent detected in *user_input*.

        Returns the handler's response string, or "Action not recognized"
        when the detected intent has no registered handler.
        """
        intent = self._detect_intent(user_input)
        handler = self.actions.get(intent)
        if handler is None:
            return "Action not recognized"
        # The report handler is the only one that takes no argument.
        if intent == 'generate_report':
            return handler()
        return handler(user_input)

    def _detect_intent(self, text: str) -> str:
        """Classify *text* with the detector chosen at construction time."""
        if not self.use_advanced_nlp:
            return self._basic_intent_detection(text)
        return self.nlp_engine.detect_intent(text)

    def _basic_intent_detection(self, text: str) -> str:
        """Return the first intent whose keywords occur in *text*.

        Matching is a case-insensitive substring test; rules are tried in
        the order listed and 'unknown' is returned when none matches.
        """
        lowered = text.lower()
        keyword_rules = (
            ('update_timeline', ('schedule', 'timeline', 'deadline')),
            ('assign_task', ('assign', 'delegate', 'task')),
            ('flag_risk', ('risk', 'issue', 'problem')),
            ('generate_report', ('report', 'status', 'update')),
        )
        for intent, keywords in keyword_rules:
            if any(word in lowered for word in keywords):
                return intent
        return 'unknown'

    def adjust_schedule(self, params: str):
        """Return a confirmation that echoes the rescheduling request."""
        confirmation = f"Rescheduled tasks based on: {params}"
        return confirmation

    def delegate_work(self, request: str):
        """Return a confirmation that echoes the assignment request."""
        confirmation = f"Task assigned to team member based on: {request}"
        return confirmation

    def identify_risks(self, metrics: str):
        """Return a fixed risk-analysis message; *metrics* is not used."""
        return "Risk analysis completed with high priority items flagged"

    def create_status_report(self):
        """Return a fixed status-report message."""
        return "Generated project status report with key metrics"

#  EXTERNAL SERVICES
class JiraClient:
    """Stand-in Jira integration that only fabricates a task identifier."""

    def create_task(self, title, assignee):
        """Build an id from the upper-cased first five chars of *title* and *assignee*."""
        title_code = title[:5].upper()
        return f"JIRA-TASK-{title_code}-{assignee}"

class MSProjectClient:
    """Stand-in MS Project integration that only reports a count."""

    def update_deadlines(self, new_dates):
        """Return a message stating how many entries *new_dates* holds."""
        deadline_count = len(new_dates)
        return f"Updated {deadline_count} deadlines"

class SlackClient:
    """Stand-in Slack integration that only echoes the message."""

    def notify_team(self, message):
        """Return a confirmation string that embeds *message*."""
        confirmation = f"Slack notification sent: {message}"
        return confirmation

# TESTING FRAMEWORK
class TestProjectManagementAgent:
    """Checks for both agent modes; Falcon itself is always patched out."""

    def setup_method(self):
        """Create one rule-based agent and one advanced-NLP agent."""
        self.basic_agent = ProjectManagementAgent("TEST-123", use_advanced_nlp=False)
        self.advanced_agent = ProjectManagementAgent("TEST-456", use_advanced_nlp=True)

    def test_intent_detection(self):
        """Rule-based detection works; advanced detection uses the detector's answer."""
        detect_basic = self.basic_agent._detect_intent
        assert detect_basic("Move deadline to next month") == "update_timeline"
        assert detect_basic("Assign bug fix to John") == "assign_task"

        def fake_falcon(command):
            # Stand-in for the model: recognizes only commands mentioning "report".
            return 'generate_report' if 'report' in command else 'unknown'

        with patch.object(FalconIntentDetector, 'detect_intent', side_effect=fake_falcon):
            detected = self.advanced_agent._detect_intent("Create financial report")
            assert detected == "generate_report"

    def test_command_execution(self):
        """Responses contain the marker text of the handler that ran."""
        run = self.basic_agent.execute_command
        expectations = (
            ("Rescheduled", "Delay feature X deadline"),
            ("Task assigned", "Assign documentation task"),
            ("Generated", "Create status report"),
        )
        for marker, command in expectations:
            assert marker in run(command)

    def test_advanced_nlp_handling(self):
        """Test that the agent properly switches between NLP engines"""
        # The basic agent answers from keyword rules.
        assert self.basic_agent._detect_intent("Risk assessment needed") == "flag_risk"

        # The advanced agent returns whatever the (patched) Falcon detector says.
        with patch.object(FalconIntentDetector, 'detect_intent', return_value='assign_task'):
            detected = self.advanced_agent._detect_intent("Complex phrasing for assignment")
            assert detected == "assign_task"

#  ENVIRONMENT-SAFE EXECUTION
def run_tests(test_classes=None):
    """Run every ``test_*`` method of the given classes and print a summary.

    Each test gets its own instance and its own ``setup_method`` call, so
    state set up or mutated by one test cannot leak into the next. A
    failure inside setup is reported against the test it was preparing
    instead of aborting the whole run.

    Args:
        test_classes: Iterable of test classes to run. When omitted, the
            module's ``TestProjectManagementAgent`` is used.

    Returns:
        A ``(passed, failed)`` tuple; errors count as failures.
    """
    if test_classes is None:
        test_classes = [TestProjectManagementAgent]
    passed = failed = 0

    for test_class in test_classes:
        for method in dir(test_class):
            # Skip non-test names and data attributes that merely start with "test_"
            if not method.startswith('test_'):
                continue
            if not callable(getattr(test_class, method)):
                continue
            try:
                # Fresh instance + setup per test, mirroring pytest's setup_method contract
                test_instance = test_class()
                setup = getattr(test_instance, 'setup_method', None)
                if setup is not None:
                    setup()
                getattr(test_instance, method)()
            except AssertionError as e:
                print(f"✗ FAILED: {method} - {str(e)}")
                failed += 1
            except Exception as e:
                print(f"⚠ ERROR in {method}: {str(e)}")
                failed += 1
            else:
                print(f"✓ PASSED: {method}")
                passed += 1

    print(f"\nTest Results: {passed} passed, {failed} failed")
    return passed, failed

def run_demo(use_advanced=False):
    """Print the agent's responses to sample commands.

    Args:
        use_advanced: When true the agent is built with
            ``use_advanced_nlp=True`` and a closing note is printed.
    """
    engine_label = 'Falcon-7B' if use_advanced else 'Basic'
    divider = "=" * 50
    print("\n" + divider)
    print(f"PROJECT MANAGEMENT AGENT DEMO ({engine_label} NLP)")
    print(divider)

    demo_agent = ProjectManagementAgent("PROJ-789", use_advanced_nlp=use_advanced)
    sample_commands = (
        "Postpone the deadline for user authentication feature",
        "Delegate the documentation task to technical writing team",
        "We should flag the resource allocation issue as critical risk",
        "Generate end-of-quarter financial status report",
    )

    for command in sample_commands:
        # Echo first: executing the command may itself print (e.g. model loading).
        print(f"\nCommand: {command}")
        reply = demo_agent.execute_command(command)
        print(f"Response: {reply}")

    if not use_advanced:
        return
    print("\nNote: Falcon-7B is being used for intent detection")

if __name__ == "__main__":
    # Run tests using custom runner
    print("RUNNING TESTS...")
    passed, failed = run_tests()

    # Run the demos only after a clean run: at least one test passed and none
    # failed. Gating on `passed > 0` alone would start the demo despite
    # failures and contradict the skip message below.
    if passed > 0 and failed == 0:
        run_demo(use_advanced=False)

        # The Falcon-7B demo loads the full model, so it is opt-in via --advanced
        advanced_requested = "--advanced" in sys.argv
        if advanced_requested:
            run_demo(use_advanced=True)
    else:
        print("\nSkipping demo due to test failures")

RUNNING TESTS...
✓ PASSED: test_advanced_nlp_handling
✓ PASSED: test_command_execution
✓ PASSED: test_intent_detection

Test Results: 3 passed, 0 failed

PROJECT MANAGEMENT AGENT DEMO (Basic NLP)

Command: Postpone the deadline for user authentication feature
Response: Rescheduled tasks based on: Postpone the deadline for user authentication feature

Command: Delegate the documentation task to technical writing team
Response: Task assigned to team member based on: Delegate the documentation task to technical writing team

Command: We should flag the resource allocation issue as critical risk
Response: Risk analysis completed with high priority items flagged

Command: Generate end-of-quarter financial status report
Response: Generated project status report with key metrics


In [None]:
from transformers import pipeline
import torch
from unittest.mock import MagicMock
import time

#  ZERO-SHOT INTENT DETECTOR
class ZeroShotIntentDetector:
    """Intent classifier built on a zero-shot NLI pipeline with a rule-based fallback.

    The model is loaded lazily on first use. The keyword rules in
    ``fallback_detection`` are used whenever the model cannot be loaded,
    prediction raises, or the top prediction is not confident enough.
    """

    # Ordered keyword rules for the fallback; the first matching rule wins.
    _FALLBACK_RULES = (
        ('update_timeline', ('schedule', 'timeline', 'deadline', 'postpone')),
        ('assign_task', ('assign', 'delegate', 'task')),
        ('flag_risk', ('risk', 'issue', 'problem', 'shortage')),
        ('generate_report', ('report', 'status', 'update', 'compile')),
    )

    def __init__(self, confidence_threshold=0.5):
        """Configure the detector without loading the model.

        Args:
            confidence_threshold: Score the top label must exceed for the
                model's answer to be accepted (default 0.5).
        """
        self.MODEL_ID = "joeddav/xlm-roberta-large-xnli"
        self.classifier = None
        self.labels = [
            "update_timeline",
            "assign_task",
            "flag_risk",
            "generate_report"
        ]
        self.model_loaded = False
        self.confidence_threshold = confidence_threshold

    def initialize_model(self):
        """Initialize zero-shot classifier with error handling.

        Returns:
            True when the classifier is ready, False when loading failed.
        """
        if self.model_loaded:
            return True

        try:
            print("🔄 Loading zero-shot intent classifier...")
            # perf_counter is monotonic, so the reported duration cannot be
            # skewed by system clock adjustments.
            start_time = time.perf_counter()

            self.classifier = pipeline(
                "zero-shot-classification",
                model=self.MODEL_ID,
                device=0 if torch.cuda.is_available() else -1
            )

            load_time = time.perf_counter() - start_time
            print(f"✅ Intent classifier loaded in {load_time:.1f} seconds")
            self.model_loaded = True
            return True
        except Exception as e:
            print(f"⚠️ Error loading model: {str(e)}")
            print("⚠️ Falling back to rule-based detection")
            return False

    def detect_intent(self, text: str) -> str:
        """Use zero-shot classification with fallback.

        Args:
            text: The user command to classify.

        Returns:
            The model's top label when its score exceeds
            ``confidence_threshold``; otherwise the result of
            ``fallback_detection`` (which may be 'unknown').
        """
        if not self.initialize_model():
            # Fallback to basic detection if model fails
            return self.fallback_detection(text)

        try:
            results = self.classifier(
                sequences=text,
                candidate_labels=self.labels,
                multi_label=False,
                hypothesis_template="This text is about {}."
            )
            top_label = results['labels'][0]
            top_score = results['scores'][0]
        except Exception as e:
            print(f"⚠️ Prediction error: {str(e)}")
            return self.fallback_detection(text)

        if top_score > self.confidence_threshold:
            print(f"🔍 Classified '{text[:30]}...' as {top_label} ({top_score:.2f})")
            return top_label

        # Low confidence: consult the keyword rules instead of discarding the
        # command, so inputs the rules recognize are not reported as unknown.
        return self.fallback_detection(text)

    def fallback_detection(self, text: str) -> str:
        """Rule-based fallback when the model fails or is not confident.

        Case-insensitive substring matching; returns 'unknown' when no rule
        matches.
        """
        lowered = text.lower()
        for intent, keywords in self._FALLBACK_RULES:
            if any(word in lowered for word in keywords):
                return intent
        return 'unknown'

#  CORE AGENT
class ProjectManagementAgent:
    """Project management assistant with a selectable intent detector.

    Commands are classified either by keyword rules or, when
    ``use_advanced_nlp`` is true, by a ``ZeroShotIntentDetector``; the
    resulting intent is dispatched through ``self.actions``.
    """

    def __init__(self, project_id, use_advanced_nlp=False):
        """Record settings, register handlers and create the clients."""
        self.project_id = project_id
        # Intent name -> bound handler, consulted by execute_command.
        self.actions = dict(
            update_timeline=self.adjust_schedule,
            assign_task=self.delegate_work,
            flag_risk=self.identify_risks,
            generate_report=self.create_status_report,
        )
        # External service integrations
        self.jira = JiraClient()
        self.ms_project = MSProjectClient()
        self.slack = SlackClient()

        # NLP configuration: the detector object exists only in advanced mode.
        self.use_advanced_nlp = use_advanced_nlp
        if not use_advanced_nlp:
            print("Using basic NLP engine")
        else:
            self.nlp_engine = ZeroShotIntentDetector()

    def execute_command(self, user_input: str):
        """Run the handler for the intent detected in *user_input*.

        Returns the handler's response string, or "Action not recognized"
        when the detected intent has no registered handler.
        """
        intent = self._detect_intent(user_input)
        handler = self.actions.get(intent)
        if handler is None:
            return "Action not recognized"
        # The report handler is the only one that takes no argument.
        if intent == 'generate_report':
            return handler()
        return handler(user_input)

    def _detect_intent(self, text: str) -> str:
        """Classify *text* with the detector chosen at construction time."""
        if not self.use_advanced_nlp:
            return self.basic_intent_detection(text)
        return self.nlp_engine.detect_intent(text)

    def basic_intent_detection(self, text: str) -> str:
        """Return the first intent whose keywords occur in *text*.

        Matching is a case-insensitive substring test; rules are tried in
        the order listed and 'unknown' is returned when none matches.
        """
        lowered = text.lower()
        keyword_rules = (
            ('update_timeline', ('schedule', 'timeline', 'deadline', 'postpone')),
            ('assign_task', ('assign', 'delegate', 'task')),
            ('flag_risk', ('risk', 'issue', 'problem', 'shortage')),
            ('generate_report', ('report', 'status', 'update', 'compile')),
        )
        for intent, keywords in keyword_rules:
            if any(word in lowered for word in keywords):
                return intent
        return 'unknown'

    def adjust_schedule(self, params: str):
        """Return a confirmation that echoes the rescheduling request."""
        confirmation = f"Rescheduled tasks based on: {params}"
        return confirmation

    def delegate_work(self, request: str):
        """Return a confirmation that echoes the assignment request."""
        confirmation = f"Task assigned based on: {request}"
        return confirmation

    def identify_risks(self, metrics: str):
        """Return a fixed risk-analysis message; *metrics* is not used."""
        return "Risk analysis completed with high priority items flagged"

    def create_status_report(self):
        """Return a fixed status-report message."""
        return "Generated project status report with key metrics"

#  EXTERNAL SERVICES
class JiraClient:
    """Stand-in Jira integration that only fabricates a confirmation string."""

    def create_task(self, title, assignee):
        """Return a confirmation with the first 20 chars of *title*; *assignee* is not used."""
        short_title = title[:20]
        return f"JIRA-TASK-CREATED: {short_title}"

class MSProjectClient:
    """Stand-in MS Project integration that only reports a count."""

    def update_deadlines(self, new_dates):
        """Return a message stating how many entries *new_dates* holds."""
        deadline_count = len(new_dates)
        return f"Updated {deadline_count} deadlines"

class SlackClient:
    """Stand-in Slack integration that returns a fixed confirmation."""

    def notify_team(self, message):
        """Return the fixed confirmation text; *message* is not used."""
        return "Slack notification sent"

#  PROJECT MANAGEMENT AGENT DEMO
def run_advanced_demo():
    """Run demo with zero-shot intent classification.

    Builds an agent with ``use_advanced_nlp=True``, sends it a fixed list of
    commands and prints each response with the time it took. The first
    command's time includes model initialization, which happens lazily.
    """
    print("\n" + "="*60)
    print("🚀 PROJECT MANAGEMENT AGENT DEMO (Zero-Shot Intent Classification)")
    print("="*60)
    print("Note: First command will initialize the NLP model")

    agent = ProjectManagementAgent("PROJ-ZERO", use_advanced_nlp=True)

    commands = [
        "Postponement needed for auth feature since dev team is overloaded",
        "Can we assign documentation to technical writers? They have bandwidth",
        "Potential resource shortage in UX team during holiday season",
        "Compile a progress update for stakeholder review next Tuesday",
        "Move the API documentation deadline to Friday"
    ]

    for cmd in commands:
        print(f"\n💬 Command: {cmd}")
        # perf_counter is monotonic and high-resolution, so short intervals
        # are not distorted by wall-clock adjustments the way time.time() can be.
        start_time = time.perf_counter()
        response = agent.execute_command(cmd)
        response_time = time.perf_counter() - start_time
        print(f"🖥️ Response: {response} ({response_time:.2f}s)")

    print("\n" + "-"*60)
    print("Demo Summary:")
    print("- Publicly accessible zero-shot classification model")
    print("- Automatic fallback to rule-based system if needed")
    print("- Robust error handling for production environments")
    print("="*60)

if __name__ == "__main__":
    # Script entry point: run the zero-shot demo (this cell has no test gate).
    run_advanced_demo()


🚀 PROJECT MANAGEMENT AGENT DEMO (Zero-Shot Intent Classification)
Note: First command will initialize the NLP model

💬 Command: Postponement needed for auth feature since dev team is overloaded
🔄 Loading zero-shot intent classifier...


Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


✅ Intent classifier loaded in 5.1 seconds
🔍 Classified 'Postponement needed for auth f...' as assign_task (0.53)
🖥️ Response: Task assigned based on: Postponement needed for auth feature since dev team is overloaded (5.15s)

💬 Command: Can we assign documentation to technical writers? They have bandwidth
🔍 Classified 'Can we assign documentation to...' as assign_task (0.76)
🖥️ Response: Task assigned based on: Can we assign documentation to technical writers? They have bandwidth (0.08s)

💬 Command: Potential resource shortage in UX team during holiday season
🖥️ Response: Action not recognized (0.07s)

💬 Command: Compile a progress update for stakeholder review next Tuesday
🖥️ Response: Action not recognized (0.07s)

💬 Command: Move the API documentation deadline to Friday
🔍 Classified 'Move the API documentation dea...' as update_timeline (0.89)
🖥️ Response: Rescheduled tasks based on: Move the API documentation deadline to Friday (0.06s)

----------------------------------------------