In [1]:
from mcp.server.fastmcp import FastMCP
import json
import torch
from pathlib import Path

# Shared FastMCP server instance; the @mcp.tool() / @mcp.resource() decorators
# further down in this notebook register their functions on this object.
mcp = FastMCP("Research Assistant Intent Classifier")


In [2]:
# Paths
INTENTS_PATH = "/home/siamai/deepsad/local-deep-researcher/data/intents.json"
MODEL_PATH = "/home/siamai/deepsad/local-deep-researcher/data/intent_classifier_nn.pth"

# Load the intent -> keyword-list mapping from INTENTS_PATH.
# Loading is best-effort: any failure (missing file, invalid JSON, wrong
# top-level type) is reported and INTENTS falls back to a minimal built-in
# mapping so the remaining cells can still run.
try:
    with open(INTENTS_PATH, 'r', encoding='utf-8') as f:
        loaded_intents = json.load(f)

    # Validate before assigning or reporting success: a non-object payload
    # (e.g. a JSON list) would otherwise print "Loaded" and only then fail
    # on .keys() with an unhelpful message.
    if not isinstance(loaded_intents, dict):
        raise TypeError(
            "expected a JSON object mapping intents to keywords, "
            f"got {type(loaded_intents).__name__}"
        )

    INTENTS = loaded_intents
    print(f"✓ Loaded {len(INTENTS)} intent categories")

    # Print at most the first 10 category names, with an ellipsis when truncated.
    categories = list(INTENTS.keys())
    if len(categories) > 10:
        category_preview = ", ".join(categories[:10]) + "..."
    else:
        category_preview = ", ".join(categories)
    print("Categories:", category_preview)
except Exception as e:
    print(f"⚠ Intents load failed: {e}")
    INTENTS = {"search_request": ["ค้นหา", "หาข้อมูล"]}

✓ Loaded 22 intent categories
Categories: search_request, explanation_request, summary_request, source_request, academic_query, data_analysis, comparison_request, methodology_query, definition_request, trend_analysis...


In [4]:
# Load model with GPU support
# MODEL stays None in this cell: the checkpoint is read, but no architecture
# is defined yet to receive the weights (see the commented template below).
MODEL = None
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

try:
    if Path(MODEL_PATH).exists():
        # NOTE(security): depending on the installed torch version, torch.load
        # may unpickle arbitrary objects from the file. Only load checkpoints
        # from a trusted source; consider passing weights_only=True where the
        # installed torch supports it.
        state_dict = torch.load(MODEL_PATH, map_location=device)

        # You need to define your model architecture first
        # MODEL = YourModelClass()  # Define your model architecture
        # MODEL.load_state_dict(state_dict)
        # MODEL.eval()
        # MODEL = MODEL.to(device)

        print("✓ Model weights loaded (architecture needed)")
    else:
        # Report the missing checkpoint instead of silently leaving MODEL unset.
        print(f"⚠ Model file not found: {MODEL_PATH}")
except Exception as e:
    print(f"⚠ Model load failed: {e}")



Using device: cuda
✓ Model weights loaded (architecture needed)


In [5]:
def _first_matching_keyword(query_lower, keywords):
    """Return the first usable keyword found in ``query_lower``, or None.

    Keywords that are empty or not strings are skipped: an empty string is a
    substring of every query and would make its intent match unconditionally,
    and a non-string entry has no ``lower()`` to compare with.
    """
    for keyword in keywords:
        if isinstance(keyword, str) and keyword and keyword.lower() in query_lower:
            return keyword
    return None


@mcp.tool()
def classify_research_intent(query: str) -> str:
    """Classify a research query by case-insensitive keyword matching.

    Intents listed in ``research_priority`` are checked first, in that order;
    the remaining entries of ``INTENTS`` are checked afterwards in mapping
    order. The first keyword found as a substring of the lower-cased query
    decides the result.

    Args:
        query: The user's query text.

    Returns:
        A newline-separated report. On a match it contains ``Intent``,
        ``Type``, ``Keyword`` and ``Priority`` lines; with no match it
        defaults to ``search_request`` and echoes the query.
    """
    query_lower = query.lower()

    # Research-specific intent prioritization
    research_priority = [
        "academic_query", "literature_review", "source_request",
        "data_analysis", "methodology_query", "hypothesis_testing",
        "comparison_request", "trend_analysis", "expert_opinion"
    ]

    # Check high-priority research intents first
    for intent in research_priority:
        if intent in INTENTS:
            keyword = _first_matching_keyword(query_lower, INTENTS[intent])
            if keyword is not None:
                return f"Intent: {intent}\nType: Research Query\nKeyword: {keyword}\nPriority: High"

    # Check remaining intents
    for intent, keywords in INTENTS.items():
        if intent in research_priority:
            continue
        keyword = _first_matching_keyword(query_lower, keywords)
        if keyword is not None:
            priority = "Medium" if intent in ["search_request", "explanation_request"] else "Low"
            return f"Intent: {intent}\nType: General Query\nKeyword: {keyword}\nPriority: {priority}"

    return f"Intent: search_request\nType: Default\nPriority: Medium\nQuery: {query}"

@mcp.tool()
def get_research_strategy(intent: str) -> str:
    """Look up the research strategy text associated with an intent name.

    Args:
        intent: Intent identifier, e.g. ``"academic_query"``.

    Returns:
        ``"Strategy for '<intent>': <strategy>"``. Intents without an entry
        in the table get the ``"General web search"`` strategy.
    """
    strategy_by_intent = dict(
        academic_query="ArXiv + Academic databases",
        literature_review="Academic search + Citation analysis",
        source_request="Reference databases + Verification",
        data_analysis="Web search + Statistical sources",
        methodology_query="Academic papers + How-to guides",
        comparison_request="Parallel search + Analysis",
        trend_analysis="Recent publications + News sources",
        expert_opinion="Expert interviews + Opinion pieces",
        search_request="General web search",
        explanation_request="Educational resources + Definitions",
        clarification_request="Follow-up questions + Context",
        feedback_request="User feedback + Surveys",
        summary_request="Content summarization + Key points",
        definition_request="Dictionaries + Technical definitions",
        case_study="Real examples + Case databases",
        hypothesis_testing="Research validation + Statistical tests",
        statistical_query="Statistical databases + Data sources",
        greeting="Friendly response + Context setup",
        goodbye="Session closure + Follow-up offers",
        thanks="Acknowledgment + Continued assistance",
        help_request="Support resources + Guidance",
        understanding_check="Confirmation + Clarification",
    )

    # Unknown intents fall back to the generic strategy.
    if intent in strategy_by_intent:
        chosen = strategy_by_intent[intent]
    else:
        chosen = "General web search"
    return f"Strategy for '{intent}': {chosen}"

# Resource: Research Intent Categories
@mcp.resource("research://intents")
def get_research_intents() -> str:
    """Summarize the research intent groups as a bulleted text report.

    Returns:
        A header followed by one bullet per group, giving the number of
        intents in the group and the total number of keywords that
        ``INTENTS`` holds for those intents (intents missing from
        ``INTENTS`` contribute zero keywords).
    """
    intent_groups = (
        ("Academic Research", ["academic_query", "literature_review", "source_request"]),
        ("Data Analysis", ["data_analysis", "statistical_query", "methodology_query"]),
        ("Comparison & Trends", ["comparison_request", "trend_analysis", "expert_opinion"]),
        ("Knowledge Building", ["definition_request", "explanation_request", "case_study"]),
        ("Interaction", ["greeting", "thanks", "help_request", "clarification_request"]),
    )

    # The header line ends with its own newline so that joining on "\n"
    # leaves a blank line between the header and the first bullet.
    report_lines = ["Research Assistant Intent Categories:\n"]
    for group_name, members in intent_groups:
        keyword_total = 0
        for member in members:
            if member in INTENTS:
                keyword_total += len(INTENTS[member])
        report_lines.append(f"• {group_name}: {len(members)} intents, {keyword_total} keywords")

    return "\n".join(report_lines) + "\n"

# Comprehensive test function
def test_all_intents():
    """Run sample queries through the classifier and print a report.

    For each (query, expected intent) pair this prints the predicted intent,
    a pass/fail mark and the strategy for the prediction, then prints the
    overall accuracy followed by the research intent summary.
    """
    print("\n=== Testing All Research Intents ===")

    # Sample queries paired with the intent each one is expected to yield
    test_cases = [
        # Academic Research
        ("ค้นหาข้อมูลเกี่ยวกับ machine learning", "search_request"),
        ("งานวิจัยเรื่อง transformer neural networks", "academic_query"),
        ("ทบทวนวรรณกรรม deep learning", "literature_review"),
        ("ขอแหล่งอ้างอิง AI research", "source_request"),

        # Analysis & Methods
        ("วิเคราะห์ข้อมูล performance metrics", "data_analysis"),
        ("วิธีการศึกษา experimental design", "methodology_query"),
        ("ทดสอบสมมติฐาน statistical significance", "hypothesis_testing"),
        ("ขอสถิติ AI adoption rates", "statistical_query"),

        # Comparison & Trends
        ("เปรียบเทียบ GPT-4 vs Claude", "comparison_request"),
        ("แนวโน้ม AI development 2024", "trend_analysis"),
        ("ความเห็นผู้เชี่ยวชาญ quantum computing", "expert_opinion"),
        ("กรณีศึกษา successful AI implementation", "case_study"),

        # Knowledge & Explanation
        ("อธิบาย neural network architecture", "explanation_request"),
        ("นิยาม reinforcement learning", "definition_request"),
        ("สรุป research findings", "summary_request"),

        # Interaction
        ("สวัสดีครับ", "greeting"),
        ("ขอบคุณมาก", "thanks"),
        ("ช่วยด้วย research project", "help_request"),
        ("ไม่เข้าใจ methodology", "clarification_request"),
        ("ลาก่อน", "goodbye")
    ]

    total = len(test_cases)
    print(f"Testing {total} queries across intent categories...")

    hits = 0
    for query, expected_intent in test_cases:
        # The classifier's first output line is "Intent: <name>".
        first_line = classify_research_intent(query).split('\n', 1)[0]
        predicted_intent = first_line.replace('Intent: ', '')

        matched = predicted_intent == expected_intent
        hits += int(matched)
        mark = '✓' if matched else '✗'

        print(f"Query: {query}")
        print(f"Expected: {expected_intent} | Predicted: {predicted_intent} {mark}")
        print(f"Strategy: {get_research_strategy(predicted_intent)}")
        print()

    print(f"Accuracy: {hits}/{total} ({hits / total:.1%})")

    # Show research intent summary
    print(f"\n{get_research_intents()}")

# Announce readiness, then run the sample-query report immediately so the
# classifier's behavior is visible in this cell's output.
print("✓ Research Assistant Intent Classifier ready")
test_all_intents()

✓ Research Assistant Intent Classifier ready

=== Testing All Research Intents ===
Testing 20 queries across intent categories...
Query: ค้นหาข้อมูลเกี่ยวกับ machine learning
Expected: search_request | Predicted: search_request ✓
Strategy: Strategy for 'search_request': General web search

Query: งานวิจัยเรื่อง transformer neural networks
Expected: academic_query | Predicted: academic_query ✓
Strategy: Strategy for 'academic_query': ArXiv + Academic databases

Query: ทบทวนวรรณกรรม deep learning
Expected: literature_review | Predicted: literature_review ✓
Strategy: Strategy for 'literature_review': Academic search + Citation analysis

Query: ขอแหล่งอ้างอิง AI research
Expected: source_request | Predicted: source_request ✓
Strategy: Strategy for 'source_request': Reference databases + Verification

Query: วิเคราะห์ข้อมูล performance metrics
Expected: data_analysis | Predicted: data_analysis ✓
Strategy: Strategy for 'data_analysis': Web search + Statistical sources

Query: วิธีการศึกษา ex