In [None]:
# IPython shell escape: quietly install/upgrade the third-party packages named below.
!pip install --upgrade --quiet langgraph langchain-community langchain-openai paramiko fabric pydantic

In [None]:
from kaggle_secrets import UserSecretsClient

import os

# One client is enough for every lookup; the original built a new one per secret
# and then referenced an undefined ``user_secrets`` (and un-imported ``os``) below.
user_secrets = UserSecretsClient()

# Key material. ``ssh_comment`` is later passed as the private-key passphrase.
ssh_comment = user_secrets.get_secret("SSH_COMMENT")
public_key = user_secrets.get_secret("SSH_PUBLIC_KEY")
encrypted_private_key = user_secrets.get_secret("SSH_PRIVATE_KEY_ENCRYPTED")

# Host that is connected to first, and the account used on both hops.
ssh_hostname = user_secrets.get_secret("SSH_HOSTNAME")
ssh_username = user_secrets.get_secret("SSH_USERNAME")

# Endpoints of the tunnelled (second) hop.
ssh_jump_gateway = user_secrets.get_secret("SSH_JUMP_GATEWAY")
ssh_jump_dest = user_secrets.get_secret("SSH_JUMP_DEST")

# Exported through the environment rather than kept in a variable.
os.environ["HF_TOKEN"] = user_secrets.get_secret("HF_TOKEN")

In [None]:
import paramiko, fabric
import io
import asyncio

class SSHConnection_Paramiko:
    """Paramiko-based SSH session to ``ssh_jump_dest``, tunnelled through ``ssh_hostname``.

    Attributes:
        jump_client: paramiko client connected to the first hop.
        client: paramiko client connected to the destination through a
            ``direct-tcpip`` channel opened on ``jump_client``.

    Annotations that name ``AsyncGenerator`` / ``OutputChunk`` are written as
    strings because those names are only imported/defined further down the file;
    unquoted, they would raise ``NameError`` while this class is being defined.
    """

    def __init__(self):
        """Open both hops; on any failure close whatever was already opened.

        Raises:
            Exception: whatever paramiko raises while parsing the key or
                connecting is re-raised unchanged.
        """
        # The key is stored with literal "\n" sequences; turn them into newlines.
        key_string = encrypted_private_key.replace("\\n", "\n")
        private_key = paramiko.RSAKey.from_private_key(
            io.StringIO(key_string),
            password = ssh_comment)

        self.client = None
        self.jump_client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; acceptable for a notebook experiment, risky elsewhere.
        self.jump_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        try:
            self.jump_client.connect(
                hostname = ssh_hostname,
                username = ssh_username,
                pkey = private_key
            )

            # Tunnel from the first hop to the destination's SSH port.
            transport = self.jump_client.get_transport()
            dest_addr = (ssh_jump_dest, 22)
            local_addr = (ssh_jump_gateway, 22)
            channel = transport.open_channel("direct-tcpip", dest_addr, local_addr)

            self.client = paramiko.SSHClient()
            self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            self.client.connect(
                hostname = ssh_jump_dest,
                username = ssh_username,
                pkey = private_key,
                sock = channel)
        except Exception:
            # Do not leave a half-built tunnel behind.
            self.close()
            raise

    def close(self):
        """Close the destination client first, then the jump client (if present)."""
        for client in (getattr(self, "client", None), getattr(self, "jump_client", None)):
            if client is not None:
                client.close()

    async def stream_output(self, shell, poll_interval: float = 0.1) -> "AsyncGenerator[str, None]":
        """Poll ``shell`` and yield its output as decoded text.

        Args:
            shell: channel-like object providing ``recv_ready()``, ``recv(n)``,
                ``closed`` and ``exit_status_ready()``.
            poll_interval: seconds to sleep between polls.

        Yields:
            Non-empty pieces of UTF-8 decoded output.

        The generator ends once the channel is closed or has reported an exit
        status and no buffered data remains (the original looped forever).
        """
        import codecs  # local import: not part of this cell's import list

        # Incremental decoding keeps a multi-byte character that straddles two
        # reads intact; undecodable bytes are replaced instead of raising.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

        while True:
            if shell.recv_ready():
                text = decoder.decode(shell.recv(4096))
                if text:
                    yield text
            elif shell.closed or shell.exit_status_ready():
                if shell.recv_ready():
                    # Data arrived between the two checks; read it first.
                    continue
                tail = decoder.decode(b"", final=True)
                if tail:
                    yield tail
                return
            await asyncio.sleep(poll_interval)

    async def execute_interactive(self, command: str) -> "AsyncGenerator[OutputChunk, None]":
        """Run ``command`` in a fresh interactive shell and yield processed chunks.

        Args:
            command: command line to send, a newline is appended.

        Yields:
            Whatever ``StreamProcessor.process_stream`` produces for the output.

        NOTE(review): the shell opened by ``invoke_shell`` presumably stays open
        after ``command`` finishes, so the stream only ends when the shell itself
        exits or the consumer stops iterating - confirm the intended lifecycle.
        """
        shell = self.client.invoke_shell()
        try:
            processor = StreamProcessor(PatternMatcher())

            # Send command
            shell.send(command + '\n')

            # Process output stream
            async for chunk in processor.process_stream(self.stream_output(shell)):
                yield chunk
        finally:
            # Release the channel even when the consumer abandons the generator.
            shell.close()

class SSHConnection_Fabric:
    """Fabric-based counterpart of the paramiko connection (streaming not implemented).

    Attributes:
        gateway_client: ``fabric.Connection`` to ``ssh_hostname``.
        client: ``fabric.Connection`` to ``ssh_jump_dest`` using
            ``gateway_client`` as its gateway.

    Annotations naming ``AsyncGenerator`` / ``OutputChunk`` are strings because
    those names are only imported/defined further down the file.
    """

    def __init__(self):
        """Build both fabric connections from the module-level SSH settings."""
        # The key is stored with literal "\n" sequences; turn them into newlines.
        key_string = encrypted_private_key.replace("\\n", "\n")
        private_key = paramiko.RSAKey.from_private_key(
            io.StringIO(key_string),
            password = ssh_comment)
        self.gateway_client = fabric.Connection(
            ssh_hostname,
            user = ssh_username,
            connect_kwargs = { 'pkey': private_key })
        self.client = fabric.Connection(
            ssh_jump_dest,
            user = ssh_username,
            connect_kwargs = { 'pkey': private_key },
            gateway = self.gateway_client)

    async def stream_output(self, shell) -> "AsyncGenerator[str, None]":
        """Not implemented for the fabric backend.

        Raises:
            NotImplementedError: on first iteration. The original body was
                ``pass``, which made this a coroutine and caused an opaque
                ``TypeError`` when used with ``async for``.
        """
        raise NotImplementedError(
            "SSHConnection_Fabric.stream_output is not implemented; use the paramiko backend")
        yield  # unreachable, but its presence makes this an async generator

    async def execute_interactive(self, command: str) -> "AsyncGenerator[OutputChunk, None]":
        """Not implemented for the fabric backend.

        Raises:
            NotImplementedError: on first iteration (see ``stream_output``).
        """
        raise NotImplementedError(
            "SSHConnection_Fabric.execute_interactive is not implemented; use the paramiko backend")
        yield  # unreachable, but its presence makes this an async generator

class SSHConnection:
    """Facade that selects the paramiko or fabric backend and delegates to it.

    Attributes:
        _impl: the backend instance.
        client: the backend's ``client`` attribute, exposed for direct use.

    Annotations naming ``AsyncGenerator`` / ``OutputChunk`` are strings because
    those names are only imported/defined further down the file; unquoted, they
    would raise ``NameError`` while this class is being defined.
    """

    def __init__(self, use_paramiko=True):
        """Create the backend and run ``uname -a`` once as a connectivity check.

        Args:
            use_paramiko: ``True`` selects ``SSHConnection_Paramiko``,
                otherwise ``SSHConnection_Fabric`` is used.
        """
        if use_paramiko:
            self._impl = SSHConnection_Paramiko()
            self.client = self._impl.client
            _stdin, stdout, _stderr = self.client.exec_command('uname -a')
            for line in stdout:
                print(line.strip())
        else:
            self._impl = SSHConnection_Fabric()
            self.client = self._impl.client
            self.client.run('uname -a')

    async def stream_output(self, shell) -> "AsyncGenerator[str, None]":
        """Yield everything the backend's ``stream_output(shell)`` yields."""
        async for chunk in self._impl.stream_output(shell):
            yield chunk

    async def execute_interactive(self, command: str) -> "AsyncGenerator[OutputChunk, None]":
        """Yield everything the backend's ``execute_interactive(command)`` yields."""
        async for chunk in self._impl.execute_interactive(command):
            yield chunk

In [None]:
import json
from enum import Enum
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Agent types and roles
class AgentRole(str, Enum):
    """Role an agent can take; members are ``str`` instances as well as enum members."""
    # ``.value`` of a member is interpolated into the thought-generation prompt.
    PROJECT_MANAGER = "project_manager"
    PROGRAMMER = "programmer"
    SPECIALIST = "specialist"


class LocalLLMManager:
    """Manages loading and inference for local LLMs

    Attributes:
        model_name: HuggingFace model identifier.
        device: device string handed to ``from_pretrained`` as ``device_map``.
        tokenizer / model / pipe: ``None`` until the model has been loaded.
    """

    def __init__(self, model_name: str = "microsoft/Phi-4-mini-instruct", device: str = None):
        """
        Initialize the LLM manager

        Args:
            model_name: HuggingFace model identifier
            device: Device to run the model on (None for auto-detection)
        """
        # Local import: the original referenced an undefined global ``logger``.
        import logging

        self._log = logging.getLogger(__name__)
        self.model_name = model_name
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self._log.info("Initializing LocalLLMManager with %s on %s", model_name, self.device)

        # Populated by load_model() / _load_model_sync()
        self.tokenizer = None
        self.model = None
        self.pipe = None

    async def load_model(self):
        """Load the model and tokenizer asynchronously"""
        # Run in the default executor so the event loop is not blocked
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._load_model_sync)
        self._log.info("Model %s loaded successfully", self.model_name)

    def _load_model_sync(self):
        """Synchronous model loading function"""
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map=self.device
        )
        # No explicit ``device`` here: placement is already requested via
        # ``device_map`` on the model above.
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
        )

    async def generate(self, prompt: str, max_tokens: int = 512,
                      temperature: float = 0.7) -> str:
        """
        Generate text using the loaded model

        Args:
            prompt: Input prompt for the model
            max_tokens: Maximum number of tokens to generate
            temperature: Temperature for sampling; a value <= 0 switches to
                greedy decoding instead of sampling

        Returns:
            Generated text response (without the echoed prompt, stripped)
        """
        # ``pipe`` is what is actually called below, so it decides lazy loading.
        if self.pipe is None:
            await self.load_model()

        if temperature > 0:
            sampling = {"temperature": temperature, "do_sample": True, "top_p": 0.95}
        else:
            # Sampling with a non-positive temperature is not meaningful.
            sampling = {"do_sample": False}

        # Run in the default executor so the event loop is not blocked
        loop = asyncio.get_running_loop()
        outputs = await loop.run_in_executor(
            None,
            lambda: self.pipe(prompt, max_new_tokens=max_tokens, **sampling),
        )
        text = outputs[0]["generated_text"]

        # Drop the echoed prompt only when it is really there; slicing blindly
        # would cut into the answer if the pipeline did not echo it verbatim.
        if text.startswith(prompt):
            text = text[len(prompt):]
        return text.strip()

    @staticmethod
    def _parse_thought_response(response: str):
        """Extract ``(thought, confidence)`` from a ``Thought:`` / ``Confidence:`` reply.

        Lines are matched after stripping indentation and ignoring case; the
        last matching line of each kind wins. The confidence is the first number
        on its line (a trailing ``%`` divides it by 100), clamped to [0.0, 1.0];
        it is 0.0 when no number is found.
        """
        import re  # local import: ``re`` is only imported in a later cell

        thought = ""
        confidence = 0.0
        for raw_line in response.splitlines():
            line = raw_line.strip()
            lowered = line.lower()
            if lowered.startswith("thought:"):
                thought = line[len("thought:"):].strip()
            elif lowered.startswith("confidence:"):
                found = re.search(r"(\d*\.?\d+)\s*(%)?", line[len("confidence:"):])
                if found is None:
                    confidence = 0.0
                else:
                    confidence = float(found.group(1))
                    if found.group(2):
                        confidence /= 100.0
        return thought, min(max(confidence, 0.0), 1.0)

    async def generate_thought(self,
                             agent_role: "AgentRole",
                             thought_type: str,
                             context: str,
                             confidence_threshold: float = 0.7) -> "Optional[Thought]":
        """
        Generate an internal thought with confidence estimation

        Args:
            agent_role: Role of the agent generating the thought
            thought_type: Type of thought to generate
            context: Context information for thought generation
            confidence_threshold: Minimum confidence threshold

        Returns:
            A Thought object or None if confidence is too low or no thought
            text was found

        NOTE(review): ``Thought`` is not defined in the visible part of the
        file; it is assumed to be provided elsewhere before this is called.
        """
        # Create a prompt for thought generation with confidence estimation
        prompt = f"""
        You are a {agent_role.value} agent generating internal thoughts.
        
        Context:
        {context}
        
        Generate a {thought_type} thought about this situation.
        Also estimate your confidence in this thought from 0.0 to 1.0.
        
        Format your response as:
        Thought: [your thought here]
        Confidence: [confidence score between 0.0 and 1.0]
        """

        response = await self.generate(prompt, max_tokens=200, temperature=0.7)
        thought_content, confidence = self._parse_thought_response(response)

        # Create and return thought if confidence is above threshold
        if confidence >= confidence_threshold and thought_content:
            return Thought(
                thought_type=thought_type,
                content=thought_content,
                confidence=confidence
            )
        return None
        
manager = LocalLLMManager()
manager.load_model()
response = await manager.generate("""
        You are a programmer agent generating internal thoughts.
        
        Context:
        You are supposed to find out how many hugepages have been assigned in total on a Linux system.
        The command line you generate should give out a single number of the result.
        
        Generate a observation thought about this situation.
        Also estimate your confidence in this thought from 0.0 to 1.0.
        
        Format your response as:
        Thought: [your thought here]
        Confidence: [confidence score between 0.0 and 1.0]
        """)
print(response)

In [None]:
from langchain_core.tools import BaseTool
from langgraph.checkpoint.memory import MemorySaver
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage
from typing import List, Dict, Optional, AsyncGenerator
import json
from pydantic import BaseModel, Field, PrivateAttr
from datetime import datetime
import re



class OutputChunk(BaseModel):
    """Represents a chunk of command output"""
    # Text of this piece of output.
    content: str
    # Required; the stream processor in this file fills it with datetime.now().
    timestamp: datetime
    type: str = "stdout"  # stdout, stderr, or system
    # Set by the stream processor when an 'error', 'prompt' or 'completion' pattern matched.
    requires_attention: bool = False
    # Pattern name -> matched text, as returned by PatternMatcher.analyze_chunk.
    pattern_matches: Dict[str, str] = Field(default_factory=dict)

class PatternMatcher:
    """Scans command output for a fixed set of noteworthy, case-insensitive patterns."""

    def __init__(self):
        """Store the raw pattern sources and compile each one once."""
        # Category -> regex source; insertion order is also the reporting order.
        self.patterns = dict(
            error=r'error|exception|failed|fatal',
            prompt=r'\[y/N\]|\[Y/n\]|password:|continue\?',
            progress=r'\d+%|\d+/\d+',
            completion=r'(done|completed|finished|ready|upgraded).*$',
        )
        self.compiled_patterns = {}
        for label, source in self.patterns.items():
            self.compiled_patterns[label] = re.compile(source, re.IGNORECASE)

    def analyze_chunk(self, text: str) -> Dict[str, str]:
        """Return ``{category: first matched text}`` for every category found in ``text``."""
        hits = {}
        for label, regex in self.compiled_patterns.items():
            match = regex.search(text)
            if match is None:
                continue
            hits[label] = match.group(0)
        return hits

class StreamProcessor:
    """Processes command output streams and chunks them intelligently

    Attributes:
        pattern_matcher: object whose ``analyze_chunk(text)`` returns the
            patterns found in ``text``.
        buffer: text received but not yet emitted.
        chunk_size: maximum number of characters per emitted chunk.
        min_chunk_size: below this size the buffer is only emitted when a
            pattern matches (or when the stream ends).
    """

    def __init__(self, pattern_matcher: "PatternMatcher"):
        self.pattern_matcher = pattern_matcher
        self.buffer = ""
        self.chunk_size = 1024
        self.min_chunk_size = 100  # Minimum size to process

    def should_chunk(self, text: str) -> bool:
        """Determine if we should create a new chunk based on content"""
        if len(text) >= self.chunk_size:
            return True

        patterns = self.pattern_matcher.analyze_chunk(text)
        return bool(patterns)  # Chunk if we find any important patterns

    def _make_chunk(self, text: str) -> "OutputChunk":
        """Wrap ``text`` in an OutputChunk, flagging error/prompt/completion matches."""
        patterns = self.pattern_matcher.analyze_chunk(text)
        requires_attention = bool({'error', 'prompt', 'completion'} & patterns.keys())
        return OutputChunk(
            content=text,
            timestamp=datetime.now(),
            requires_attention=requires_attention,
            pattern_matches=patterns
        )

    async def process_stream(self, stream: AsyncGenerator[str, None]) -> AsyncGenerator["OutputChunk", None]:
        """Buffer ``stream`` and yield it as OutputChunk objects.

        Args:
            stream: async iterable of text pieces.

        Yields:
            Chunks of at most ``chunk_size`` characters. Whatever is still
            buffered when ``stream`` ends is emitted as a final chunk; the
            original silently dropped such a short, pattern-free tail.
        """
        async for data in stream:
            self.buffer += data

            while self.buffer:
                # Too little text and nothing noteworthy in it: wait for more.
                if len(self.buffer) < self.min_chunk_size and not self.should_chunk(self.buffer):
                    break

                chunk_text = self.buffer[:self.chunk_size]
                self.buffer = self.buffer[self.chunk_size:]
                yield self._make_chunk(chunk_text)

        # End of stream: flush the remainder instead of losing it.
        if self.buffer:
            tail, self.buffer = self.buffer, ""
            yield self._make_chunk(tail)






class LinuxCommandTool(BaseTool):
    """Tool that forwards a command to an SSHConnection and streams back its output chunks."""
    name: str = "linux_command"
    description: str = "Execute Linux commands and handle interactive output"

    # Held as a private attribute and assigned in __init__ after the base initialiser has run.
    _ssh: SSHConnection = PrivateAttr()
    
    def __init__(self, ssh_connection: SSHConnection):
        """Store ``ssh_connection`` for later use by ``_run``."""
        super().__init__()
        self._ssh = ssh_connection
    
    # NOTE(review): this is an async generator, so calling it returns an async
    # iterator rather than a result; callers in this file iterate ``tool.run(...)``
    # with ``async for`` - confirm that BaseTool.run hands the iterator back unchanged.
    async def _run(self, command: str) -> AsyncGenerator[OutputChunk, None]:
        """Yield every chunk produced by ``self._ssh.execute_interactive(command)``."""
        async for chunk in self._ssh.execute_interactive(command):
            yield chunk
    
class AgentResponse(BaseModel):
    """Structured response from the agent"""
    action: str  # "continue", "interact", "alert", "complete"
    # Reply text; for action "interact" the agent sends it back through the tool.
    response: str
    # The model's analysis; the agent records it after each non-"continue" action.
    reasoning: str
    # The prompt asks the model for a 0-10 urgency value; defaults to 0.
    priority: int = 0

class StreamingAgent:
    """Runs a command through a tool and asks an LLM how to react to its output.

    Attributes:
        llm: object providing ``await llm.ainvoke(prompt)`` whose result has a
            ``content`` string.
        tools: list of tools; the first one is used to run commands.
        memory: a ``MemorySaver`` instance, kept for compatibility.
        current_context: chunks collected since the last batch analysis.
        history: the last 10 ``{"input", "output"}`` records of agent decisions.
    """

    def __init__(self, llm, tools: List["BaseTool"]):
        from collections import deque  # local import keeps the block self-contained

        self.llm = llm
        self.tools = tools
        self.memory = MemorySaver() #(max_history=10)
        self.current_context = []
        # NOTE(review): MemorySaver is a checkpointer and appears to offer no
        # ``save_context``; decisions are therefore recorded here (see _remember).
        self.history = deque(maxlen=10)

    def create_prompt(self, chunks: List["OutputChunk"]) -> str:
        """Create prompt for LLM based on recent chunks"""
        return f"""Analyze this command output stream and determine appropriate action:

Recent output:
{chunks[-5:]}  # Show last 5 chunks

Patterns detected:
{[chunk.pattern_matches for chunk in chunks[-5:]]}

Based on this output:
1. If you see a prompt/question, provide the appropriate response
2. If you detect an error, provide guidance
3. If the command is progressing normally, return "continue"
4. If the command has completed, provide a summary

Respond in JSON format:
{{
    "action": "continue/interact/alert/complete",
    "response": "your response or next action",
    "reasoning": "your analysis",
    "priority": 0-10  # Urgency of response
}}"""

    @staticmethod
    def _parse_response(content: str) -> "AgentResponse":
        """Build an AgentResponse from the LLM reply, tolerating text around the JSON.

        Only the span from the first ``{`` to the last ``}`` is parsed, so code
        fences or a leading sentence do not break parsing. Invalid JSON still
        raises whatever ``json.loads`` raises.
        """
        text = content.strip()
        start, end = text.find("{"), text.rfind("}")
        if start != -1 and end > start:
            text = text[start:end + 1]
        return AgentResponse(**json.loads(text))

    async def _ask_llm(self, chunks: List["OutputChunk"]) -> str:
        """Send the prompt built from ``chunks`` to the LLM and return its raw text."""
        reply = await self.llm.ainvoke(self.create_prompt(chunks))
        return reply.content

    async def process_chunks(self, chunks: List["OutputChunk"]) -> Optional["AgentResponse"]:
        """Process chunks and determine if LLM analysis is needed

        Returns:
            The parsed LLM decision, or None when the LLM was not consulted.
        """
        # Quick pattern-based analysis first
        if any(chunk.requires_attention for chunk in chunks):
            # Important patterns detected, consult LLM
            return self._parse_response(await self._ask_llm(chunks))

        # For normal output, accumulate more before consulting LLM
        if len(self.current_context) >= 5:  # Batch size
            content = await self._ask_llm(self.current_context)
            self.current_context.clear()
            return self._parse_response(content)

        return None

    async def _send_interaction(self, tool, text: str) -> None:
        """Pass ``text`` to ``tool.run`` and wait for it to finish.

        ``tool.run`` may hand back an awaitable or an async iterator; awaiting
        an async iterator (as the original did) raises ``TypeError``, so both
        shapes are handled.

        NOTE(review): with the command tool in this file every ``run`` call
        starts a new interactive shell, so the text presumably does not reach
        the prompt of the command that is already running - confirm the design.
        """
        import inspect

        outcome = tool.run(text)
        if inspect.isawaitable(outcome):
            await outcome
        elif hasattr(outcome, "__aiter__"):
            async for _ in outcome:
                pass

    def _remember(self, command: str, reasoning: str) -> None:
        """Record a decision in ``history`` and in ``memory`` when it supports it."""
        self.history.append({"input": command, "output": reasoning})
        save_context = getattr(self.memory, "save_context", None)
        if callable(save_context):
            save_context({"input": command}, {"output": reasoning})

    async def run(self, command: str):
        """Run command and process output stream

        Yields:
            Every LLM decision whose action is not "continue".
        """
        tool = self.tools[0]  # Assume Linux command tool

        async for chunk in tool.run(command):
            self.current_context.append(chunk)

            response = await self.process_chunks([chunk])
            if response is None or response.action == "continue":
                continue

            # Handle interactive needs or alerts
            if response.action == "interact":
                # Send response back to command
                await self._send_interaction(tool, response.response)

            # Update memory
            self._remember(command, response.reasoning)

            yield response

# Example usage
async def main():
    """Connect over SSH, run ``sudo apt upgrade`` through the agent and print its decisions."""
    connection = SSHConnection(use_paramiko=True)
    toolbox = [LinuxCommandTool(connection)]

    # The OpenAI key is read from the Kaggle secret store.
    api_key = UserSecretsClient().get_secret("OPENAI_API_KEY")
    model = ChatOpenAI(temperature=0, openai_api_key=api_key)
    agent = StreamingAgent(model, toolbox)

    async for response in agent.run("sudo apt upgrade"):
        if response.action == "continue":
            continue
        print(f"Agent response: {response.model_dump()}")

# NOTE(review): a top-level ``await`` is only valid where the host already runs an
# event loop (e.g. a notebook cell); a plain script would need asyncio.run(main())
# instead - confirm the intended runtime.
if __name__ == "__main__":
    await main()