# In [None]:
import os
import json
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
from typing import Dict, Any
from osw_data import MultiAgentDataset, AgentMetadata, MediaType, PointType

class BabyAIDatasetConverter:
    """Convert BabyAI trajectory JSON files into a ``MultiAgentDataset``.

    Every ``*.json`` file found directly inside ``trajectory_path`` becomes
    one dataset instance with two agents: ``"user"`` (contributes the mission
    text) and ``"agent"`` (contributes one data point per trajectory step).
    """

    def __init__(self, trajectory_path: str, output_path: str):
        """
        Initialize the BabyAI dataset converter.

        The output directory (including missing parents) is created here, so
        constructing a converter already touches the filesystem.

        Args:
            trajectory_path: Path to BabyAI trajectory data
            output_path: Path to save the converted dataset
        """
        self.trajectory_path = Path(trajectory_path)
        self.output_path = Path(output_path)
        self.output_path.mkdir(parents=True, exist_ok=True)

    def convert_to_dataset(self):
        """Convert BabyAI trajectories to MultiAgentDataset format.

        Files are processed in sorted path order so repeated runs create the
        instances in the same sequence. The dataset is closed even when a
        file fails to load or convert; the original exception still
        propagates to the caller.
        """
        dataset = MultiAgentDataset(
            name="BabyAI Interactions",
            base_path=self.output_path,
            description="BabyAI game interaction trajectories",
        )

        try:
            for trajectory_file in self._trajectory_files():
                # JSON is UTF-8 by specification; do not depend on the
                # platform's locale encoding.
                with open(trajectory_file, "r", encoding="utf-8") as f:
                    trajectory_data = json.load(f)
                self._add_trajectory(dataset, trajectory_data)
        finally:
            # Always release the dataset, even if a file was malformed.
            dataset.close()

    def _trajectory_files(self):
        """Return the ``*.json`` paths in ``trajectory_path``, sorted."""
        # Path.glob yields entries in arbitrary order; sorting makes the
        # conversion order reproducible.
        return sorted(self.trajectory_path.glob("*.json"))

    @staticmethod
    def _build_agents_metadata():
        """Create fresh metadata objects for the two agents of one instance."""
        return {
            "agent": AgentMetadata(
                agent_id="agent",
                agent_type="game_agent",
                capabilities=["navigation", "interaction", "language_understanding"],
            ),
            "user": AgentMetadata(
                agent_id="user",
                agent_type="instruction_giver",
                capabilities=["task_instruction"],
            ),
        }

    def _add_trajectory(self, dataset, trajectory_data):
        """Add one parsed trajectory to ``dataset`` as a new instance.

        Args:
            dataset: The open ``MultiAgentDataset`` being populated.
            trajectory_data: Parsed JSON object of one trajectory file.
                Missing keys fall back to empty/zero defaults.
        """
        mission = trajectory_data.get("mission", "")

        instance_id = dataset.create_instance(
            agents_metadata=self._build_agents_metadata(),
            instance_metadata={
                "task": mission,
                "env_name": trajectory_data.get("env_name", ""),
                "level": trajectory_data.get("level", 0),
            },
        )

        # The mission text is recorded first, attributed to the "user" agent.
        # NOTE: timestamps are the wall-clock time of the conversion, not the
        # time at which the original step happened.
        dataset.add_data_point(
            instance_id=instance_id,
            agent_id="user",
            timestamp=datetime.now(),
            point_type=PointType.ACTION,
            data={"text": mission},
            media_type=MediaType.JSON,
        )

        # One data point per recorded step, attributed to the "agent" agent.
        for step in trajectory_data.get("trajectory", []):
            dataset.add_data_point(
                instance_id=instance_id,
                agent_id="agent",
                timestamp=datetime.now(),
                point_type=PointType.ACTION,
                data={
                    "action": step.get("action", ""),
                    "reward": step.get("reward", 0),
                    "done": step.get("done", False),
                    "observation": step.get("observation", {}),
                    "info": step.get("info", {}),
                },
                media_type=MediaType.JSON,
            )

if __name__ == "__main__":
    # Script entry point: convert the processed BabyAI trajectories found in
    # the source directory and write the resulting dataset to the target one.
    source_dir = "/Users/anantsinha/Desktop/BabyAI Trajectorries/processed_trajectories"
    target_dir = "/Users/anantsinha/Desktop/DEMO"

    BabyAIDatasetConverter(
        trajectory_path=source_dir,
        output_path=target_dir,
    ).convert_to_dataset()