# In [None]:
import os
import json
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
from typing import Dict, Any

from osw_data import MultiAgentDataset, AgentMetadata, MediaType, PointType

class MiniHackDatasetConverter:
    """Convert MiniHack trajectory CSV files into a ``MultiAgentDataset``.

    Every file in ``trajectory_path`` whose name matches ``*_run_*.csv``
    becomes one dataset instance with two agents: ``"user"``, which
    contributes a single data point holding the task text, and ``"agent"``,
    which contributes one data point per CSV row.

    Each CSV is expected to provide the columns ``task``, ``llm``,
    ``episode_return``, ``num_steps``, ``done``, ``action_frequency``
    (a JSON-encoded string) and ``progression``.
    """

    def __init__(self, trajectory_path: str, output_path: str):
        """Store both locations and make sure the output directory exists.

        Args:
            trajectory_path: Directory containing the ``*_run_*.csv`` files.
            output_path: Directory passed to the dataset as ``base_path``;
                created (including parents) when it does not exist yet.
        """
        self.trajectory_path = Path(trajectory_path)
        self.output_path = Path(output_path)
        self.output_path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _to_native(value):
        """Return *value* as a built-in Python scalar if it is a NumPy scalar."""
        # Depending on column dtypes and the pandas version, row values may be
        # NumPy scalars (np.int64, np.bool_, ...), which the standard json
        # encoder rejects. Everything else is passed through untouched.
        return value.item() if isinstance(value, np.generic) else value

    @classmethod
    def _row_payload(cls, row) -> Dict[str, Any]:
        """Build the JSON payload stored for one trajectory row.

        Args:
            row: Mapping-like row (e.g. a ``pandas.Series``) exposing the
                trajectory columns by name.

        Raises:
            KeyError: If a required column is missing.
            json.JSONDecodeError: If ``action_frequency`` is not valid JSON.
        """
        return {
            "episode_return": cls._to_native(row['episode_return']),
            "num_steps": cls._to_native(row['num_steps']),
            "done": cls._to_native(row['done']),
            "action_frequency": json.loads(row['action_frequency']),
            "progression": cls._to_native(row['progression'])
        }

    @staticmethod
    def _build_agents_metadata():
        """Return fresh metadata objects for the two agents of an instance."""
        # Built anew for every instance so no metadata object is shared
        # between instances.
        return {
            "agent": AgentMetadata(
                agent_id="agent",
                agent_type="game_agent",
                capabilities=["navigation", "interaction"],
            ),
            "user": AgentMetadata(
                agent_id="user",
                agent_type="instruction_giver",
                capabilities=["task_instruction"],
            )
        }

    def _add_trajectory(self, dataset, trajectory_df) -> None:
        """Add one non-empty trajectory table to *dataset* as a new instance."""
        # Task and model are read from the first row, as instance-level data.
        first_row = trajectory_df.iloc[0]
        task = self._to_native(first_row['task'])

        instance_id = dataset.create_instance(
            agents_metadata=self._build_agents_metadata(),
            instance_metadata={
                "task": task,
                "llm": self._to_native(first_row['llm'])
            }
        )

        # The instruction giver opens the instance with the task text.
        dataset.add_data_point(
            instance_id=instance_id,
            agent_id="user",
            timestamp=datetime.now(),
            point_type=PointType.ACTION,
            data={"text": task},
            media_type=MediaType.JSON
        )

        # One agent data point per CSV row, in file order.
        for _, row in trajectory_df.iterrows():
            dataset.add_data_point(
                instance_id=instance_id,
                agent_id="agent",
                timestamp=datetime.now(),
                point_type=PointType.ACTION,
                data=self._row_payload(row),
                media_type=MediaType.JSON
            )

    def convert_to_dataset(self) -> None:
        """Read every trajectory CSV and write it into a new dataset.

        Files are processed in sorted name order. CSVs that contain a header
        but no rows are skipped with a ``RuntimeWarning``. The dataset is
        closed even when reading or converting a file fails; the original
        exception is then re-raised to the caller.
        """
        import warnings  # local import: only needed by this method

        dataset = MultiAgentDataset(
            name="MiniHack Interactions",
            base_path=self.output_path,
            description="MiniHack game interaction trajectories"
        )

        try:
            # glob() yields files in filesystem order; sorting keeps the
            # resulting instance order reproducible across machines and runs.
            for csv_file in sorted(self.trajectory_path.glob('*_run_*.csv')):
                trajectory_df = pd.read_csv(csv_file)

                if trajectory_df.empty:
                    # No rows means no task/llm to read from the first row.
                    warnings.warn(
                        f"Skipping trajectory file without rows: {csv_file}",
                        RuntimeWarning,
                        stacklevel=2,
                    )
                    continue

                self._add_trajectory(dataset, trajectory_df)
        finally:
            # Always release the dataset, including on failure mid-conversion.
            dataset.close()

# Usage
if __name__ == "__main__":
    # Directory holding the per-run trajectory CSVs, and the directory the
    # converted dataset is written to.
    source_dir = "/Users/anantsinha/Desktop/processed_trajectories"
    target_dir = "/Users/anantsinha/Desktop/MINIHACK_TRAJECTORY"

    converter = MiniHackDatasetConverter(
        trajectory_path=source_dir,
        output_path=target_dir,
    )
    converter.convert_to_dataset()