In [3]:
import json
import pandas as pd

# Load the episode list. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default locale encoding.
with open("final_filtered.json", "r", encoding="utf-8") as f:
    episodes = json.load(f)
    
    
# Running totals across every call to label_event_sequence_clean().
count_door_win = 0
count_lost = 0


def _first_item(event):
    """Return the first entry of the event's "item" field, or None if it is missing or empty."""
    item = event.get("item")
    return item[0] if item else None


# sequences in format like ['e1', 'e2', 'e5', ...]
def label_event_sequence_clean(events):
    """Translate one episode's raw events into a list of symbolic labels.

    Labels produced, in order of appearance in ``events``:

    * ``e1`` / ``e2`` -- a key / an explosive was collected
    * ``e5`` -- explosive used on a rock
    * ``e6`` -- key used on a door
    * ``e7`` -- explosive used on a door, or a failed interaction with an explosive
    * ``e8`` -- key used on a rock, or a failed interaction with a key

    Appended after the scan:

    * ``e3`` -- no key was ever collected
    * ``e4`` -- no explosive was ever collected
    * ``e9`` if any event is ``game_won``, otherwise ``e10``

    Args:
        events: iterable of event dicts. Each may carry an ``"event"`` name,
            an ``"item"`` sequence whose first entry is the item type, and
            (for ``"interact"`` events) a ``"type"`` naming the target.

    Returns:
        The list of label strings for the episode.

    Side effects:
        Increments the module-level ``count_door_win`` or ``count_lost``.
    """
    global count_door_win, count_lost
    sequence = []
    key_collected = False
    explosive_collected = False

    for event in events:
        evt = event.get("event")
        # Every branch reads the item the same tolerant way, so an event with
        # a missing or empty "item" is skipped instead of raising.
        what = _first_item(event)

        if evt == "collect_item":
            if what == "key":
                key_collected = True
                sequence.append("e1")
            elif what == "explosive":
                explosive_collected = True
                sequence.append("e2")

        elif evt == "failed_interaction":
            if what == "key":
                sequence.append("e8")
            elif what == "explosive":
                sequence.append("e7")

        elif evt == "interact":
            typ = event.get("type")
            if typ == "rock":
                if what == "explosive":
                    sequence.append("e5")
                elif what == "key":
                    sequence.append("e8")
            elif typ == "door":
                if what == "key":
                    sequence.append("e6")
                elif what == "explosive":
                    sequence.append("e7")

    # after looping, check missing items
    if not key_collected:
        sequence.append("e3")
    if not explosive_collected:
        sequence.append("e4")

    # finally, the outcome
    if any(e.get("event") == "game_won" for e in events):
        sequence.append("e9")
        count_door_win += 1
    else:
        sequence.append("e10")
        count_lost += 1

    return sequence

# Label every episode; each entry must carry its event list under "events".
formatted_sequences = [label_event_sequence_clean(ep["events"]) for ep in episodes]

# Episode ids are positional: ep_0, ep_1, ...
episode_ids = [f"ep_{i}" for i in range(len(episodes))]

# Save as CSV: each row = episode_id, sequence list
df = pd.DataFrame({
    "episode_id": episode_ids,
    "sequence": formatted_sequences,
})
df.to_csv("sequence_of_sets_formatted.csv", index=False)

# Print summary. The counters are the module-level totals updated by
# label_event_sequence_clean() while the sequences were built above.
print("Summary:")
print(f"  Total episodes       : {len(episodes)}")
print(f"  Won by blue door (e9): {count_door_win}")
print(f"  Lost (e10)           : {count_lost}")
print("Done! Saved to 'sequence_of_sets_formatted.csv'")


Done! Saved to 'sequence_of_sets_formatted.csv'
Summary:
  Total episodes       : 284
  Won by blue door (e9): 80
  Lost (e10)           : 204
Done! Saved to 'sequence_of_sets_formatted.csv'
