In [1]:
import os
import json
import pandas as pd
import json

import plotly.graph_objects as go
from plotly.subplots import make_subplots


import json
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import re

# Fetch the NLTK resources this notebook asks for. Only needed once per
# machine; packages that are already present are reported as up-to-date.
for nltk_package in ('vader_lexicon', 'punkt'):
    nltk.download(nltk_package)


# Pixel dimensions used when sizing and exporting the plots.
resolution_x, resolution_y = 3000, 2000

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/kai/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/kai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# Extract the data from the JSON files in each experiment folder and store it
# in a pandas DataFrame (one row per logged step).

# Directory that contains one sub-folder per experiment.
experiments_folder = 'logs'

# Paths (experiments_folder/<name>) of every experiment sub-folder.
experiment_folders = [
    os.path.join(experiments_folder, name)
    for name in os.listdir(experiments_folder)
    if os.path.isdir(os.path.join(experiments_folder, name))
]

# how many experiments are there?
print(f"Found {len(experiment_folders)} experiments")

# Log fields whose raw "message" string is copied into the DataFrame.
_LOG_FIELDS = ("action", "plan", "memory", "observation", "task")


def _parse_json(raw):
    """Decode a JSON message string; an empty or missing message yields None.

    Malformed JSON still raises, so corrupt log entries are not hidden.
    """
    return json.loads(raw) if raw else None


def _dig(obj, *keys):
    """Follow `keys` through nested dicts, returning None once a key is absent."""
    for key in keys:
        if not isinstance(obj, dict) or key not in obj:
            return None
        obj = obj[key]
    return obj


data = []
for folder in experiment_folders:
    log_file = os.path.join(folder, "logs.json")
    if not os.path.exists(log_file):
        # Not an experiment folder (or no log written): nothing to extract.
        continue

    # The family is the second "_"-separated part of the folder *name* (not of
    # the whole path), and the scenario is the family with the "-" and
    # everything after it removed. Names without "_" get no scenario instead
    # of aborting the whole load.
    name_parts = os.path.basename(folder).split("_")
    exp_family = name_parts[1] if len(name_parts) > 1 else None
    scenario = exp_family.split("-")[0] if exp_family is not None else None

    with open(log_file, "r", encoding="utf-8") as f:
        logs = json.load(f)

    for step, step_data in logs.items():
        step_logs = step_data["logs"]
        entry = {"exp_name": folder, "scenario": scenario, "step": step}
        for field in _LOG_FIELDS:
            entry[field] = (
                step_logs[field]["message"] if field in step_logs else None)
        data.append(entry)

# Explicit columns keep the frame usable even when no log entries were found.
df = pd.DataFrame(
    data, columns=["exp_name", "scenario", "step", *_LOG_FIELDS])

# add a column for the action type and a column for the action value
# (each action message is decoded once and reused for both columns)
parsed_actions = df["action"].apply(_parse_json)
df["action_type"] = parsed_actions.apply(lambda a: _dig(a, "action"))
df["action_value"] = parsed_actions.apply(lambda a: _dig(a, "value"))

# each observation is a dictionary with keys left, front, right, compass.
# we extract these into separate columns; keys that do not exist become None
parsed_observations = df["observation"].apply(_parse_json)
for side in ("left", "front", "right", "compass"):
    df[f"observation_{side}"] = parsed_observations.apply(
        lambda o, side=side: _dig(o, "observation", side))

# find all actions that are "finish"
df_finish = df[df["action_type"] == "finish"]

# split df into two dataframes: successful experiments are all exp_name that
# appear in df_finish, failed experiments are all the others
reached_finish = df["exp_name"].isin(df_finish["exp_name"])

# dropna(thresh=5) keeps only rows that have at least 5 non-null values.
# The explicit copy makes both frames independent of `df`, since later cells
# add and overwrite columns on them.
df_successful_exp = df[reached_finish].dropna(thresh=5).copy()
df_failed_exp = df[~reached_finish].dropna(thresh=5).copy()

Found 100 experiments


This script does the following:

- It uses NLTK's VADER sentiment analyzer to classify the overall sentiment of the agent's thoughts and observations as positive, negative, or neutral.
- A simple rule-based approach for identifying specific emotions from keywords is described here, but it is not implemented in the code cell below.
- The `process_row` function takes a data row, extracts the relevant fields, cleans the text, and then applies sentiment classification; rows that cannot be parsed are labeled `unknown`.
- Instead of an example with a single sample data row, the cell applies `process_row` to every row of the successful and failed experiment DataFrames.

To use this with your full dataset, you would typically load your data into a pandas DataFrame and apply the process_row function to each row:



In [3]:
# Initialize the NLTK sentiment analyzer.
# One module-level VADER instance, reused by classify_sentiment() on every call.
sia = SentimentIntensityAnalyzer()

def clean_text(text):
    """Normalize `text` for sentiment scoring.

    The input is converted with str(), every character that is neither an
    ASCII letter nor whitespace is deleted (digits and punctuation are removed
    without inserting a space), and the result is lower-cased.

    Args:
        text: Any object; non-strings are stringified first.

    Returns:
        The cleaned, lower-cased string.
    """
    non_letter_or_space = re.compile(r'[^a-zA-Z\s]')
    stripped = non_letter_or_space.sub('', str(text))
    return stripped.lower()

def classify_sentiment(text):
    """Map `text` to a sentiment label using the shared VADER analyzer `sia`.

    Args:
        text: The string passed to sia.polarity_scores().

    Returns:
        'positive' when the compound score is above 0.05, 'negative' when it
        is below -0.05, and 'neutral' otherwise.
    """
    # NOTE(review): VADER's documentation describes the cut-offs as inclusive
    # (>= 0.05 / <= -0.05); the strict comparisons used here label a score of
    # exactly +/-0.05 as 'neutral' -- confirm this is intended.
    compound = sia.polarity_scores(text)['compound']
    if compound > 0.05:
        return 'positive'
    if compound < -0.05:
        return 'negative'
    return 'neutral'

def _is_missing(value):
    """Return True for None and float NaN, the markers of an absent field."""
    return value is None or (isinstance(value, float) and value != value)


# Main function to process a data row
def process_row(row):
    """Classify the sentiment of one log row.

    The text that gets scored is the plan's 'thought' followed by the left,
    front, right and compass observations, passed through clean_text().

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'plan', 'observation_left', 'observation_front',
            'observation_right' and 'observation_compass'. 'plan' is a JSON
            string containing a 'thought' entry, or a falsy value.

    Returns:
        The label returned by classify_sentiment(), or 'unknown' when the row
        cannot be processed (invalid JSON, missing key, or a non-string
        thought).
    """
    try:
        # Extract relevant fields
        raw_plan = row['plan']
        thought = json.loads(raw_plan)['thought'] if raw_plan else ''

        # Skip absent observations instead of stringifying them: str(None)
        # would add the word "None" to the text, and "none" is one of VADER's
        # negation cues, so it could distort the score of the words after it.
        observations = [
            str(value)
            for value in (
                row['observation_left'],
                row['observation_front'],
                row['observation_right'],
                row['observation_compass'],
            )
            if not _is_missing(value)
        ]

        # Clean and combine text
        all_text = clean_text(thought + ' ' + ' '.join(observations))

        # Classify sentiment
        return classify_sentiment(all_text)
    except (json.JSONDecodeError, TypeError, KeyError):
        # Return 'unknown' if there's an error processing the row
        return 'unknown'


def _add_sentiment_and_sort(experiments):
    """Return a new frame with a 'sentiment' column and numerically sorted steps.

    Steps applied, in order:
      1. 'sentiment' is computed by applying process_row to every row.
      2. 'step' is converted with pd.to_numeric; values that cannot be
         converted become NaN (errors='coerce').
      3. Rows are sorted by ('exp_name', 'step') and the index is reset.

    `assign` builds a new DataFrame, so the frame passed in is left untouched.
    """
    return (
        experiments
        .assign(
            sentiment=experiments.apply(process_row, axis=1),
            step=pd.to_numeric(experiments['step'], errors='coerce'),
        )
        .sort_values(['exp_name', 'step'])
        .reset_index(drop=True)
    )


# Same processing for the successful and the failed experiments.
df_successful_exp = _add_sentiment_and_sort(df_successful_exp)
df_failed_exp = _add_sentiment_and_sort(df_failed_exp)

In [4]:
# Define colors for start points and reactions

# Outline color of the start marker, keyed by the scenario keyword that is
# searched for in the experiment name.
start_colors = {
    "base": "#66c5cc", "male": "#f6cf71", "night": "#f89c74",
    "tokyo": "#dcb0f2", "winter": "#87c55f"
}

# Marker colors listed in the "Reaction Types" legend. "Neutral" is included
# because neutral steps are drawn in this grey by add_traces_with_lines, so
# the legend needs an entry for them too.
reaction_colors = {
    "Finish": "rgb(255, 49, 152)",
    "Positive": "rgb(126, 197, 236)",
    "Neutral": "rgb(200,200,200)",
    "Negative": "pink"
}


def add_traces_with_lines(df, fig, row, col):
    """Draw one horizontal timeline per experiment into a subplot of `fig`.

    For every distinct 'exp_name' in `df` two traces are added:
      * a large ring at the first step, outlined in the color of the first
        `start_colors` key found in the experiment's name (gray if none), and
      * a dotted line with one marker per step, colored by the step's
        sentiment; 'finish' steps are drawn larger and in their own color.

    The x position is the running sum of step widths, shifted so each
    experiment starts at 0: a 'forward' action contributes its
    'action_value', every other action contributes a fixed width of 2.

    Args:
        df: DataFrame with the columns 'exp_name', 'action_type',
            'action_value' and 'sentiment', already ordered by step.
        fig: Figure created by make_subplots that receives the traces.
        row: Subplot row passed to fig.add_trace.
        col: Subplot column passed to fig.add_trace.

    Returns:
        None; `fig` is modified in place.
    """
    # groupby(sort=False) yields the experiments in order of first appearance
    # and splits the frame once instead of re-filtering it per experiment.
    for exp_name, df_exp in df.groupby('exp_name', sort=False):
        step_widths = df_exp.apply(
            lambda step: step["action_value"]
            if step["action_type"] == "forward" else 2,
            axis=1
        )
        x_values = step_widths.cumsum()
        x_values = x_values - x_values.iloc[0]

        # Label the row with the experiment folder's own name. exp_name is a
        # path such as "logs/<name>", which contains no "/logs/" segment, so
        # splitting on "/logs/" left the directory prefix in the label.
        cleaned_name = os.path.basename(exp_name) or exp_name
        exp_type = next(
            (key for key in start_colors if key in cleaned_name), None)
        start_color = start_colors.get(exp_type, "gray")

        # Sentiment decides the marker color ...
        colors = [
            'rgb(126, 197, 236)' if sentiment == 'positive' else
            'rgb(200,200,200)' if sentiment == 'neutral' else 'pink'
            for sentiment in df_exp['sentiment']
        ]
        # ... except for 'finish' steps, which always get the finish color.
        colors = ['rgb(255, 49, 152)' if action == 'finish' else color
                  for action, color in zip(df_exp['action_type'], colors)]

        y_values = [cleaned_name] * len(df_exp)

        # Add starting point marker
        fig.add_trace(go.Scatter(
            x=[x_values.iloc[0]], y=[cleaned_name],
            mode='markers',
            marker=dict(size=40, symbol='circle', color='rgba(255,255,255,.8)',
                        line=dict(width=4, color=start_color)),
            name=exp_type.capitalize() if exp_type else "Unknown",
            legendgroup="start",
            showlegend=False,
        ), row=row, col=col)

        # Add main line and markers
        fig.add_trace(go.Scatter(
            x=x_values, y=y_values,
            mode='lines+markers',
            marker=dict(
                line=dict(width=1.5, color='grey'),
                color=colors,
                size=[20 if action !=
                      'finish' else 30 for action in df_exp['action_type']],
                opacity=.85
            ),
            line=dict(color='black', width=0.5, dash='dot'),
            showlegend=False
        ), row=row, col=col)


# Relative subplot heights: each panel is weighted by its number of
# experiments (40 units per successful, 45 units per failed experiment).
successful_height = len(df_successful_exp['exp_name'].unique()) * 40
failed_height = len(df_failed_exp['exp_name'].unique()) * 45
total_height = successful_height + failed_height
row_heights = [
    panel_height / total_height
    for panel_height in (successful_height, failed_height)
]

# Two stacked panels that share the x axis: successful runs on top, failed
# runs below.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    row_heights=row_heights,
    vertical_spacing=0.05,
    subplot_titles=["Successful Experiments", "Failed Experiments"],
)

# One timeline per experiment in the matching panel.
add_traces_with_lines(df_successful_exp, fig, row=1, col=1)
add_traces_with_lines(df_failed_exp, fig, row=2, col=1)

# Global layout: canvas size, fonts, backgrounds and legend placement.
legend_options = {
    "title": "Scenario",
    "font": {"size": 24},
    # Place the legend to the right of the plotting area.
    "x": 1.1,
    "y": 1,
    "xanchor": 'left',
    "yanchor": 'top',
}
fig.update_layout(
    height=resolution_y,
    width=resolution_x,
    font={"size": 20},
    plot_bgcolor='rgba(255, 255, 255,1)',
    paper_bgcolor='white',
    legend=legend_options,
    yaxis={"tickfont": {"size": 10}},
    yaxis2={"tickfont": {"size": 10}},
    # NOTE(review): presumably meant to enlarge the subplot-title annotations;
    # confirm that it takes effect for both titles.
    annotations=[dict(font=dict(size=40))],
)

# Legend-only entries (no visible data points) for the scenario types.
for scenario, color in start_colors.items():
    scenario_marker = {
        "size": 40,
        "symbol": 'circle',
        "color": 'white',
        "line": {"width": 6, "color": color},
    }
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker=scenario_marker,
        name=scenario.capitalize(),
        legendgroup="scenarios",
    ))

# Legend-only entries for the reaction types, grouped under their own title.
for name, color in reaction_colors.items():
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        marker={"color": color, "size": 24},
        name=name,
        legendgroup="reaction",
        legendgrouptitle_text='Reaction Types',
    ))

# Display the figure, then export it as a PNG at the configured resolution.
fig.show()
fig.write_image(
    "ta-sentiment.png",
    width=resolution_x,
    height=resolution_y,
    engine="kaleido",
)