In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- Configuration ---
# Set the style for our plots to make them look nice
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (18, 8)
plt.rcParams["font.size"] = 12

# Define the path to your analysis file
# The filename is the one you provided.
file_path = "outputs/analysis_Phi-4-reasoning-plus_20250617_235420.json"

print(f"Ready to analyze file: {file_path}")

Ready to analyze file: outputs/analysis_Phi-4-reasoning-plus_20250617_235420.json


In [2]:
# Load the JSON data into a pandas DataFrame for easy manipulation

try:
    with open(file_path, "r") as f:
        data = json.load(f)
    
    # Convert the list of dictionaries directly into a DataFrame
    df = pd.DataFrame(data)
    
    print("File loaded successfully!")
    print(f"Total number of generated tokens: {len(df)}")
    
    # Display the first few rows to verify the structure
    print("\nData Head:")
    display(df.head())
    
    # Display summary information about the DataFrame
    print("\nData Info:")
    df.info()

except FileNotFoundError:
    print(f"ERROR: The file was not found at {file_path}")
    print("Please make sure the filename and path are correct.")
except Exception as e:
    print(f"An error occurred: {e}")

File loaded successfully!
Total number of generated tokens: 32768

Data Head:


Unnamed: 0,step,chosen_token,chosen_token_prob,top_k_predictions
0,1,<think>,1.0,"[{'token': '<think>', 'probability': 1.0}, {'t..."
1,2,We,1.0,"[{'token': 'We', 'probability': 1.0}, {'token'..."
2,3,are,1.0,"[{'token': ' are', 'probability': 1.0}, {'toke..."
3,4,given,0.707031,"[{'token': ' given', 'probability': 0.70703125..."
4,5,a,0.816406,"[{'token': ' a', 'probability': 0.81640625}, {..."



Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32768 entries, 0 to 32767
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   step               32768 non-null  int64  
 1   chosen_token       32768 non-null  object 
 2   chosen_token_prob  32768 non-null  float64
 3   top_k_predictions  32768 non-null  object 
dtypes: float64(1), int64(1), object(2)
memory usage: 1.0+ MB


In [3]:
# Now, let's create the main plot to visualize the variance at each step.

if 'df' in locals() and not df.empty:
    plt.figure()
    
    # Create a line plot of the variance
    lineplot = sns.lineplot(
        x=df.index, 
        y=df['variance'], 
        label='Token Probability Variance',
        linewidth=2
    )
    
    # --- Optional: Use a logarithmic scale if variance values are very spiky ---
    # If your peaks are extremely high and valleys are very low, a log scale can help.
    # Uncomment the line below to try it.
    # plt.yscale('log')

    plt.title('Model Confidence (Variance) at Each Generation Step', fontsize=16, weight='bold')
    plt.xlabel('Generation Step (Token Number)', fontsize=12)
    plt.ylabel('Variance of Probability Distribution', fontsize=12)
    plt.legend()
    plt.show()
else:
    print("DataFrame 'df' not available. Please run the previous cell successfully.")

KeyError: 'variance'

<Figure size 1800x800 with 0 Axes>

In [None]:
# Find the top 5 steps where the model was most confident.

if 'df' in locals() and not df.empty:
    # We select the relevant columns for a clean view
    columns_to_show = ['step', 'chosen_token', 'variance', 'chosen_token_prob']
    
    highest_variance_tokens = df.nlargest(5, 'variance')[columns_to_show]
    
    print("Top 5 Most Confident Steps (Highest Variance):")
    display(highest_variance_tokens)
else:
    print("DataFrame 'df' not available.")

In [None]:
# Find the top 5 steps where the model was most uncertain.

if 'df' in locals() and not df.empty:
    # We select the same columns for a clean view
    columns_to_show = ['step', 'chosen_token', 'variance', 'chosen_token_prob']
    
    lowest_variance_tokens = df.nsmallest(5, 'variance')[columns_to_show]
    
    print("Top 5 Most Uncertain Steps (Lowest Variance):")
    display(lowest_variance_tokens)
else:
    print("DataFrame 'df' not available.")