# AI Inline Learning - Error Reduction Analysis

This notebook visualizes the effectiveness of the AI Inline Learning pattern
using the data in results.csv.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Load the session log and normalize its columns.
# pandas infers a bool dtype on its own when every 'occurred' value is a clean
# true/false token, and the .str accessor raises on non-string data. Casting to
# str first lets the same normalization handle both clean files and files with
# padded or mixed-case values (' True', 'FALSE ', ...). Missing values become
# the string 'nan' and therefore map to False.
df = pd.read_csv('results.csv')
df['occurred'] = df['occurred'].astype(str).str.strip().str.lower() == 'true'
df['session_date'] = pd.to_datetime(df['session_date'])
print(f'Loaded {len(df)} sessions across {df.error_type.nunique()} error types')
df.head()

In [None]:
# Monthly error rate: percentage of sessions per calendar month in which the
# tracked error occurred.
df['month'] = df['session_date'].dt.to_period('M')
monthly = df.groupby('month')['occurred'].agg(['sum', 'count'])
monthly['rate'] = monthly['sum'] / monthly['count'] * 100

# Months above the 20% line are drawn red, all others green.
month_labels = monthly.index.astype(str)
bar_colors = monthly['rate'].gt(20).map({True: '#e74c3c', False: '#2ecc71'}).tolist()

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(month_labels, monthly['rate'], color=bar_colors)
ax.axhline(y=20, color='gray', linestyle='--', alpha=0.5, label='20% threshold')
ax.set(
    xlabel='Month',
    ylabel='Error Rate (%)',
    title='Monthly AI Error Rate - Decreasing as Inline Warnings Accumulate',
)
ax.legend()
plt.xticks(rotation=45)
fig.tight_layout()
fig.savefig('monthly_error_rate.png', dpi=150)
plt.show()
print('Chart saved as monthly_error_rate.png')

In [None]:
# Count the sessions in which each error type actually occurred, most frequent first.
occurred_rows = df[df['occurred']]
type_counts = occurred_rows.groupby('error_type').size().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.barh(type_counts.index, type_counts.values, color='#3498db')
ax.set(
    xlabel='Number of Occurrences',
    title='Most Common AI Mistakes Before Inline Warnings Were Added',
)
# Print each bar's count just past its end.
ax.bar_label(bars, padding=3)
fig.tight_layout()
fig.savefig('error_types.png', dpi=150)
plt.show()

In [None]:
# Before vs After summary by error type
def get_before_after(group):
    """Split one error type's sessions at its first prevented session and rate each side.

    The sessions are ordered by ``session_date`` and the first row with
    ``occurred == False`` is taken as the point from which the warning is
    treated as being in place. The split is positional (in date order), so it
    does not depend on the frame's index labels being contiguous or already
    date-sorted.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single error type. Must contain a sortable ``session_date``
        column and a boolean ``occurred`` column.

    Returns
    -------
    pandas.Series
        ``before_rate`` and ``after_rate`` as percentages of sessions in which
        the error occurred. ``before_rate`` is 0 when the very first session
        was already prevented. When no session was ever prevented there is no
        split point: ``before_rate`` is the overall rate and ``after_rate`` is
        reported as 100.
    """
    # mergesort is stable, so sessions sharing a date keep their original order.
    ordered = group.sort_values('session_date', kind='mergesort')
    occurred = ordered['occurred'].reset_index(drop=True)
    prevented = ~occurred

    if not prevented.any():
        # Never prevented (or empty group): nothing to split on.
        return pd.Series({'before_rate': occurred.mean() * 100, 'after_rate': 100})

    # argmax on a boolean array gives the position of the first True.
    split = int(prevented.to_numpy().argmax())
    before = occurred.iloc[:split]
    after = occurred.iloc[split:]  # never empty: it starts at the prevented row
    return pd.Series({
        'before_rate': before.mean() * 100 if len(before) > 0 else 0,
        'after_rate': after.mean() * 100,
    })

# One row per error type with its before/after error rates.
# Only the two columns the helper reads are passed to apply; selecting them
# explicitly keeps the grouping column out of each group, which avoids the
# pandas deprecation warning about apply operating on grouping columns.
summary = (
    df.groupby('error_type')[['session_date', 'occurred']]
    .apply(get_before_after)
    .reset_index()
)

# Grouped bars: each error type gets a red "before" bar and a green "after" bar
# placed side by side around its integer tick position.
x = range(len(summary))
width = 0.35
before_positions = [i - width / 2 for i in x]
after_positions = [i + width / 2 for i in x]

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(before_positions, summary['before_rate'], width, label='Before Warning', color='#e74c3c')
ax.bar(after_positions, summary['after_rate'], width, label='After Warning', color='#2ecc71')
ax.set_xticks(list(x))
ax.set_xticklabels(summary['error_type'], rotation=45, ha='right')
ax.set_ylabel('Error Rate (%)')
ax.set_title('Before vs After: Inline Learning Warning Effectiveness by Error Type')
ax.legend()
plt.tight_layout()
plt.savefig('before_after_comparison.png', dpi=150)
plt.show()
print('Saved before_after_comparison.png')

In [None]:
# Final summary stat
# Sessions are split purely on the 'occurred' flag: rows where the error
# occurred are reported as "pre-warning", rows where it did not as "prevented".
pre = df[df['occurred']]
post = df[~df['occurred']]
total_before = len(pre)
total_sessions = len(df)
# NOTE(review): hard-coded and not used below; it records the assumption that
# every post-warning row in results.csv has occurred=False - confirm against the data.
post_warning = 0


def _pct(part, whole):
    """Format part/whole as a percentage with one decimal, or 'n/a' when whole is 0."""
    return f'{part / whole * 100:.1f}%' if whole else 'n/a'


print('=== FINAL SUMMARY ===')
print(f'Total sessions tracked:  {total_sessions}')
print(f'Total errors occurred:   {total_before}')
print(f'Overall error rate:      {_pct(total_before, total_sessions)}')

# Pre-warning period
print(f'Pre-warning errors:      {len(pre)}')
print(f'Post-warning prevented:  {len(post)}')
# NOTE(review): this is the share of sessions with no error, so it only reads as
# a "reduction" under the assumption noted above - confirm the intended metric.
print(f'Reduction:               {_pct(len(post), len(pre) + len(post))}')