# AI Tool Combinations Analysis: Code Generation & Review Patterns

**Research Question:** What combinations of AI tools for code generation and review are prevalent?

## Objectives:
1. Identify which AI agents generate code vs which ones review code
2. Map the most common AI generator → AI reviewer combinations
3. Analyze success rates of different tool combinations
4. Understand workflow patterns in multi-AI environments

## Dataset: AIDev - AI Teammates in Software Engineering
- 456,535 AI-generated Pull Requests
- 5 Major AI Agents: OpenAI Codex, GitHub Copilot, Devin, Cursor, Claude Code
- Complete review histories with human and bot reviewers

In [None]:
# Environment Setup and Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import warnings
from datetime import datetime
import re

# Notebook-wide display and plotting configuration.
# pandas: show every column, cap printed rows at 100.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

# matplotlib / seaborn look-and-feel
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

# Silence all warnings for the rest of the session
warnings.filterwarnings('ignore')

timestamp_format = "%Y-%m-%d %H:%M:%S"
print('🔧 Environment Setup Complete!')
print('📚 Libraries loaded for AI tool combination analysis')
print(f'⏰ Analysis started at: {datetime.now().strftime(timestamp_format)}')

In [None]:
# Install Plotly into the interpreter that backs this kernel.
#
# The pip strategies below are attempted in order and the loop stops at the
# first one that succeeds, so a working installation is never force-reinstalled
# or duplicated into the user site.

import importlib
import subprocess
import sys

print("🔧 Installing Plotly with multiple methods...\n")

# (label, extra pip arguments), ordered from least to most invasive.
install_methods = [
    ("Standard installation", []),
    ("Force reinstall", ["--force-reinstall"]),
    ("User installation", ["--user"]),
]

plotly_installed = False
for method_number, (method_label, extra_args) in enumerate(install_methods, 1):
    if method_number > 1:
        print("\n" + "-"*50)
    print(f"Method {method_number}: {method_label}")
    pip_command = [sys.executable, "-m", "pip", "install", *extra_args, "plotly"]
    try:
        subprocess.check_call(pip_command)
    except (subprocess.CalledProcessError, OSError) as install_error:
        # CalledProcessError: pip exited non-zero; OSError: interpreter could
        # not be launched. Anything else (e.g. KeyboardInterrupt) propagates.
        print(f"❌ Method {method_number} failed: {install_error}")
    else:
        print(f"✅ Method {method_number} successful")
        plotly_installed = True
        break

if not plotly_installed:
    print("\n❌ All installation methods failed - check the pip output above")

print("\n🔄 NOW RESTART YOUR KERNEL!")
print("   Kernel → Restart → Run All")

# Test import
print("\n🧪 Testing import...")
# A package installed after interpreter start-up may be invisible to the
# import system until its finder caches are refreshed.
importlib.invalidate_caches()
try:
    import plotly.express as px
    print("✅ SUCCESS: Plotly imported!")
except ImportError as e:
    print(f"❌ Still failed: {e}")
    print("RESTART YOUR KERNEL and try importing again!")

In [None]:
import subprocess
import sys

# Packages to install with pip, each paired with the message printed once its
# installation has finished: PyArrow for parquet support, then the HuggingFace
# datasets library. A failing install raises and stops the cell.
packages_with_messages = (
    ("pyarrow", "✅ PyArrow installed!"),
    ("datasets", "✅ Datasets library installed!"),
)

for package_name, done_message in packages_with_messages:
    pip_command = [sys.executable, "-m", "pip", "install", package_name]
    subprocess.check_call(pip_command)
    print(done_message)

In [None]:
import subprocess
import sys

# Install the parquet engine and the HTTP client used by the loading cells,
# one pip invocation per package; the first failure raises and stops the cell.
for package_name in ("fastparquet", "requests"):
    pip_command = [sys.executable, "-m", "pip", "install", package_name]
    subprocess.check_call(pip_command)

print("✅ Packages installed - now try the loading strategies")

In [None]:
import pandas as pd
import numpy as np

# Load the AIDev pull-request table into `pull_requests_df`.
#
# Strategy 1 reads the parquet file through the hf:// filesystem with the
# fastparquet engine. If that fails for any reason, Strategy 2 downloads the
# file over HTTPS and parses the bytes in memory.

# Strategy 1: fastparquet
try:
    pull_requests_df = pd.read_parquet(
        "hf://datasets/hao-li/AIDev/all_pull_request.parquet",
        engine="fastparquet"
    )
except Exception as e:
    print(f"❌ Failed: {e}")
    # Try Strategy 2: Direct download
    import requests
    import io

    url = "https://huggingface.co/datasets/hao-li/AIDev/resolve/main/all_pull_request.parquet"
    # (connect, read) timeouts so a stalled connection cannot hang the kernel.
    response = requests.get(url, timeout=(10, 300))
    # Fail here with the HTTP status rather than letting an error page be
    # handed to the parquet parser, which would report a misleading error.
    response.raise_for_status()
    pull_requests_df = pd.read_parquet(io.BytesIO(response.content))
    print(f"✅ Direct download SUCCESS! Shape: {pull_requests_df.shape}")
else:
    print(f"✅ SUCCESS! Shape: {pull_requests_df.shape}")

In [None]:
import pandas as pd
import numpy as np

# Load the AIDev pull-request table into `pull_requests_df` and preview it.
#
# First attempt: read via the hf:// filesystem with the fastparquet engine.
# On any failure, fall back to downloading the file over HTTPS and parsing
# the bytes in memory.
try:
    pull_requests_df = pd.read_parquet(
        "hf://datasets/hao-li/AIDev/all_pull_request.parquet",
        engine="fastparquet"
    )
    print(f"✅ SUCCESS! Shape: {pull_requests_df.shape}")
    print(f"Columns: {list(pull_requests_df.columns)}")
    print("\nFirst few rows:")
    print(pull_requests_df.head(2))

except Exception as e:
    print(f"❌ fastparquet failed: {e}")
    print("Trying direct download...")

    import requests
    import io

    url = "https://huggingface.co/datasets/hao-li/AIDev/resolve/main/all_pull_request.parquet"
    # (connect, read) timeouts so a stalled connection cannot hang the kernel.
    response = requests.get(url, timeout=(10, 300))
    # Surface HTTP errors directly instead of passing an error page to the
    # parquet parser, which would fail with an unrelated-looking message.
    response.raise_for_status()
    pull_requests_df = pd.read_parquet(io.BytesIO(response.content))
    print(f"✅ Direct download SUCCESS! Shape: {pull_requests_df.shape}")
    print(f"Columns: {list(pull_requests_df.columns)}")

In [None]:
# Explore the real AI agents: PR counts per agent, overall share of PRs that
# carry an agent label, and a couple of example PR titles for the top agents.
ai_agents = pull_requests_df["agent"].value_counts()
print("🤖 AI AGENTS IN REAL DATASET:")
print(ai_agents.head(10))

has_agent = pull_requests_df["agent"].notna()
print(f"\nTotal PRs with AI agents: {has_agent.sum():,}")
print(f"Percentage of AI-assisted PRs: {has_agent.mean()*100:.1f}%")

# Show sample PRs for each AI tool
max_title_chars = 70
for agent in ai_agents.head(3).index:
    agent_titles = pull_requests_df.loc[pull_requests_df["agent"] == agent, "title"]
    # Drop missing titles first: slicing a NaN/None title would raise TypeError.
    sample_prs = agent_titles.dropna().head(2)
    print(f"\n{agent} examples:")
    for title in sample_prs:
        title_text = str(title)
        # Only mark the title as shortened when characters were actually cut.
        ellipsis = "..." if len(title_text) > max_title_chars else ""
        print(f"  - {title_text[:max_title_chars]}{ellipsis}")

In [None]:
# LOAD PR REVIEW DATA FOR AI COMBINATION ANALYSIS
#
# Probes the AIDev dataset for review tables by trying a fixed list of
# candidate parquet file names. Every table that loads is stored in
# `review_tables` (table name -> DataFrame). For each loaded table the cell
# then prints a column overview and a rough scan for bot/AI-looking reviewers.
# If nothing loads, it tries to list the dataset's configs and splits instead.
print('📂 LOADING PR REVIEW DATA')
print('=' * 35)

# Successfully loaded tables, keyed by the candidate name that worked
review_tables = {}

# Candidate table names; these are guesses, each is simply attempted below
possible_review_tables = [
    'all_pr_reviews',
    'pr_reviews', 
    'all_pr_review_comments',
    'pr_review_comments',
    'all_reviews',
    'reviews'
]

print('🔄 Attempting to load review tables...')

for table_name in possible_review_tables:
    try:
        print(f'  🔍 Trying {table_name}...')
        
        # Same hf:// + fastparquet read used for the pull-request table
        review_df = pd.read_parquet(
            f"hf://datasets/hao-li/AIDev/{table_name}.parquet", 
            engine="fastparquet"
        )
        
        review_tables[table_name] = review_df
        print(f'  ✅ {table_name}: {review_df.shape[0]:,} rows, {review_df.shape[1]} columns')
        
        # Show columns and a sample only for the first table that loads
        # (the dict holds exactly one entry right after that first insert)
        if len(review_tables) == 1:
            print(f'      Columns: {list(review_df.columns)}')
            print(f'      Sample data:')
            print(f'      {review_df.head(2)}')
        
    except Exception as e:
        # Best-effort probing: any failure is reported (message cut to 50
        # characters) and the next candidate name is tried.
        print(f'  ❌ {table_name}: {str(e)[:50]}...')

print(f'\n📊 REVIEW DATA LOADING RESULTS:')
print(f'   • Successfully loaded {len(review_tables)} table(s)')

if review_tables:
    # Analyze the loaded review data
    print(f'\n🔍 REVIEW DATA STRUCTURE ANALYSIS:')
    print('=' * 35)
    
    for table_name, df in review_tables.items():
        print(f'\n📋 {table_name.upper()}:')
        print(f'   • Shape: {df.shape}')
        print(f'   • Columns: {list(df.columns)}')
        
        # Bucket columns by substring match on the lower-cased column name.
        # NOTE(review): these are plain substring tests, so they over-match
        # (e.g. 'pr' is also a substring of 'approved'); treat the buckets as
        # hints, not as a schema.
        key_columns = {
            'pr_linking': [col for col in df.columns if any(keyword in col.lower() for keyword in ['pr', 'pull', 'request'])],
            'reviewer_info': [col for col in df.columns if any(keyword in col.lower() for keyword in ['user', 'login', 'author', 'reviewer'])],
            'review_outcome': [col for col in df.columns if any(keyword in col.lower() for keyword in ['state', 'status', 'decision', 'approved'])],
            'timing': [col for col in df.columns if any(keyword in col.lower() for keyword in ['time', 'date', 'created', 'submitted'])]
        }
        
        print(f'   🔗 PR Linking columns: {key_columns["pr_linking"]}')
        print(f'   👤 Reviewer columns: {key_columns["reviewer_info"]}')
        print(f'   ✅ Review outcome columns: {key_columns["review_outcome"]}')
        print(f'   ⏰ Timing columns: {key_columns["timing"]}')
        
        # Check for bot/AI reviewers
        reviewer_columns = key_columns["reviewer_info"]
        if reviewer_columns:
            # Only the first matching column is inspected, in column order
            reviewer_col = reviewer_columns[0]
            print(f'\n🤖 CHECKING FOR AI/BOT REVIEWERS in {reviewer_col}:')
            
            # Distinct non-null values of that column
            unique_reviewers = df[reviewer_col].dropna().unique()
            
            # Substring scans over string values only.
            # NOTE(review): 'ai' is a bare substring test, so any login that
            # merely contains "ai" (e.g. "main") is counted as an AI pattern.
            bot_reviewers = [r for r in unique_reviewers if isinstance(r, str) and 'bot' in r.lower()]
            ai_patterns = [r for r in unique_reviewers if isinstance(r, str) and any(pattern in r.lower() for pattern in ['copilot', 'ai', 'claude', 'gpt', 'codex'])]
            
            print(f'   🤖 Bot reviewers found: {len(bot_reviewers)}')
            if bot_reviewers:
                print(f'      Sample bots: {bot_reviewers[:5]}')
            
            print(f'   🧠 AI-pattern reviewers found: {len(ai_patterns)}')
            if ai_patterns:
                print(f'      Sample AI reviewers: {ai_patterns[:5]}')
            
            print(f'   👥 Total unique reviewers: {len(unique_reviewers):,}')
            print(f'   📊 Sample reviewers: {list(unique_reviewers[:10])}')

    print(f'\n🎯 NEXT STEPS:')
    print('1. Link PR creation data (with AI agents) to review data')
    print('2. Identify AI tool (creator) + reviewer combinations') 
    print('3. Analyze approval rates for different combinations')
    print('4. Find most common AI creator → reviewer patterns')

else:
    print('❌ No review tables loaded successfully')
    print('\n🔍 Let\'s check what files are actually available in the dataset:')
    
    # Fall back to asking the `datasets` package for the dataset's configs
    # and splits
    try:
        from datasets import get_dataset_config_names, get_dataset_split_names
        
        print('Checking available dataset configurations...')
        configs = get_dataset_config_names("hao-li/AIDev")
        print(f'Available configs: {configs}')
        
        splits = get_dataset_split_names("hao-li/AIDev")
        print(f'Available splits: {splits}')
        
    except Exception as e:
        print(f'Could not check dataset structure: {e}')
        
        # These hints are inside the except branch, so they are printed only
        # when the listing above failed as well
        print('\n📝 Alternative: Create sample review data for analysis demonstration')
        print('Would you like to proceed with sample data to show the analysis method?')

print(f'\n✅ Review data loading phase complete!')

In [None]:
# COMPLETE AI TOOL COMBINATIONS ANALYSIS - ALL AI-TO-AI COMBINATIONS
#
# Joins pull requests to their reviews, counts reviews per
# (AI creator, reviewer) pair, isolates reviewers whose login contains
# "[bot]", and prints rankings, per-creator breakdowns and a same-platform
# vs cross-platform split.
#
# Reads:   pull_requests_df, review_tables (from earlier cells)
# Creates: merged_data, combinations, bot_reviewers, ai_to_ai,
#          creator_totals, ai_reviewer_totals, same_platform, cross_platform,
#          combinations_for_viz, ai_combinations_complete
print('AI CREATOR → REVIEWER COMBINATIONS (REAL DATA)')
print('=' * 50)

if 'pr_reviews' not in review_tables:
    raise KeyError(
        "'pr_reviews' was not loaded into review_tables; "
        "re-run the review data loading cell and check its output"
    )

# Use the correct column names from the merge: columns present in both frames
# get the '_pr' / '_review' suffix (hence 'user_review' below).
reviews_df = review_tables['pr_reviews']
merged_data = pull_requests_df.merge(
    reviews_df,
    left_on='id',
    right_on='pr_id',
    how='inner',
    suffixes=('_pr', '_review')
)

# merged_data holds one row per review, so a PR with several reviews appears
# several times. Count distinct PRs for the coverage figure; using the row
# count would overstate coverage and can exceed 100%.
reviewed_pr_count = merged_data['pr_id'].nunique()
total_pr_count = pull_requests_df['id'].nunique()
coverage_pct = reviewed_pr_count / total_pr_count * 100 if total_pr_count else 0.0

print('Data Overview:')
print(f'   • AI PRs with reviews: {reviewed_pr_count:,}')
print(f'   • Review records: {len(merged_data):,}')
print(f'   • Coverage: {coverage_pct:.1f}% of AI PRs have reviews')

# Simple combination counting - just frequency
combinations = merged_data.groupby(['agent', 'user_review']).size().reset_index(name='review_count')
combinations.columns = ['ai_creator', 'reviewer', 'review_count']
combinations = combinations.sort_values('review_count', ascending=False)

print(f'\nFound {len(combinations):,} unique AI creator → reviewer combinations')

print('\nTOP 15 AI CREATOR → REVIEWER COMBINATIONS:')
print('-' * 55)
print(f'{"#":<3} {"AI Creator":<15} {"Reviewer":<25} {"Reviews":<8}')
print('-' * 55)
for i, row in enumerate(combinations.head(15).itertuples(index=False), 1):
    creator = str(row.ai_creator)[:14]
    reviewer = str(row.reviewer)[:24]
    count = int(row.review_count)

    print(f'{i:<3} {creator:<15} {reviewer:<25} {count:<8,}')

# AI/Bot reviewers identification: logins carrying the "[bot]" marker
bot_reviewers = [r for r in combinations['reviewer'].unique()
                 if isinstance(r, str) and '[bot]' in r.lower()]

print(f'\nAI/BOT REVIEWERS IDENTIFIED: {len(bot_reviewers)}')
print('ALL AI/BOT REVIEWERS:')
print('-' * 40)
for i, bot in enumerate(bot_reviewers, 1):
    print(f'  {i:2d}. {bot}')

# ALL AI creator → AI reviewer combinations
ai_to_ai = combinations[combinations['reviewer'].isin(bot_reviewers)]
ai_to_ai = ai_to_ai.sort_values('review_count', ascending=False)

print(f'\n🤖 ALL AI CREATOR → AI REVIEWER COMBINATIONS ({len(ai_to_ai)} total):')
print('=' * 70)
print(f'{"#":<4} {"AI Creator":<18} {"AI Reviewer":<35} {"Reviews":<10}')
print('=' * 70)

# Show ALL AI-to-AI combinations
for i, row in enumerate(ai_to_ai.itertuples(index=False), 1):
    creator = str(row.ai_creator)[:17]
    reviewer = str(row.reviewer)[:34]
    count = int(row.review_count)

    print(f'{i:<4} {creator:<18} {reviewer:<35} {count:<10,}')

# Detailed analysis by AI creator
print('\n📊 DETAILED BREAKDOWN BY AI CREATOR:')
print('=' * 50)

# Iterate creators in overall-popularity order (combinations is sorted by
# review_count), skipping creators that never met a bot reviewer.
for creator in combinations['ai_creator'].unique():
    creator_data = ai_to_ai[ai_to_ai['ai_creator'] == creator]
    if creator_data.empty:
        continue

    total_reviews = creator_data['review_count'].sum()
    print(f'\n🔧 {creator} ({total_reviews:,} AI reviews):')
    print('-' * 45)

    for j, row in enumerate(creator_data.itertuples(index=False), 1):
        reviewer = str(row.reviewer)
        count = int(row.review_count)
        percentage = (count / total_reviews) * 100
        print(f'   {j:2d}. {reviewer:<30} {count:>6,} ({percentage:4.1f}%)')

# Summary statistics
print('\n📈 COMPREHENSIVE SUMMARY:')
print('=' * 30)

# Reviews by AI creator
print('\nREVIEWS BY AI CREATOR TOOL:')
print('-' * 30)
creator_totals = combinations.groupby('ai_creator')['review_count'].sum().sort_values(ascending=False)
# One grouped sum instead of re-filtering ai_to_ai for every creator
ai_reviews_by_creator = ai_to_ai.groupby('ai_creator')['review_count'].sum()
for tool, total in creator_totals.items():
    ai_reviews = ai_reviews_by_creator.get(tool, 0)
    ai_percentage = (ai_reviews / total) * 100 if total > 0 else 0
    print(f'{tool:<18} {total:>8,} total ({ai_reviews:>6,} AI reviews, {ai_percentage:4.1f}%)')

# Most active AI reviewers
print('\nMOST ACTIVE AI REVIEWERS:')
print('-' * 35)
ai_reviewer_totals = ai_to_ai.groupby('reviewer')['review_count'].sum().sort_values(ascending=False)
for reviewer, total in ai_reviewer_totals.items():
    reviewer_short = str(reviewer)[:40]
    print(f'{reviewer_short:<40} {total:>8,}')

# Cross-platform vs same-platform analysis
print('\n🔗 ECOSYSTEM ANALYSIS:')
print('-' * 25)

# Simplified heuristic: a creator and a reviewer belong to the same platform
# when the creator name contains the key and the reviewer login contains any
# of the listed markers. All five agents named in the dataset description
# (Codex, Copilot, Devin, Cursor, Claude Code) are covered, so Devin and
# Claude pairs are not silently counted as cross-platform.
same_platform_markers = {
    'copilot': ('copilot',),
    'cursor': ('cursor',),
    'codex': ('codex', 'openai', 'gpt'),
    'devin': ('devin',),
    'claude': ('claude',),
}


def _is_same_platform(creator_name, reviewer_login):
    """Return True when creator and reviewer match the same platform entry."""
    creator_lower = str(creator_name).lower()
    reviewer_lower = str(reviewer_login).lower()
    return any(
        creator_key in creator_lower
        and any(marker in reviewer_lower for marker in reviewer_markers)
        for creator_key, reviewer_markers in same_platform_markers.items()
    )


same_platform = 0
cross_platform = 0

for creator, reviewer, count in zip(
    ai_to_ai['ai_creator'], ai_to_ai['reviewer'], ai_to_ai['review_count']
):
    if _is_same_platform(creator, reviewer):
        same_platform += count
    else:
        cross_platform += count

total_ai_reviews = same_platform + cross_platform
if total_ai_reviews > 0:
    print(f'Same-platform combinations: {same_platform:,} ({same_platform/total_ai_reviews*100:.1f}%)')
    print(f'Cross-platform combinations: {cross_platform:,} ({cross_platform/total_ai_reviews*100:.1f}%)')

# Guard the "top" lookups: with no reviews (or no bot reviewers) the totals
# are empty and index[0] would raise IndexError.
most_reviewed_tool = creator_totals.index[0] if not creator_totals.empty else 'n/a'
most_active_ai_reviewer = ai_reviewer_totals.index[0] if not ai_reviewer_totals.empty else 'n/a'

print('\n🎯 KEY INSIGHTS:')
print('-' * 15)
print(f'   • Total unique combinations: {len(combinations):,}')
print(f'   • AI → AI combinations: {len(ai_to_ai):,}')
print(f'   • AI → Human combinations: {len(combinations) - len(ai_to_ai):,}')
print(f'   • Most reviewed AI tool: {most_reviewed_tool}')
print(f'   • Most active AI reviewer: {most_active_ai_reviewer}')
print(f'   • Total AI creators: {ai_to_ai["ai_creator"].nunique()}')
print(f'   • Total AI reviewers: {ai_to_ai["reviewer"].nunique()}')

# Store for potential visualization
combinations_for_viz = combinations
ai_combinations_complete = ai_to_ai

print('\n✅ COMPLETE AI COMBINATION ANALYSIS FINISHED!')
print('📊 Variables created: combinations_for_viz, ai_combinations_complete')
print('🚀 Ready for visualization and further analysis!')

In [None]:
# FOCUSED HEATMAP: TOP REVIEWERS ONLY
#
# Restricts the AI creator → AI reviewer counts to the most active reviewers
# and draws three heatmaps (top 20 linear, top 20 log scale, top 10 linear),
# then lists the selected reviewers and how much of the total they cover.
#
# Reads:   ai_combinations_complete (from the combination analysis cell)
# Creates: top_reviewers, ai_top_reviewers, pivot_focused, pivot_focused_log,
#          top_10_reviewers, ai_top_10, pivot_top10, total_reviews_all,
#          total_reviews_top20, total_reviews_top10
print("Creating focused heatmaps with top reviewers...")


def _plot_combination_heatmap(matrix, title, *, figsize, fmt, cmap, cbar_label, linewidths):
    """Draw and show one annotated creator × reviewer heatmap.

    Args:
        matrix: DataFrame with creators as rows and reviewers as columns.
        title: Figure title.
        figsize: (width, height) passed to plt.figure.
        fmt: Annotation format string passed to sns.heatmap.
        cmap: Colormap name.
        cbar_label: Label shown on the colour bar.
        linewidths: Width of the lines separating cells.
    """
    if matrix.empty:
        # Nothing to draw; skip rather than hand seaborn an empty matrix.
        print(f"Skipping heatmap (no data): {title}")
        return

    plt.figure(figsize=figsize)
    sns.heatmap(matrix,
                annot=True,
                fmt=fmt,
                cmap=cmap,
                cbar_kws={'label': cbar_label},
                linewidths=linewidths)

    plt.title(title, fontsize=16, fontweight='bold')
    plt.xlabel('AI Reviewers', fontsize=12)
    plt.ylabel('AI Creators', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()


# Total reviews per reviewer, computed once and reused for every ranking and
# listing below.
reviewer_totals = ai_combinations_complete.groupby('reviewer')['review_count'].sum()
top20_totals = reviewer_totals.nlargest(20)
top10_totals = reviewer_totals.nlargest(10)

# 1. Filter to top 20 most active reviewers
top_reviewers = top20_totals.index
ai_top_reviewers = ai_combinations_complete[ai_combinations_complete['reviewer'].isin(top_reviewers)]

# Create pivot for top reviewers only; pairs that never occur become 0
pivot_focused = ai_top_reviewers.pivot(
    index='ai_creator',
    columns='reviewer',
    values='review_count'
).fillna(0)

print(f"Focused heatmap: {pivot_focused.shape[0]} creators × {pivot_focused.shape[1]} reviewers")

# 2. REGULAR HEATMAP - TOP 20 REVIEWERS
_plot_combination_heatmap(
    pivot_focused,
    'AI Creator → AI Reviewer Combinations (Top 20 Most Active Reviewers)',
    figsize=(18, 8),
    fmt='.0f',
    cmap='YlOrRd',
    cbar_label='Review Count',
    linewidths=0.5,
)

# 3. LOG-SCALE HEATMAP - TOP 20 REVIEWERS
# log1p keeps zero cells at zero while compressing the dominant pairs
pivot_focused_log = np.log1p(pivot_focused)
_plot_combination_heatmap(
    pivot_focused_log,
    'AI Creator → AI Reviewer Combinations - Log Scale (Top 20 Reviewers)',
    figsize=(18, 8),
    fmt='.1f',
    cmap='viridis',
    cbar_label='Log(1 + Review Count)',
    linewidths=0.5,
)

# 4. ALTERNATIVE: Top 10 for even cleaner view
top_10_reviewers = top10_totals.index
ai_top_10 = ai_combinations_complete[ai_combinations_complete['reviewer'].isin(top_10_reviewers)]

pivot_top10 = ai_top_10.pivot(
    index='ai_creator',
    columns='reviewer',
    values='review_count'
).fillna(0)

_plot_combination_heatmap(
    pivot_top10,
    'AI Creator → AI Reviewer Combinations (Top 10 Most Active Reviewers)',
    figsize=(14, 6),
    fmt='.0f',
    cmap='RdYlBu_r',
    cbar_label='Review Count',
    linewidths=0.8,
)

# 5. Show which reviewers were selected
print("\nTop 20 Reviewers Selected:")
for i, (reviewer, total_reviews) in enumerate(top20_totals.items(), 1):
    print(f"{i:2d}. {reviewer:<35} {total_reviews:>6,} reviews")

print("\nTop 10 Reviewers Selected:")
for i, (reviewer, total_reviews) in enumerate(top10_totals.items(), 1):
    print(f"{i:2d}. {reviewer:<35} {total_reviews:>6,} reviews")

# 6. Coverage analysis
total_reviews_all = ai_combinations_complete['review_count'].sum()
total_reviews_top20 = ai_top_reviewers['review_count'].sum()
total_reviews_top10 = ai_top_10['review_count'].sum()

# Reviewer count is derived from the data instead of being hard-coded
reviewer_count = len(reviewer_totals)


def _coverage_pct(part):
    """Return `part` as a percentage of all AI reviews (0.0 when there are none)."""
    return part / total_reviews_all * 100 if total_reviews_all else 0.0


print("\nCoverage Analysis:")
print(f"All {reviewer_count} reviewers:  {total_reviews_all:,} reviews (100.0%)")
print(f"Top 20 reviewers:  {total_reviews_top20:,} reviews ({_coverage_pct(total_reviews_top20):.1f}%)")
print(f"Top 10 reviewers:  {total_reviews_top10:,} reviews ({_coverage_pct(total_reviews_top10):.1f}%)")