# CT-MA-CircuitThinking Analysis Notebook

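This notebook loads the most recently generated CoT dataset for the CT-MA system, summarizes and plots the length of each section (logic, think, answer), scores a sample of items with the quality checker, and exports the results as CSV reports.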

In [None]:
import sys
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Make the project importable from the notebook's working directory.
# The imports that follow use the ``src.`` package prefix, which resolves only
# when the directory that CONTAINS ``src`` (the parent of the current working
# directory) is on sys.path. The ``src`` directory itself is still added, as
# before, so anything relying on it keeps working. Entries already present are
# skipped so re-running the cell does not grow sys.path.
project_root = Path.cwd().parent
for search_dir in (project_root / "src", project_root):
    if str(search_dir) not in sys.path:
        sys.path.append(str(search_dir))

from src.utils.config_manager import ConfigManager
from src.utils.logger import setup_logger, get_logger
from src.cot.format_validator import FormatValidator
from src.cot.quality_checker import QualityChecker

# Setup
# Runs the project's logger setup, then creates the configuration object and
# a logger named after this module. `config` is reused by later cells to look
# up data paths and to construct the quality checker.
# NOTE(review): presumably setup_logger() must run before get_logger() —
# the call order is kept as-is; confirm against src.utils.logger.
setup_logger()
config = ConfigManager()
logger = get_logger(__name__)

# Printed only if every import and setup call above completed without raising.
print("CT-MA Analysis Environment Ready!")

## 1. Load Generated CoT Data

In [None]:
# Locate every generated CoT dataset file in the configured directory.
cot_datasets_path = Path(config.get("data.cot_datasets_path"))
cot_files = list(cot_datasets_path.glob("cot_dataset_*.json"))

if not cot_files:
    # Nothing to load: bind an empty dataset so the next cell can still
    # test `cot_data`.
    print("No CoT datasets found. Please run the generation first.")
    cot_data = []
else:
    # "Latest" is decided by file modification time, not by file name.
    latest_file = max(cot_files, key=lambda candidate: candidate.stat().st_mtime)
    print(f"Loading: {latest_file}")

    cot_data = json.loads(latest_file.read_text(encoding='utf-8'))

    print(f"Loaded {len(cot_data)} CoT items")

## 2. Basic Statistics

In [None]:
# Keep only the items that carry no 'error' key.
# This is computed unconditionally (it is simply empty when nothing was
# loaded) so that `successful_items` always exists: the cells below test it
# directly and would otherwise raise NameError when no dataset is available.
successful_items = [item for item in cot_data if 'error' not in item]

if cot_data:
    print(f"Total items: {len(cot_data)}")
    print(f"Successful items: {len(successful_items)}")
    print(f"Success rate: {len(successful_items)/len(cot_data):.1%}")

    if successful_items:
        # Character counts per section; a missing section counts as length 0.
        logic_lengths = [len(item.get('logic', '')) for item in successful_items]
        think_lengths = [len(item.get('think', '')) for item in successful_items]
        answer_lengths = [len(item.get('answer', '')) for item in successful_items]

        # The division is safe: this branch only runs with at least one item.
        print("\nAverage section lengths:")
        print(f"  Logic: {sum(logic_lengths)/len(logic_lengths):.0f} chars")
        print(f"  Think: {sum(think_lengths)/len(think_lengths):.0f} chars")
        print(f"  Answer: {sum(answer_lengths)/len(answer_lengths):.0f} chars")

## 3. Length Distribution Analysis

In [None]:
if successful_items:
    # One record per successful item holding the character count of each
    # section (a missing section counts as 0) plus their sum.
    df_data = []
    for entry in successful_items:
        n_logic = len(entry.get('logic', ''))
        n_think = len(entry.get('think', ''))
        n_answer = len(entry.get('answer', ''))
        df_data.append({
            'data_id': entry.get('data_id', ''),
            'application': entry.get('source_application', ''),
            'logic_length': n_logic,
            'think_length': n_think,
            'answer_length': n_answer,
            'total_length': n_logic + n_think + n_answer,
        })

    df = pd.DataFrame(df_data)

    # (column, panel title, bar colour) for each histogram, listed in the
    # row-major order in which the 2x2 grid of axes is walked below.
    panels = (
        ('logic_length', 'Logic Section Length Distribution', 'blue'),
        ('think_length', 'Think Section Length Distribution', 'green'),
        ('answer_length', 'Answer Section Length Distribution', 'red'),
        ('total_length', 'Total Length Distribution', 'purple'),
    )

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    for ax, (column, title, color) in zip(axes.flat, panels):
        ax.hist(df[column], bins=20, alpha=0.7, color=color)
        ax.set_title(title)
        ax.set_xlabel('Characters')
        ax.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

    # Summary statistics for the same four columns that were plotted.
    print("\nLength Statistics:")
    length_columns = [column for column, _, _ in panels]
    print(df[length_columns].describe())

## 4. Quality Analysis

In [None]:
import math  # cell-local import: needed only for math.pi in the radar chart

if successful_items:
    # Score a small sample of items with the project's quality checker and
    # plot the per-dimension averages as a radar chart.
    quality_checker = QualityChecker(config)

    # Names of the per-dimension scores read from each result; the same list
    # provides the radar-chart axis labels, so the two cannot drift apart.
    dimensions = ['logical_coherence', 'technical_accuracy', 'reasoning_depth',
                  'domain_relevance', 'completeness', 'clarity']

    quality_scores = []
    sample_size = min(10, len(successful_items))  # at most the first 10 items

    print(f"Analyzing quality for {sample_size} items...")

    for i, item in enumerate(successful_items[:sample_size]):
        # The result is indexed as a mapping with an 'overall_score' entry and
        # a 'dimension_scores' mapping whose values each hold a 'score'.
        quality_result = quality_checker.check_quality(item)
        row = {
            'data_id': item.get('data_id', f'item_{i}'),
            'overall_score': quality_result['overall_score'],
        }
        dimension_scores = quality_result['dimension_scores']
        for dim in dimensions:
            row[dim] = dimension_scores[dim]['score']
        quality_scores.append(row)

    quality_df = pd.DataFrame(quality_scores)

    # Plot quality dimensions
    plt.figure(figsize=(12, 8))

    # Average of each dimension over the sampled items.
    avg_scores = [quality_df[dim].mean() for dim in dimensions]

    # Spread the dimensions evenly around the full circle (2*pi radians),
    # then repeat the first point so the outline closes on itself.
    angles = [n / float(len(dimensions)) * 2 * math.pi for n in range(len(dimensions))]
    angles += angles[:1]
    avg_scores += avg_scores[:1]

    plt.subplot(111, projection='polar')
    plt.plot(angles, avg_scores, 'o-', linewidth=2)
    plt.fill(angles, avg_scores, alpha=0.25)
    plt.xticks(angles[:-1], dimensions)
    # NOTE(review): the radial axis is fixed to 0..1, which assumes the
    # checker's scores are normalized to that range — confirm.
    plt.ylim(0, 1)
    plt.title('Average Quality Dimensions')
    plt.show()

    # Display quality statistics
    print("\nQuality Statistics:")
    print(quality_df.describe())

## 5. Sample CoT Data Inspection

In [None]:
def _preview(text, limit=500):
    """Return *text* cut to *limit* characters, with "..." added only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


if successful_items:
    # Show the first successful item as a representative sample.
    sample_item = successful_items[0]

    print("=" * 60)
    print("SAMPLE COT ITEM")
    print("=" * 60)
    print(f"Application: {sample_item.get('source_application', 'Unknown')}")
    print(f"Data ID: {sample_item.get('data_id', 'Unknown')}")
    print()

    # Logic and think sections are previewed (first 500 characters); the
    # ellipsis now appears only when text was really dropped, so short
    # sections and the "No ... content" placeholders are printed as-is.
    print("LOGIC:")
    print("-" * 40)
    print(_preview(sample_item.get('logic', 'No logic content')))
    print()

    print("THINK:")
    print("-" * 40)
    print(_preview(sample_item.get('think', 'No think content')))
    print()

    # The answer section is printed in full.
    print("ANSWER:")
    print("-" * 40)
    print(sample_item.get('answer', 'No answer content'))
    print()

    # Show metadata if available
    if 'metadata' in sample_item:
        print("METADATA:")
        print("-" * 40)
        for key, value in sample_item['metadata'].items():
            print(f"{key}: {value}")

## 6. Export Analysis Results

In [None]:
if not successful_items:
    print("No data to analyze. Please generate CoT data first.")
else:
    # Make sure the configured reports directory exists before writing to it.
    reports_path = Path(config.get("data.reports_path"))
    reports_path.mkdir(parents=True, exist_ok=True)

    # (variable holding the DataFrame, output file name, label for the message).
    # A table is exported only if the cell that builds it has been run, i.e.
    # its variable exists in the notebook namespace.
    exports = (
        ("df", "length_analysis.csv", "Length analysis"),
        ("quality_df", "quality_analysis.csv", "Quality analysis"),
    )
    for var_name, file_name, label in exports:
        if var_name in locals():
            target = reports_path / file_name
            locals()[var_name].to_csv(target, index=False)
            print(f"{label} exported to: {target}")

    print("\nAnalysis completed!")