In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet50, ResNet50_Weights
import cv2
import numpy as np
import json
import os
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import accuracy_score

# Dataset Definition
class HandwritingDataset(Dataset):
    def __init__(self, image_dir, annotations_file, max_length=128):
        self.image_dir = image_dir
        self.max_length = max_length
        
        with open(annotations_file, 'r') as f:
            self.annotations = json.load(f)

        self.data = [
            item for item in self.annotations 
            if item['status'] == 'success' and len(item['text'].strip()) > 0
        ]

        self.create_char_mappings()
        
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomRotation(degrees=10),
            transforms.ColorJitter(brightness=0.3, contrast=0.3),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def create_char_mappings(self):
        all_chars = set()
        for item in self.data:
            all_chars.update(item['text'])
        
        self.char_to_idx = {char: idx + 1 for idx, char in enumerate(sorted(all_chars))}
        self.char_to_idx['<pad>'] = 0
        self.idx_to_char = {idx: char for char, idx in self.char_to_idx.items()}
        self.vocab_size = len(self.char_to_idx)
        
        print(f"Vocabulary size: {self.vocab_size}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        image_path = os.path.join(
            self.image_dir,
            f"{os.path.splitext(item['filename'])[0]}_binary_adaptive.png"
        )
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not load image: {image_path}")
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        image = self.transform(image)
        text = item['text'][:self.max_length]
        text_indices = [self.char_to_idx[c] for c in text]
        if len(text_indices) < self.max_length:
            text_indices.extend([0] * (self.max_length - len(text_indices)))
        return {
            'image': image,
            'text': torch.tensor(text_indices, dtype=torch.long),
            'length': len(text)
        }

# Model Definition
class HandwritingRecognitionModel(nn.Module):
    def __init__(self, vocab_size, hidden_size=256, sequence_length=128):
        super().__init__()
        self.sequence_length = sequence_length
        self.cnn = resnet50(weights=ResNet50_Weights.DEFAULT)
        self.cnn.fc = nn.Identity()
        self.feature_processor = nn.Sequential(
            nn.Linear(2048, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.rnn = nn.GRU(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=3,
            bidirectional=True,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_size * 2, vocab_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        batch_size = x.size(0)
        features = self.cnn(x)
        features = self.feature_processor(features)
        features = features.unsqueeze(1).repeat(1, self.sequence_length, 1)
        rnn_out, _ = self.rnn(features)
        logits = self.fc(rnn_out)
        return logits

# Edit Distance Calculation
def edit_distance(s1, s2):
    """Calculate edit distance using dynamic programming."""
    m, n = len(s1), len(s2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    
    for i in range(m + 1):
        dp[i][0] = i
    for j in range(n + 1):
        dp[0][j] = j

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if s1[i - 1] == s2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
    
    return dp[m][n]

def similarity_ratio(s1, s2):
    """Calculate similarity ratio based on edit distance."""
    distance = edit_distance(s1, s2)
    max_len = max(len(s1), len(s2))
    if max_len == 0:
        return 1.0
    return 1 - distance / max_len

# Training Loop
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    with tqdm(train_loader, desc='Training') as pbar:
        for batch in pbar:
            images = batch['image'].to(device)
            texts = batch['text'].to(device)
            optimizer.zero_grad()
            logits = model(images)
            B, S, V = logits.shape
            loss = criterion(logits.view(B * S, V), texts.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pbar.set_postfix({'loss': loss.item()})
    return total_loss / len(train_loader)

# Validation Loop
def validate(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            images = batch['image'].to(device)
            texts = batch['text'].to(device)
            logits = model(images)
            B, S, V = logits.shape
            loss = criterion(logits.view(B * S, V), texts.view(-1))
            total_loss += loss.item()
    return total_loss / len(val_loader)

# Decode Predictions
def decode_prediction(logits, idx_to_char, beam_width=5):
    predictions = torch.argmax(logits, dim=-1)
    decoded_texts = []
    for pred in predictions:
        text = ''.join([idx_to_char[idx.item()] for idx in pred if idx.item() != 0])
        decoded_texts.append(text)
    return decoded_texts

# Main Function
from torchsummary import summary

def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # Data Preparation
    data_dir = "output"
    image_dir = os.path.join(data_dir, "processed_images")
    annotations_file = os.path.join(data_dir, "ocr/detailed_results.json")
    dataset = HandwritingDataset(image_dir, annotations_file)
    print(f"Total samples: {len(dataset)}")
    
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)
    
    # Model Initialization
    model = HandwritingRecognitionModel(
        vocab_size=dataset.vocab_size,
        sequence_length=dataset.max_length
    ).to(device)
    
    # Print Model Summary
    print("\nModel Summary:")
    summary(model, input_size=(3, 224, 224))
    
    # Training Setup
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    optimizer = optim.AdamW(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    num_epochs = 15
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss = validate(model, val_loader, criterion, device)
        print(f"Train Loss: {train_loss:.4f}")
        print(f"Val Loss: {val_loss:.4f}")
        scheduler.step()
        
        # Save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
                'char_to_idx': dataset.char_to_idx,
                'idx_to_char': dataset.idx_to_char
            }, 'best_model.pth')
            print("Saved best model")
            
            # Sample Predictions
            with torch.no_grad():
                model.eval()
                batch = next(iter(val_loader))
                images = batch['image'].to(device)
                logits = model(images)
                predictions = decode_prediction(logits, dataset.idx_to_char)
                print("\nSample Predictions:")
                for i, pred in enumerate(predictions[:3]):  # Show first 3 predictions
                    print(f"Predicted: {pred}")

if __name__ == "__main__":
    main()


Using device: cuda
Vocabulary size: 112
Total samples: 427

Model Summary:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13        

Training: 100%|██████████| 22/22 [00:46<00:00,  2.10s/it, loss=2.42]


Train Loss: 2.8634
Val Loss: 2.6259
Saved best model

Sample Predictions:
Predicted: eeeee                                                                                                                           
Predicted: eeeeeee                                                                                                                         
Predicted: eeeeeeee                                                                                                                        

Epoch 2/15


Training: 100%|██████████| 22/22 [00:46<00:00,  2.10s/it, loss=2.55]


Train Loss: 2.6129
Val Loss: 2.6067
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 3/15


Training: 100%|██████████| 22/22 [00:46<00:00,  2.11s/it, loss=2.58]


Train Loss: 2.6064
Val Loss: 2.6103

Epoch 4/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.06s/it, loss=2.65]


Train Loss: 2.6047
Val Loss: 2.6003
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 5/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.06s/it, loss=2.41]


Train Loss: 2.5948
Val Loss: 2.6066

Epoch 6/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.08s/it, loss=2.5] 


Train Loss: 2.5959
Val Loss: 2.5996
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 7/15


Training: 100%|██████████| 22/22 [00:46<00:00,  2.14s/it, loss=2.59]


Train Loss: 2.5963
Val Loss: 2.5947
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 8/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.07s/it, loss=2.46]


Train Loss: 2.5883
Val Loss: 2.5928
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 9/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.07s/it, loss=2.51]


Train Loss: 2.5864
Val Loss: 2.5938

Epoch 10/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.08s/it, loss=2.85]


Train Loss: 2.5965
Val Loss: 2.5923
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 11/15


Training: 100%|██████████| 22/22 [00:45<00:00,  2.06s/it, loss=2.51]


Train Loss: 2.5855
Val Loss: 2.5927

Epoch 12/15


Training: 100%|██████████| 22/22 [00:46<00:00,  2.10s/it, loss=2.62]


Train Loss: 2.5890
Val Loss: 2.5930

Epoch 13/15


Training: 100%|██████████| 22/22 [00:44<00:00,  2.04s/it, loss=2.51]


Train Loss: 2.5859
Val Loss: 2.5913
Saved best model

Sample Predictions:
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              
Predicted: ee                                                                                                                              

Epoch 14/15


Training: 100%|██████████| 22/22 [00:46<00:00,  2.12s/it, loss=3.13]


Train Loss: 2.6066
Val Loss: 2.5921

Epoch 15/15


Training: 100%|██████████| 22/22 [00:46<00:00,  2.12s/it, loss=2.49]


Train Loss: 2.5900
Val Loss: 2.6015


In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import json
from collections import defaultdict
import os

class ModelAnalyzer:
    def __init__(self, model_path, test_loader, device, idx_to_char):
        """
        Initialize the analyzer with model and data
        """
        self.checkpoint = torch.load(model_path)
        self.model = HandwritingRecognitionModel(
            vocab_size=len(idx_to_char),
            sequence_length=128
        ).to(device)
        self.model.load_state_dict(self.checkpoint['model_state_dict'])
        self.model.eval()
        
        self.test_loader = test_loader
        self.device = device
        self.idx_to_char = idx_to_char
        
    def decode_text(self, tensor):
        """Convert tensor predictions to text"""
        indices = tensor.argmax(dim=-1)
        text = ''.join([self.idx_to_char[idx.item()] for idx in indices if idx.item() != 0])
        return text
        
    def calculate_text_metrics(self, pred_text, true_text):
        """Calculate various text similarity metrics"""
        return {
            'edit_distance': edit_distance(pred_text, true_text),
            'similarity_ratio': similarity_ratio(pred_text, true_text),
            'length_diff': abs(len(pred_text) - len(true_text)),
            'exact_match': pred_text == true_text
        }
        
    def analyze_model_performance(self):
        """Perform comprehensive model analysis"""
        results = []
        character_errors = defaultdict(int)
        total_chars = 0
        correct_chars = 0
        
        with torch.no_grad():
            for batch in tqdm(self.test_loader, desc="Analyzing performance"):
                images = batch['image'].to(self.device)
                true_texts = batch['text']
                
                logits = self.model(images)
                
                for i in range(len(images)):
                    pred_text = self.decode_text(logits[i])
                    true_text = ''.join([self.idx_to_char[idx.item()] 
                                       for idx in true_texts[i] if idx.item() != 0])
                    
                    metrics = self.calculate_text_metrics(pred_text, true_text)
                    
                    for pred_char, true_char in zip(pred_text, true_text):
                        total_chars += 1
                        if pred_char == true_char:
                            correct_chars += 1
                        else:
                            character_errors[f"{true_char}->{pred_char}"] += 1
                    
                    results.append({
                        'predicted_text': pred_text,
                        'true_text': true_text,
                        'text_length': len(true_text),
                        **metrics
                    })
        
        return pd.DataFrame(results), character_errors, correct_chars / total_chars

    def generate_analysis_report(self, save_dir='analysis_results'):
        """Generate comprehensive analysis report with visualizations"""
        os.makedirs(save_dir, exist_ok=True)
        
        print("Analyzing model performance...")
        results_df, char_errors, char_accuracy = self.analyze_model_performance()
        
        basic_stats = {
            'Total Samples': len(results_df),
            'Average Edit Distance': results_df['edit_distance'].mean(),
            'Average Similarity Ratio': results_df['similarity_ratio'].mean(),
            'Exact Match Rate': results_df['exact_match'].mean(),
            'Character Accuracy': char_accuracy
        }
        
        with open(os.path.join(save_dir, 'basic_stats.json'), 'w') as f:
            json.dump(basic_stats, f, indent=4)
        
        print("\nBasic Statistics:")
        print(pd.DataFrame.from_dict(basic_stats, orient='index', columns=['Value']))
        
        # Visualization
        plt.figure(figsize=(16, 12))
        
        # Boxplot for Edit Distance
        plt.subplot(2, 2, 1)
        sns.boxplot(data=results_df, x='edit_distance', color='skyblue')
        plt.title('Boxplot of Edit Distances')
        plt.xlabel('Edit Distance')
        
        # Violin plot for Text Length vs Similarity Ratio
        plt.subplot(2, 2, 2)
        sns.violinplot(data=results_df, x='text_length', y='similarity_ratio', scale='width', palette='muted')
        plt.title('Text Length vs Similarity Ratio')
        plt.xlabel('Text Length')
        plt.ylabel('Similarity Ratio')
        
        # Heatmap for Character Error Types
        plt.subplot(2, 2, 3)
        char_errors_df = pd.DataFrame(
            list(char_errors.items()),
            columns=['Error_Type', 'Count']
        ).sort_values('Count', ascending=False).head(10)
        sns.heatmap(char_errors_df.pivot_table(index='Error_Type', values='Count', aggfunc='sum'), annot=True, fmt='d', cmap='YlGnBu')
        plt.title('Top 10 Character Error Types')
        plt.xlabel('Frequency')
        
        # Boxplot for Length Differences
        plt.subplot(2, 2, 4)
        sns.boxplot(data=results_df, x='length_diff', color='coral')
        plt.title('Boxplot of Length Differences')
        plt.xlabel('Length Difference')
        
        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, 'analysis_plots.png'))
        plt.close()
        
        # Save detailed results
        results_df.to_csv(os.path.join(save_dir, 'detailed_results.csv'), index=False)
        
        # Display best and worst examples
        examples = results_df.sort_values('similarity_ratio')
        worst_examples = examples.head(10)
        best_examples = examples.tail(10)
        
        print("\nWorst Examples:")
        print(worst_examples[['predicted_text', 'true_text', 'similarity_ratio']])
        
        print("\nBest Examples:")
        print(best_examples[['predicted_text', 'true_text', 'similarity_ratio']])
        
        examples_report = {
            'worst_cases': worst_examples.to_dict('records'),
            'best_cases': best_examples.to_dict('records')
        }
        
        with open(os.path.join(save_dir, 'example_cases.json'), 'w') as f:
            json.dump(examples_report, f, indent=4)
        
        return basic_stats, results_df
    
    def categorize_error(self, pred, true):
        """Categorize the type of error"""
        if len(pred) > len(true):
            return 'insertion'
        elif len(pred) < len(true):
            return 'deletion'
        else:
            return 'substitution'

def analyze_model_results():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_path = 'best_model.pth'
    
    test_dataset = HandwritingDataset(
        image_dir="output/processed_images",
        annotations_file="output/ocr/detailed_results.json"
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False,
        num_workers=0
    )
    
    analyzer = ModelAnalyzer(
        model_path=model_path,
        test_loader=test_loader,
        device=device,
        idx_to_char=test_dataset.idx_to_char
    )
    
    print("Generating analysis report...")
    stats, results = analyzer.generate_analysis_report()
    
    print("\nModel Performance Summary:")
    print("-" * 50)
    for key, value in stats.items():
        print(f"{key}: {value:.4f}")
    
    print("\nDetailed results saved in 'analysis_results' directory")

if __name__ == "__main__":
    analyze_model_results()


Vocabulary size: 112
Generating analysis report...
Analyzing model performance...


Analyzing performance: 100%|██████████| 27/27 [00:56<00:00,  2.11s/it]



Basic Statistics:
                               Value
Total Samples             427.000000
Average Edit Distance      91.201405
Average Similarity Ratio    0.287489
Exact Match Rate            0.000000
Character Accuracy          0.287489

Worst Examples:
                                        predicted_text  \
371  ee                                            ...   
383  ee                                            ...   
104  ee                                            ...   
90   ee                                            ...   
228  ee                                            ...   
170  ee                                            ...   
115  ee                                            ...   
187  ee                                            ...   
97   ee                                            ...   
226  ee                                            ...   

                                             true_text  similarity_ratio  
371  Fen SOE agian Tae nD me