In [1]:
import torch
from PIL import Image
from open_flamingo import create_model_and_transforms
from dataclasses import dataclass
import pandas as pd
import Levenshtein
import json
from tqdm.auto import tqdm
from typing import Tuple, List, Dict
import os
from pathlib import Path
import logging
from datetime import datetime

@dataclass
class PosixConfig:
    """Generation settings for a POSIX evaluation run."""
    # Upper bound on newly generated tokens per prompt; get_flamingo_posix
    # forwards it to the model's get_responses().
    max_new_tokens: int = 20
    # Device index for the run.
    device_id: int = 0
    # Batching flag; get_flamingo_posix does not read it.
    batched: bool = False
    # Beam width; get_flamingo_posix forwards it to the model's get_responses().
    num_beams: int = 3

@dataclass
class PosixTrace:
    """Record of one get_flamingo_posix() run, one entry per prompt set."""
    # The 'original' prompt of each prompt set.
    prompts: list[str]
    # Decoded model responses; responses[s][k] belongs to prompt k of set s.
    responses: list[list[str]]
    # Square log-probability matrices; entry [i][j] of a matrix is the summed
    # log-probability of response j scored under prompt i.
    logprob_matrices: list[list[list[float]]]
    # Prompt sensitivity of each prompt set.
    prompt_sensitivities: list[float]
    # Mean of prompt_sensitivities over all prompt sets.
    posix: float

class FlamingoModel:
    """Wrapper around an OpenFlamingo model for generation and response scoring.

    Holds the model, its image processor and its tokenizer, and exposes the
    two operations the POSIX computation needs: generating a response for a
    prompt and scoring an arbitrary response under an arbitrary prompt.
    """

    def __init__(
        self,
        clip_vision_encoder_path: str = "ViT-L-14",
        clip_vision_encoder_pretrained: str = "openai",
        lang_encoder_path: str = "/shared/shashmi/mpt-7b-model",
        tokenizer_path: str = "/shared/shashmi/mpt-7b-model",
        device: str = "cuda:0"
    ):
        """Build the OpenFlamingo model and move it to ``device``.

        Args:
            clip_vision_encoder_path: Vision encoder name passed to
                ``create_model_and_transforms``.
            clip_vision_encoder_pretrained: Pretrained tag of the vision encoder.
            lang_encoder_path: Path or name of the language model.
            tokenizer_path: Path or name of the tokenizer.
            device: Torch device string the model and all inputs are moved to.
        """
        self.device = device

        self.model, self.image_processor, self.tokenizer = create_model_and_transforms(
            clip_vision_encoder_path=clip_vision_encoder_path,
            clip_vision_encoder_pretrained=clip_vision_encoder_pretrained,
            lang_encoder_path=lang_encoder_path,
            tokenizer_path=tokenizer_path,
            cross_attn_every_n_layers=4
        )

        self.model = self.model.to(device)
        # The model is only used for inference; eval mode switches off any
        # train-time stochastic layers so repeated scoring is deterministic.
        self.model.eval()
        self.tokenizer.padding_side = "left"

    def format_prompt(self, question: str) -> str:
        """Wrap ``question`` in the image/end-of-chunk template used by this file."""
        # NOTE(review): the template closes the chunk *before* generation
        # starts. OpenFlamingo examples usually put <|endofchunk|> after the
        # answer, and the run logs show a right-padding warning on every
        # generate() call -- presumably because this token is also used as
        # the pad/eos id. Confirm the template is intended.
        return f"<image>{question}<|endofchunk|>"

    def process_image(self, image_path: str) -> torch.Tensor:
        """Load one image and return it as a tensor on ``self.device``.

        Three leading singleton dimensions are added to whatever the image
        processor returns (assumed to be a (C, H, W) tensor -- TODO confirm).

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
        """
        # PIL loads lazily and keeps the file open; the context manager
        # releases the handle as soon as the pixels have been processed.
        with Image.open(image_path) as image:
            pixels = self.image_processor(image)
        vision_x = pixels.unsqueeze(0).unsqueeze(1).unsqueeze(0)
        return vision_x.to(self.device)

    def get_responses(
        self,
        image_path: str,
        prompts: list[str],
        max_new_tokens: int = 20,
        num_beams: int = 3
    ) -> Tuple[list[list[int]], list[str], list[int]]:
        """Generate one response per prompt for a single image.

        Args:
            image_path: Path of the image every prompt refers to.
            prompts: Raw questions; each is passed through ``format_prompt``.
            max_new_tokens: Upper bound on generated tokens per prompt.
            num_beams: Beam-search width.

        Returns:
            A tuple ``(response_tokens, responses, instruction_lengths)``:
            the newly generated token ids per prompt (prompt tokens removed),
            the decoded text of those tokens, and the token length of each
            formatted prompt.
        """
        response_tokens = []
        responses = []
        instruction_lengths = []

        # The image is identical for every prompt, so it is processed once.
        vision_x = self.process_image(image_path)

        for prompt in prompts:
            inputs = self.tokenizer(
                [self.format_prompt(prompt)],
                return_tensors="pt",
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
            prompt_length = inputs["input_ids"].shape[1]
            instruction_lengths.append(prompt_length)

            with torch.inference_mode():
                output_ids = self.model.generate(
                    vision_x=vision_x,
                    lang_x=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=max_new_tokens,
                    num_beams=num_beams,
                )

            # For a decoder-only language model generate() returns the prompt
            # followed by the continuation. Only the continuation is the
            # response; keeping the prompt would make every "response"
            # contain its own question when it is scored under other prompts.
            generated = output_ids[0][prompt_length:]
            response_tokens.append(generated.tolist())
            responses.append(
                self.tokenizer.decode(generated, skip_special_tokens=True)
            )

        return response_tokens, responses, instruction_lengths

    def compute_log_probabilities(
        self,
        image_path: str,
        prompt: str,
        response_tokens: list[int],
        instruction_length: int
    ) -> float:
        """Return the summed log-probability of a response given a prompt.

        Args:
            image_path: Path of the image the prompt refers to.
            prompt: Raw question; it is passed through ``format_prompt``.
            response_tokens: Token ids of the response to score.
            instruction_length: Token length of the formatted prompt; it
                locates the response inside the concatenated sequence.

        Returns:
            Sum over response tokens of log P(token | image, prompt, previous
            response tokens); ``0.0`` for an empty response.
        """
        if not response_tokens:
            # Nothing to score; also avoids building an empty float tensor.
            return 0.0

        vision_x = self.process_image(image_path)

        inputs = self.tokenizer(
            [self.format_prompt(prompt)],
            return_tensors="pt",
        )
        prompt_ids = inputs["input_ids"].to(self.device)
        response_ids = torch.tensor(
            [response_tokens], dtype=torch.long, device=self.device
        )

        full_sequence = torch.cat([prompt_ids, response_ids], dim=1)
        # A single unpadded sequence: every position is a real token.
        attention_mask = torch.ones_like(full_sequence)

        with torch.inference_mode():
            outputs = self.model(
                vision_x=vision_x,
                lang_x=full_sequence,
                attention_mask=attention_mask
            )
            logits = outputs.logits

            # The logits at position t predict token t + 1, so the
            # predictions for the response start one step before it.
            start = instruction_length - 1
            response_logits = logits[:, start:start + len(response_tokens), :]
            log_probs = torch.log_softmax(response_logits, dim=-1)

            # Pick the log-probability of each actual response token.
            token_log_probs = log_probs[0].gather(
                1, response_ids[0].unsqueeze(1)
            ).squeeze(1)
            final_logprob = token_log_probs.double().sum().item()

        return final_logprob

class PromptSensitivityAnalyzer:
    """Stateless helpers for preparing prompt sets and comparing strings."""

    @staticmethod
    def calculate_char_level_similarity(str1: str, str2: str) -> float:
        """Return a character-level similarity between two strings.

        The score is ``1 - distance / max(len(str1), len(str2))`` where
        ``distance`` is the Levenshtein edit distance. Two empty strings are
        identical and score ``1.0``.
        """
        max_len = max(len(str1), len(str2))
        if max_len == 0:
            # Both strings are empty; avoid dividing by zero.
            return 1.0
        distance = Levenshtein.distance(str1, str2)
        return 1 - (distance / max_len)

    @staticmethod
    def extract_prompt_sets(json_data: Dict) -> List[Dict[str, List[str]]]:
        """Build the prompt-set list for one question record.

        Args:
            json_data: Record with the keys ``question``, ``answer``,
                ``image`` and ``variation_1`` through ``variation_10``.

        Returns:
            A one-element list holding a dict with the keys ``original``,
            ``variations``, ``answer`` and ``image``.

        Raises:
            KeyError: If any of the expected keys is missing.
        """
        # Each record carries exactly ten numbered variations.
        variations = [json_data[f'variation_{i}'] for i in range(1, 11)]
        return [{
            'original': json_data['question'],
            'variations': variations,
            'answer': json_data['answer'],
            'image': json_data['image'],
        }]

def get_flamingo_posix(
    model: FlamingoModel,
    prompt_sets: List[Dict[str, List[str]]],
    config: PosixConfig,
    verbose: bool = False
) -> Tuple[float, PosixTrace, List[Dict]]:
    """Calculate POSIX scores using OpenFlamingo model.

    For each prompt set, a response is generated for every prompt, every
    response is scored under every prompt, and the prompt sensitivity is the
    mean over ordered pairs (i, j), i != j, of

        |log P(y_j | x_i) - log P(y_j | x_j)| / len(y_j)

    i.e. the length-normalised change in log-likelihood of response ``y_j``
    when its own prompt ``x_j`` is replaced by ``x_i``.

    Args:
        model: Object providing ``format_prompt``, ``get_responses`` and
            ``compute_log_probabilities``.
        prompt_sets: Dicts with the keys ``original`` (str), ``variations``
            (list of str) and ``image`` (path).
        config: Supplies ``max_new_tokens`` and ``num_beams``.
        verbose: Print per-set progress details when true.

    Returns:
        ``(posix, trace, detailed_scores)``: the mean prompt sensitivity over
        all sets, a ``PosixTrace`` of the run, and per set a dict mapping
        ``"Variation j"`` to the pair term obtained with the original prompt
        (``i == 0``) and response ``j``.

    Raises:
        ValueError: If ``prompt_sets`` is empty.
    """
    if not prompt_sets:
        raise ValueError("prompt_sets must contain at least one prompt set")

    responses = []
    logprob_matrices = []
    prompt_sensitivities = []
    detailed_scores = []

    for set_index, prompt_set in enumerate(tqdm(prompt_sets)):
        all_prompts = [prompt_set['original']] + prompt_set['variations']
        image_path = prompt_set['image']

        if verbose:
            print(f"\nProcessing prompt set {set_index+1}")
            print(f"Original prompt: {model.format_prompt(all_prompts[0])}")
            if len(all_prompts) > 1:
                print(f"First variation: {model.format_prompt(all_prompts[1])}")

        # Generate responses for all prompts
        set_tokens, set_responses, instruction_lengths = model.get_responses(
            image_path,
            all_prompts,
            max_new_tokens=config.max_new_tokens,
            num_beams=config.num_beams
        )
        responses.append(set_responses)

        # logprob_matrix[i][j] = log P(response j | prompt i)
        logprob_matrix = [
            [
                model.compute_log_probabilities(
                    image_path, prompt, tokens, prompt_length
                )
                for tokens in set_tokens
            ]
            for prompt, prompt_length in zip(all_prompts, instruction_lengths)
        ]
        logprob_matrices.append(logprob_matrix)

        n_prompts = len(all_prompts)
        psi = 0.0
        scores = {}
        for i in range(n_prompts):
            for j in range(n_prompts):
                if i == j:
                    continue
                # Guard against a zero-length response.
                response_length = max(len(set_tokens[j]), 1)
                # Same response y_j under prompt x_i versus its own prompt
                # x_j; the baseline must be [j][j] so that both terms score
                # the response whose length normalises the difference.
                diff = abs(logprob_matrix[i][j] - logprob_matrix[j][j]) / response_length
                psi += diff
                if i == 0:
                    scores[f"Variation {j}"] = diff

        # With fewer than two prompts there are no pairs to compare.
        pair_count = n_prompts * (n_prompts - 1)
        prompt_sensitivity = psi / pair_count if pair_count else 0.0
        prompt_sensitivities.append(prompt_sensitivity)
        detailed_scores.append(scores)

        if verbose:
            print(f"\nPrompt sensitivity: {prompt_sensitivity:.4f}")

    posix = sum(prompt_sensitivities) / len(prompt_sets)

    trace = PosixTrace(
        [prompt_set['original'] for prompt_set in prompt_sets],
        responses,
        logprob_matrices,
        prompt_sensitivities,
        posix
    )

    return posix, trace, detailed_scores

def main(
    input_dir: str = "/ephemeral/shashmi/posix_new_improved/Thesis/spell_error_question_variants",
    output_dir: str = "/ephemeral/shashmi/posix_new_improved/i_swear_final_openflamingo/spell_error",
) -> None:
    """Compute POSIX scores for every question file in ``input_dir``.

    Each ``question_*_variants.json`` file is processed independently: its
    result is written to ``<stem>_results.json`` in ``output_dir`` and a
    one-row summary is collected. A failure in one file is recorded and does
    not stop the run. The summary of all files is written to
    ``all_results_summary.csv`` in ``output_dir``.

    Args:
        input_dir: Directory holding the question variant JSON files.
        output_dir: Directory for result files; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The config is built first so the model is placed on the device it names.
    config = PosixConfig(max_new_tokens=50, device_id=0, num_beams=3)
    model = FlamingoModel(
        lang_encoder_path="/shared/shashmi/mpt-7b-model",
        tokenizer_path="/shared/shashmi/mpt-7b-model",
        device=f"cuda:{config.device_id}"
    )

    json_files = sorted(Path(input_dir).glob("question_*_variants.json"))
    print(f"Found {len(json_files)} files to process")

    all_results = []
    for file_path in tqdm(json_files, desc="Processing files"):
        try:
            with open(file_path, 'r', encoding="utf-8") as f:
                json_data = json.load(f)

            prompt_sets = PromptSensitivityAnalyzer.extract_prompt_sets(json_data)

            posix, trace, detailed_scores = get_flamingo_posix(
                model,
                prompt_sets,
                config,
                verbose=False
            )

            output_file = os.path.join(output_dir, f"{file_path.stem}_results.json")
            with open(output_file, 'w', encoding="utf-8") as f:
                json.dump({
                    "file_name": file_path.name,
                    "overall_posix": posix,
                    "detailed_scores": detailed_scores,
                    "trace": vars(trace)
                }, f, indent=4)

            all_results.append({
                "file_name": file_path.name,
                "posix_score": posix,
                "status": "success"
            })

        except Exception as e:
            # Top-level boundary of a long batch run: record the failure
            # (e.g. a missing image) and continue with the next file.
            print(f"Error processing {file_path.name}: {str(e)}")
            all_results.append({
                "file_name": file_path.name,
                "posix_score": None,
                "status": "failed",
                "error": str(e)
            })

        finally:
            # Release cached GPU memory between files, on success or failure.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    summary_df = pd.DataFrame(all_results)
    summary_df.to_csv(os.path.join(output_dir, "all_results_summary.csv"), index=False)

    succeeded = sum(result["status"] == "success" for result in all_results)
    print(f"Processed {len(all_results)} files: {succeeded} succeeded, {len(all_results) - succeeded} failed")

# Start the batch evaluation only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
Using pad_token, but it is not set yet.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.


Loading checkpoint shards: 100%|██████████| 3/3 [00:21<00:00,  7.12s/it]


Flamingo model initialized with 1384781840 trainable parameters
Found 400 files to process


Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_i

Error processing question_179_variants.json: [Errno 2] No such file or directory: '/ephemeral/shashmi/posix_new_improved/matching_subset_images/CXR3991_IM-2044-1001.png'


Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:50277 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_i