In [1]:
# Enhanced Jigsaw Agile Community Rules Competition Solution
# This solution implements a fine-tuned LLaMA 3.1 8B model for Reddit comment rule violation detection

# Opening banner: title framed by two 80-character rules.
_banner_rule = "=" * 80
for _banner_line in (
    _banner_rule,
    "JIGSAW AGILE COMMUNITY RULES COMPETITION - ENHANCED SOLUTION",
    _banner_rule,
):
    print(_banner_line)

# Environment setup and imports
import os
import math
import numpy as np
import pandas as pd
import torch
from typing import Any, Dict, List
from transformers import LogitsProcessor
import vllm
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import warnings
warnings.filterwarnings('ignore')

# Directory that collects every artefact this model contributes to the ensemble.
ENSEMBLE_DIR = 'ensemble_files'
os.makedirs(ENSEMBLE_DIR, exist_ok=True)
print(f"\nEnsemble output directory: {ENSEMBLE_DIR}")

# Wall-clock reference point; later sections subtract it to report run time.
start_time = datetime.now()
print(f"\nExecution started at: {start_time}")

# Expose GPUs 0 and 1 to this process. The variable is written before the
# torch.cuda queries that follow.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
print(f"\nGPU Configuration: Using CUDA devices 0 and 1")
print(f"PyTorch version: {torch.__version__}")

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_total = torch.cuda.device_count()
    print(f"Number of GPUs: {gpu_total}")
    for gpu_index in range(gpu_total):
        print(f"  GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

# Data Loading Section
_section_rule = "=" * 50
print("\n" + _section_rule)
print("DATA LOADING PHASE")
print(_section_rule)

# KAGGLE_IS_COMPETITION_RERUN being set (and non-empty) selects the real test
# set; otherwise the labelled training file stands in so the run can be scored.
is_training = not os.getenv('KAGGLE_IS_COMPETITION_RERUN')

if is_training:
    print("Running in development environment - loading training data for validation")
    test = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/train.csv')
    # Empty frame indexed by row_id; the prediction column is attached later.
    sub = test[['row_id']].set_index('row_id')
else:
    print("Running in competition environment - loading test data")
    test = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/test.csv')
    sub = pd.read_csv('/kaggle/input/jigsaw-agile-community-rules/sample_submission.csv', index_col='row_id')

# Data exploration and statistics
print(f"\nDataset shape: {test.shape}")
print(f"Number of comments to process: {len(test)}")
print(f"\nColumn names: {list(test.columns)}")

if is_training:
    target_column = test['rule_violation']
    print(f"\nTarget distribution in training data:")
    print(target_column.value_counts())
    print(f"\nTarget distribution (%):")
    print(target_column.value_counts(normalize=True) * 100)

# Analyze rules distribution
rule_column = test['rule']
print(f"\nUnique rules in dataset: {rule_column.nunique()}")
print("\nRule distribution:")
print(rule_column.value_counts())

# Analyze subreddit distribution
subreddit_column = test['subreddit']
print(f"\nUnique subreddits: {subreddit_column.nunique()}")
print("\nTop 10 subreddits by comment count:")
print(subreddit_column.value_counts().head(10))

# Text statistics: character length of each comment body, stored on the frame.
print("\nComment length statistics:")
test['comment_length'] = test['body'].str.len()
print(test['comment_length'].describe())

# Model Initialization Section
for _line in ("\n" + "="*50, "MODEL INITIALIZATION PHASE", "="*50):
    print(_line)

print("Loading fine-tuned LLaMA 3.1 8B model...")
print("Model path: /kaggle/input/jigsaw-llama3-1-8b-instruct-training/llama-8b-instruct-jigsaw")

# Engine options passed to vLLM, kept together so the configuration reads as a unit.
engine_options = dict(
    tensor_parallel_size=2,        # shard the model across 2 GPUs
    gpu_memory_utilization=0.95,   # fraction of each GPU's memory vLLM may claim
    trust_remote_code=True,
    dtype="half",                  # FP16 weights/activations
    enforce_eager=True,            # run eagerly instead of capturing CUDA graphs
    max_model_len=2048,            # maximum sequence length
)
llm = vllm.LLM(
    "/kaggle/input/jigsaw-llama3-1-8b-instruct-training/llama-8b-instruct-jigsaw",
    **engine_options,
)
tokenizer = llm.get_tokenizer()
print("Model loaded successfully!")

# Constraint Setup Section
for _line in ("\n" + "="*50, "OUTPUT CONSTRAINT CONFIGURATION", "="*50):
    print(_line)

# Answer labels, in the order [negative, positive]; KEEP holds the id of the
# first token each label encodes to and is read by the logits processor and by
# the results-processing loop.
choices = ["No", "Yes"]
KEEP = [tokenizer.encode(label, add_special_tokens=False)[0] for label in choices]
print(f"Force predictions to be tokens {KEEP} which are {choices}.")
print("This ensures model outputs are constrained to binary classification")

class DigitLogitsProcessor(LogitsProcessor):
    """
    Logits processor that strongly favours a fixed set of token ids.

    The favoured ids are taken from the module-level ``KEEP`` list (the
    'No' and 'Yes' tokens). Their scores are raised by a constant, which
    biases generation toward them rather than masking other tokens out.
    """

    def __init__(self, tokenizer):
        # `tokenizer` is accepted for call-site compatibility but is not read
        # here; the allowed ids come from the module-level KEEP list.
        self.allowed_ids = KEEP

    def __call__(self, input_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
        """Raise the scores of the allowed ids by 100 and return ``scores``."""
        favoured = self.allowed_ids
        # Write the boosted values back into the tensor that was passed in.
        scores[favoured] = scores[favoured] + 100
        return scores

# Prompt Engineering Section
for _line in ("\n" + "="*50, "PROMPT TEMPLATE CONFIGURATION", "="*50):
    print(_line)

# System message sent with every request; it states the task and the
# expected Yes/No answer format.
sys_prompt = (
    "You are given a comment on reddit and a rule. "
    "Your task is to classify whether the comment violates the rule. "
    "Only respond Yes/No."
)
print(f"System prompt: {sys_prompt}")

def formatting(dataset):
    """Render each conversation in ``dataset`` to a prompt string.

    Every element is passed through the module-level tokenizer's chat
    template with ``tokenize=False`` (text is returned, not ids) and
    ``add_generation_prompt=False``.

    Returns:
        A list of rendered prompts, one per conversation, in input order.
    """
    # NOTE(review): no generation prompt is appended, so the rendered text ends
    # after the user turn - presumably this matches the fine-tuning format; confirm.
    return [
        tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=False)
        for conversation in dataset
    ]

# Few-shot learning template with examples. The four examples alternate
# Yes / No / No / Yes and the prompt ends right where the answer is expected.
template = """
Subreddit: r/{subreddit}
Rule: {rule}
Examples:
1) {positive_example_1}
Violation: Yes

2) {negative_example_1}
Violation: No

3) {negative_example_2}
Violation: No

4) {positive_example_2}
Violation: Yes
Comment:
{body}
Violation: """

print("\nFew-shot learning approach:")
for _bullet in (
    "- Each prompt includes 2 positive and 2 negative examples",
    "- This helps the model understand the specific rule context",
    "- Critical for generalizing to unseen rules in test set",
):
    print(_bullet)

# Dataset Preparation Section
for _line in ("\n" + "="*50, "DATASET PREPARATION PHASE", "="*50):
    print(_line)

dataset = []
prompt_lengths = []

for index, row in test.iterrows():
    # Render the user message once; it feeds both the conversation and the
    # length report below.
    user_content = template.format(
        rule=row.rule,
        subreddit=row.subreddit,
        body=row.body,
        positive_example_1=row.positive_example_1,
        negative_example_1=row.negative_example_1,
        positive_example_2=row.positive_example_2,
        negative_example_2=row.negative_example_2,
    )
    dataset.append([
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": user_content},
    ])

    # Only rows whose index label is below 5 are reported.
    if index >= 5:
        continue
    prompt_length = len(user_content)
    prompt_lengths.append(prompt_length)
    print(f"\nExample {index+1} prompt length: {prompt_length} characters")

all_prompts = formatting(dataset)
print(f"\nTotal prompts prepared: {len(all_prompts)}")

# Inference Section
for _line in ("\n" + "="*50, "INFERENCE PHASE", "="*50):
    print(_line)

for _line in (
    "Inference configuration:",
    "- Batch processing with vLLM for efficiency",
    "- Temperature=0 for deterministic outputs",
    "- Top-p=0.9 for nucleus sampling",
    "- Max tokens=1 (only need Yes/No)",
    "- Logprobs=2 to get probability distribution",
):
    print(_line)

# One processor instance is shared by every request in the batch.
logits_processors = [DigitLogitsProcessor(tokenizer)]
inference_start = datetime.now()

# Decoding settings: a single generated token per prompt, with the
# log-probabilities of the top two candidates returned alongside it.
sampling_config = vllm.SamplingParams(
    n=1,
    top_p=0.9,
    temperature=0,
    seed=777,
    skip_special_tokens=True,
    max_tokens=1,
    logits_processors=logits_processors,
    logprobs=2,
)
responses = llm.generate(all_prompts, sampling_config, use_tqdm=True)

inference_end = datetime.now()
print(f"\nInference completed in: {inference_end - inference_start}")

# Results Processing Section
print("\n" + "="*50)
print("RESULTS PROCESSING PHASE")
print("="*50)

# Per-response [P(No), P(Yes)] pairs plus bookkeeping used by later sections.
results = []
errors = 0
raw_predictions = []
confidence_scores = []

for i, response in enumerate(responses):
    try:
        # Log-probabilities reported for the single generated position,
        # keyed by token id.
        token_logprobs = response.outputs[0].logprobs[0]
        weights = []
        for k in KEEP:
            if k in token_logprobs:
                weights.append(math.exp(token_logprobs[k].logprob))
            else:
                weights.append(0.0)
                print(f"Warning: Missing logits for token {k} at index {i}")
        weights = np.array(weights, dtype=float)

        # If neither allowed token was returned the total is 0 and dividing
        # would silently produce NaN probabilities; treat that as an error so
        # the neutral fallback below is used instead.
        total = weights.sum()
        if not np.isfinite(total) or total <= 0:
            raise ValueError("no probability mass on the allowed tokens")
        pair = weights / total  # renormalise over the two allowed tokens
    except Exception as e:
        # Deliberately broad: one malformed response must not abort the whole
        # batch. The row gets a neutral 50/50 score and is counted as an error.
        print(f"Error processing response {i}: {e}")
        results.append(np.array([1/2., 1/2.]))
        raw_predictions.append("Unknown")
        confidence_scores.append(0.5)
        errors += 1
    else:
        results.append(pair)
        raw_predictions.append("Yes" if pair[1] > 0.5 else "No")
        confidence_scores.append(pair.max())

total_responses = len(responses)
# Guard the percentage so an empty batch reports 0% instead of dividing by zero.
error_rate_pct = errors / total_responses * 100 if total_responses else 0.0
print(f"\nProcessing complete. There were {errors} inference errors out of {total_responses} inferences")
print(f"Error rate: {error_rate_pct:.2f}%")

# np.vstack rejects an empty list, so fall back to an empty (0, 2) array.
results = np.vstack(results) if results else np.empty((0, 2))

# Extract probabilities for positive class (Yes)
probs = [pair[1] for pair in results]

# Confidence Analysis
for _line in ("\n" + "="*50, "CONFIDENCE ANALYSIS", "="*50):
    print(_line)

confidence_scores = np.array(confidence_scores)
for _label, _value in (
    ("Average confidence score", confidence_scores.mean()),
    ("Minimum confidence", confidence_scores.min()),
    ("Maximum confidence", confidence_scores.max()),
    ("Standard deviation", confidence_scores.std()),
):
    print(f"{_label}: {_value:.4f}")

# Array view of the violation probabilities; also read by the saving section.
probs_array = np.array(probs)
total_predictions = len(probs)

# Distribution of predictions around the 0.5 decision threshold.
print("\nPrediction distribution:")
yes_count = int((probs_array > 0.5).sum())
no_count = int((probs_array <= 0.5).sum())
print(f"Predicted 'Yes' (violation): {yes_count} ({yes_count/total_predictions*100:.2f}%)")
print(f"Predicted 'No' (no violation): {no_count} ({no_count/total_predictions*100:.2f}%)")

# Probability distribution analysis
print("\nProbability distribution statistics:")
print(f"Mean probability of violation: {probs_array.mean():.4f}")
print(f"Median probability: {np.median(probs_array):.4f}")
print(f"Standard deviation: {probs_array.std():.4f}")

# Confidence buckets. Boundaries: 0.9 belongs to "confident yes", 0.7 and 0.3
# to "uncertain", 0.1 to "confident no".
print("\nConfidence distribution:")
very_confident_yes = int((probs_array > 0.9).sum())
confident_yes = int(((probs_array > 0.7) & (probs_array <= 0.9)).sum())
uncertain = int(((probs_array >= 0.3) & (probs_array <= 0.7)).sum())
confident_no = int(((probs_array >= 0.1) & (probs_array < 0.3)).sum())
very_confident_no = int((probs_array < 0.1).sum())

for _label, _count in (
    ("Very confident YES (>0.9)", very_confident_yes),
    ("Confident YES (0.7-0.9)", confident_yes),
    ("Uncertain (0.3-0.7)", uncertain),
    ("Confident NO (0.1-0.3)", confident_no),
    ("Very confident NO (<0.1)", very_confident_no),
):
    print(f"{_label}: {_count} ({_count/total_predictions*100:.2f}%)")

# Evaluation Section (if training data)
# Both names stay None on a competition run, where no labels are available;
# later sections check for that.
auc_score = None
binary_predictions = None

if is_training:
    print("\n" + "="*50)
    print("MODEL EVALUATION")
    print("="*50)

    # Work on plain arrays so per-rule subsets are simple boolean-mask slices.
    y_true = test['rule_violation'].to_numpy()
    y_score = np.asarray(probs, dtype=float)

    auc_score = roc_auc_score(y_true, y_score)
    print(f"AUC Score: {auc_score:.6f}")

    # Hard labels at the 0.5 threshold for the classification metrics.
    binary_predictions = [1 if p > 0.5 else 0 for p in probs]

    print("\nClassification Report:")
    print(classification_report(y_true, binary_predictions,
                                target_names=['No Violation', 'Violation']))

    # Confusion Matrix
    cm = confusion_matrix(y_true, binary_predictions)
    print("\nConfusion Matrix:")
    print(cm)

    # Per-rule performance analysis
    print("\nPer-rule AUC scores:")
    rule_labels = test['rule'].to_numpy()
    for rule in test['rule'].unique():
        rule_mask = rule_labels == rule
        n_rule = int(rule_mask.sum())
        if n_rule <= 10:  # Only score rules with more than 10 samples
            continue
        rule_truth = y_true[rule_mask]
        # ROC AUC needs both classes in the subset; report instead of calling
        # roc_auc_score on a single-class slice.
        if np.unique(rule_truth).size < 2:
            print(f"  {rule}: AUC undefined - only one class present (n={n_rule})")
            continue
        rule_auc = roc_auc_score(rule_truth, y_score[rule_mask])
        print(f"  {rule}: {rule_auc:.4f} (n={n_rule})")

# Save to Ensemble Directory Section
for _line in ("\n" + "="*50, "SAVING TO ENSEMBLE DIRECTORY", "="*50):
    print(_line)

# Identifier prefixed to every file this model writes into ENSEMBLE_DIR.
model_name = 'llama_8b'

# Predictions for the ensemble: one probability per row_id.
ensemble_predictions = pd.DataFrame({
    'row_id': test['row_id'],
    'rule_violation': probs
})
ensemble_path = os.path.join(ENSEMBLE_DIR, f'{model_name}_predictions.csv')
ensemble_predictions.to_csv(ensemble_path, index=False)
print(f"Predictions saved to: {ensemble_path}")

# Run metadata written next to the predictions.
gpu_configuration = {
    'num_gpus': 2,
    'tensor_parallel_size': 2,
    'gpu_memory_utilization': 0.95,
    'max_model_len': 2048
}
metadata = {
    'model': model_name,
    'model_path': '/kaggle/input/jigsaw-llama3-1-8b-instruct-training/llama-8b-instruct-jigsaw',
    'timestamp': datetime.now().isoformat(),
    'num_predictions': len(probs),
    'mean_prediction': float(probs_array.mean()),
    'std_prediction': float(probs_array.std()),
    'execution_time': str(datetime.now() - start_time),
    'inference_time': str(inference_end - inference_start),
    # None on competition runs, where no labels exist to score against.
    'auc_score': None if auc_score is None else float(auc_score),
    'error_rate': errors/len(responses)*100,
    'gpu_configuration': gpu_configuration
}

metadata_path = os.path.join(ENSEMBLE_DIR, f'{model_name}_metadata.json')
with open(metadata_path, 'w') as f:
    json.dump(metadata, f, indent=2)
print(f"Metadata saved to: {metadata_path}")

# Standalone submission: once inside the ensemble directory, once as the
# conventional submission.csv in the working directory.
sub['rule_violation'] = probs
individual_submission_path = os.path.join(ENSEMBLE_DIR, f'{model_name}_submission.csv')
sub.to_csv(individual_submission_path)
print(f"Individual submission saved to: {individual_submission_path}")

sub.to_csv('submission.csv')
print(f"Standard submission saved to: submission.csv")

# Row-level dump for error analysis; only possible when labels are present.
if is_training:
    detailed_results = test.assign(
        predicted_probability=probs,
        predicted_class=binary_predictions,
        confidence=confidence_scores,
    )
    matches_label = detailed_results['rule_violation'] == detailed_results['predicted_class']
    detailed_results['correct'] = matches_label.astype(int)
    detailed_path = os.path.join(ENSEMBLE_DIR, f'{model_name}_detailed_analysis.csv')
    detailed_results.to_csv(detailed_path, index=False)
    print(f"Detailed analysis saved to: {detailed_path}")

# Performance Summary Section
for _line in ("\n" + "="*50, "SOLUTION SUMMARY", "="*50):
    print(_line)

end_time = datetime.now()
total_time = end_time - start_time
elapsed_seconds = total_time.total_seconds()
row_count = len(test)

# Throughput is measured over the whole run, from start_time to now.
print(f"\nTotal execution time: {total_time}")
print(f"Average time per prediction: {elapsed_seconds/row_count:.3f} seconds")
print(f"Predictions per second: {row_count/elapsed_seconds:.2f}")

# Summary of saved files
for _line in ("\n" + "="*50, "ENSEMBLE FILES CREATED", "="*50):
    print(_line)
print(f"Directory: {ENSEMBLE_DIR}/")

created_files = [
    f"{model_name}_predictions.csv (predictions for ensemble)",
    f"{model_name}_metadata.json (model metadata)",
    f"{model_name}_submission.csv (standalone submission)",
    "submission.csv (direct submission file)",
]
if is_training:
    # The detailed dump is only written when labels were available.
    created_files.append(f"{model_name}_detailed_analysis.csv (detailed results)")
for _entry in created_files:
    print(f"  - {_entry}")

# Display submission preview
print("\nSubmission preview:")
print(sub.head(10))

# Advantages and Limitations
# Each report is kept as a tuple of lines and joined with newlines; the empty
# first and last entries reproduce the leading and trailing line breaks.
_advantages_lines = (
    "",
    "1. STRONG GENERALIZATION: The few-shot learning approach with examples helps the model",
    "   understand new rules not seen during training, which is critical for this competition.",
    "",
    "2. EFFICIENT INFERENCE: Using vLLM with tensor parallelism enables fast batch processing",
    "   across multiple GPUs, achieving ~8 predictions per second.",
    "",
    "3. ROBUST OUTPUT CONTROL: The logits processor ensures outputs are always valid ",
    "   (Yes/No), preventing formatting errors in submissions.",
    "",
    "4. HIGH ACCURACY: The fine-tuned LLaMA 3.1 8B model achieves strong performance",
    "   (0.9565 AUC on training data), indicating good understanding of rule violations.",
    "",
    "5. PROBABILISTIC OUTPUTS: Extracting probabilities from logprobs provides calibrated",
    "   confidence scores, important for the AUC evaluation metric.",
    "",
)

_limitations_lines = (
    "",
    "1. COMPUTATIONAL REQUIREMENTS: Requires 2 GPUs with significant VRAM (14.74GB each),",
    "   making it resource-intensive and potentially costly to deploy.",
    "",
    "2. PROMPT LENGTH CONSTRAINTS: The 2048 token limit may truncate very long comments",
    "   or examples, potentially losing important context.",
    "",
    "3. SINGLE MODEL DEPENDENCY: Relies on one model without ensemble benefits. Consider:",
    "   - Ensemble multiple model sizes or architectures",
    "   - Temperature sampling for uncertainty estimation",
    "   - Cross-validation for better generalization",
    "",
    "4. LIMITED ERROR HANDLING: Basic error recovery for failed inferences. Could improve:",
    "   - Retry logic for transient failures",
    "   - Fallback to smaller models if memory issues occur",
    "   - Better logging for debugging production issues",
    "",
    "5. POTENTIAL IMPROVEMENTS:",
    "   - Dynamic example selection based on comment similarity",
    "   - Rule-specific fine-tuning or adapters",
    "   - Active learning to identify uncertain predictions",
    "   - Post-processing to handle edge cases (empty comments, special characters)",
    "   - Experiment with different prompt templates and orderings",
    "",
)

for _heading, _body_lines in (
    ("SOLUTION ANALYSIS: ADVANTAGES", _advantages_lines),
    ("SOLUTION ANALYSIS: LIMITATIONS AND IMPROVEMENTS", _limitations_lines),
):
    print("\n" + "="*50)
    print(_heading)
    print("="*50)
    print()
    print("\n".join(_body_lines))

# Closing banner
print("\n" + "="*80)
print("EXECUTION COMPLETE")
print("="*80)

JIGSAW AGILE COMMUNITY RULES COMPETITION - ENHANCED SOLUTION

Ensemble output directory: ensemble_files

Execution started at: 2025-07-28 05:32:30.864258

GPU Configuration: Using CUDA devices 0 and 1
PyTorch version: 2.7.1+cu126
CUDA available: True
Number of GPUs: 2
  GPU 0: Tesla T4
  GPU 1: Tesla T4

DATA LOADING PHASE
Running in development environment - loading training data for validation

Dataset shape: (2029, 9)
Number of comments to process: 2029

Column names: ['row_id', 'body', 'rule', 'subreddit', 'positive_example_1', 'positive_example_2', 'negative_example_1', 'negative_example_2', 'rule_violation']

Target distribution in training data:
rule_violation
1    1031
0     998
Name: count, dtype: int64

Target distribution (%):
rule_violation
1    50.813208
0    49.186792
Name: proportion, dtype: float64

Unique rules in dataset: 2

Rule distribution:
rule
No legal advice: Do not offer or request legal advice.                                                     1017
No Advert

2025-07-28 05:32:36.753729: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753680757.096003      76 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753680757.195213      76 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 07-28 05:32:52 [__init__.py:235] Automatically detected platform cuda.
INFO 07-28 05:33:08 [config.py:1604] Using max model len 2048
INFO 07-28 05:33:09 [llm_engine.py:228] Initializing a V0 LLM engine (v0.10.0) with config: model='/kaggle/input/jigsaw-llama3-1-8b-instruct-training/llama-8b-instruct-jigsaw', speculative_config=None, tokenizer='/kaggle/input/jigsaw-llama3-1-8b-instruct-training/llama-8b-instruct-jigsaw', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config

2025-07-28 05:33:14.554852: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753680794.575238     123 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753680794.581437     123 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 07-28 05:33:19 [__init__.py:235] Automatically detected platform cuda.
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:33:20 [multiproc_worker_utils.py:226] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:33:21 [cuda.py:346] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:33:21 [cuda.py:395] Using XFormers backend.


[W728 05:33:32.526578699 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:33:32.878375567 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:33:42.536178032 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 07-28 05:33:52 [__init__.py:1375] Found nccl from library libnccl.so.2
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:33:52 [__init__.py:1375] Found nccl from library libnccl.so.2
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:33:52 [pynccl.py:70] vLLM is using nccl==2.26.2
INFO 07-28 05:33:52 [pynccl.py:70] vLLM is using nccl==2.26.2


[W728 05:33:52.546721686 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 07-28 05:33:52 [custom_all_reduce_utils.py:208] generating GPU P2P access cache in /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
INFO 07-28 05:34:16 [custom_all_reduce_utils.py:246] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:34:16 [custom_all_reduce_utils.py:246] reading GPU P2P access cache from /root/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
INFO 07-28 05:34:16 [shm_broadcast.py:289] vLLM message queue communication handle: Handle(local_reader_ranks=[1], buffer_handle=(1, 4194304, 6, 'psm_c7b2ecc3'), local_subscribe_addr='ipc:///tmp/b65b5e3f-81ed-4832-b2de-68e33fc8ce91', remote_subscribe_addr=None, remote_addr_ipv6=False)
INFO 07-28 05:34:16 [parallel_state.py:1102] rank 0 in world size 2 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:34:16 [parallel_state.py:1102] rank 1 in world size 2 is assigned as DP ra

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 07-28 05:35:40 [default_loader.py:262] Loading weights took 83.07 seconds
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:35:40 [default_loader.py:262] Loading weights took 83.36 seconds
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:35:41 [model_runner.py:1115] Model loading took 7.5123 GiB and 83.592720 seconds
INFO 07-28 05:35:41 [model_runner.py:1115] Model loading took 7.5123 GiB and 83.298796 seconds
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:35:48 [worker.py:295] Memory profiling takes 6.96 seconds
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:35:48 [worker.py:295] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
[1;36m(VllmWorkerProcess pid=123)[0;0m INFO 07-28 05:35:48 [worker.py:295] model weights take 7.51GiB; non_torch_memory takes 0.12GiB; PyTorch activation peak memory takes 0.14GiB; the rest of the memory reserved for KV Cache is 6.23GiB.
INFO 07-28 05:35:48 [worker.py:29

Adding requests:   0%|          | 0/2029 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/2029 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s…


Inference completed in: 0:05:15.188368

RESULTS PROCESSING PHASE

Processing complete. There were 0 inference errors out of 2029 inferences
Error rate: 0.00%

CONFIDENCE ANALYSIS
Average confidence score: 0.7869
Minimum confidence: 0.5000
Maximum confidence: 0.9724
Standard deviation: 0.1139

Prediction distribution:
Predicted 'Yes' (violation): 1070 (52.74%)
Predicted 'No' (no violation): 959 (47.26%)

Probability distribution statistics:
Mean probability of violation: 0.5287
Median probability: 0.5927
Standard deviation: 0.3074

Confidence distribution:
Very confident YES (>0.9): 215 (10.60%)
Confident YES (0.7-0.9): 662 (32.63%)
Uncertain (0.3-0.7): 456 (22.47%)
Confident NO (0.1-0.3): 558 (27.50%)
Very confident NO (<0.1): 138 (6.80%)

MODEL EVALUATION
AUC Score: 0.956509

Classification Report:
              precision    recall  f1-score   support

No Violation       0.92      0.89      0.90       998
   Violation       0.89      0.93      0.91      1031

    accuracy            

In [2]:
"""
Enhanced GPU Memory Management and Deep Cleanup Script
Ensures complete cleanup of distributed computing resources and memory between model executions
"""

import os
import gc
import torch
import psutil
import time
import subprocess
import signal
import shutil
from datetime import datetime
import json

def _find_processes_by_keyword(keywords, protected_pids):
    """Return processes whose command line contains any of *keywords*, skipping *protected_pids*."""
    matches = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        if proc.info['pid'] in protected_pids:
            continue
        cmdline = ' '.join(proc.info['cmdline'] or [])
        if any(keyword in cmdline for keyword in keywords):
            matches.append(proc)
    return matches


def _remove_fs_entry(path):
    """Delete *path* if it is a regular file or a directory; return True when something was removed."""
    if os.path.isfile(path):
        os.unlink(path)
        return True
    if os.path.isdir(path):
        shutil.rmtree(path)
        return True
    return False


def deep_cleanup_distributed():
    """Perform deep cleanup of all distributed computing resources.

    Runs seven best-effort steps and prints progress for each:

    1. Destroy the torch.distributed process group, if one is initialized.
    2. Terminate processes whose command line mentions vLLM/Ray workers,
       force-killing any that are still alive after a 2 second grace period.
       The calling process and its ancestors are never targeted.
    3. Remove matching entries from /dev/shm.
    4. Remove matching entries from /tmp.
    5. Empty the CUDA caches and reset memory statistics on every visible GPU.
    6. Delete distributed/NCCL environment variables and CUDA_VISIBLE_DEVICES.
    7. Run several garbage-collection passes.

    Failures inside a step are reported and do not stop the remaining steps.
    Returns None.
    """
    print("\n" + "="*80)
    print("DEEP DISTRIBUTED CLEANUP PROCEDURE")
    print("="*80)

    # Step 1: Destroy PyTorch distributed process groups
    print("\nStep 1: Cleaning up distributed process groups...")
    try:
        if torch.distributed.is_initialized():
            torch.distributed.destroy_process_group()
            print("  ✓ Destroyed distributed process group")
        else:
            print("  - No active distributed process group found")
    except Exception as e:
        print(f"  - Error destroying process group: {e}")

    # Step 2: Kill all vLLM worker processes
    print("\nStep 2: Terminating vLLM worker processes...")
    worker_keywords = ['VllmWorkerProcess', 'vllm', 'ray::RayWorker']
    terminated_count = 0
    try:
        # The keyword match is a plain substring test on the command line, so
        # it can also hit this very process (or the one that launched it).
        # Exclude ourselves and every ancestor so cleanup cannot kill its caller.
        protected_pids = {os.getpid()}
        try:
            protected_pids.update(p.pid for p in psutil.Process().parents())
        except psutil.Error:
            pass  # ancestry unavailable; the current pid is still protected

        targets = _find_processes_by_keyword(worker_keywords, protected_pids)
        for proc in targets:
            try:
                proc.terminate()
                terminated_count += 1
                print(f"  ✓ Terminated process {proc.info['pid']}: {proc.info['name']}")
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue

        # Give processes up to 2 seconds to exit gracefully, then force-kill
        # only the ones that are still alive.
        _, survivors = psutil.wait_procs(targets, timeout=2)
        for proc in survivors:
            try:
                proc.kill()
                print(f"  ✓ Force killed process {proc.pid}")
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue

        if terminated_count == 0:
            print("  - No vLLM worker processes found")
    except Exception as e:
        print(f"  - Error terminating processes: {e}")

    # Step 3: Clean up shared memory segments
    print("\nStep 3: Cleaning up shared memory...")
    try:
        # Clean /dev/shm
        shm_path = '/dev/shm'
        shm_patterns = ['torch', 'nccl', 'cuda', 'vllm']
        if os.path.exists(shm_path):
            for item in os.listdir(shm_path):
                if not any(pattern in item for pattern in shm_patterns):
                    continue
                try:
                    # Report only entries that were actually deleted.
                    if _remove_fs_entry(os.path.join(shm_path, item)):
                        print(f"  ✓ Removed shared memory: {item}")
                except OSError as e:
                    print(f"  - Could not remove {item}: {e}")
    except Exception as e:
        print(f"  - Error cleaning shared memory: {e}")

    # Step 4: Clean up temporary communication files
    print("\nStep 4: Cleaning up temporary communication files...")
    try:
        # Clean /tmp directory
        # NOTE(review): these are substring matches, so short patterns such as
        # 'ray' also match unrelated names that merely contain them.
        tmp_patterns = ['vllm', 'nccl', 'torch', 'ray', 'cuda']
        cleaned_count = 0
        for item in os.listdir('/tmp'):
            if not any(pattern in item for pattern in tmp_patterns):
                continue
            try:
                # Count only entries that were actually deleted.
                if _remove_fs_entry(os.path.join('/tmp', item)):
                    cleaned_count += 1
            except OSError:
                # Best effort: an entry that cannot be removed is skipped.
                continue
        print(f"  ✓ Cleaned {cleaned_count} temporary files/directories")
    except Exception as e:
        print(f"  - Error cleaning temporary files: {e}")

    # Step 5: Reset CUDA context
    print("\nStep 5: Resetting CUDA context...")
    if torch.cuda.is_available():
        try:
            # Clear CUDA IPC memory
            torch.cuda.ipc_collect()
            print("  ✓ Collected CUDA IPC memory")

            # Empty the cache and reset memory statistics on each GPU in turn
            for gpu_id in range(torch.cuda.device_count()):
                torch.cuda.set_device(gpu_id)
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                torch.cuda.reset_peak_memory_stats(gpu_id)
                torch.cuda.reset_accumulated_memory_stats(gpu_id)
                print(f"  ✓ Reset GPU {gpu_id}")

        except Exception as e:
            print(f"  - Error resetting CUDA: {e}")

    # Step 6: Clear environment variables
    print("\nStep 6: Clearing distributed environment variables...")
    env_vars_to_clear = [
        'MASTER_ADDR', 'MASTER_PORT', 'RANK', 'WORLD_SIZE', 'LOCAL_RANK',
        'NCCL_SOCKET_IFNAME', 'NCCL_IB_DISABLE', 'NCCL_SOCKET_NTHREADS',
        'NCCL_NSOCKS_PERTHREAD', 'CUDA_VISIBLE_DEVICES'
    ]
    for var in env_vars_to_clear:
        if var in os.environ:
            del os.environ[var]
            print(f"  ✓ Cleared {var}")

    # Step 7: Force comprehensive garbage collection
    print("\nStep 7: Running comprehensive garbage collection...")
    for i in range(5):
        collected = gc.collect()
        print(f"  ✓ Pass {i+1}: Collected {collected} objects")
        time.sleep(0.5)

def comprehensive_cleanup(verbose=True):
    """Run the deep distributed cleanup followed by a standard memory cleanup.

    Steps, in order: snapshot memory, call ``deep_cleanup_distributed()``,
    wait 3 seconds, run one garbage-collection pass, empty the CUDA cache on
    every visible GPU, snapshot memory again, then report any leftover
    vLLM/ray-looking processes and the per-GPU change in free memory.

    Args:
        verbose: When true, print the opening banner and the initial/final
            memory tables. All other progress output is printed regardless.

    Returns:
        datetime.timedelta: Time elapsed from the start of the procedure to
        the end of the memory cleanup (the verification scan that follows is
        not included).
    """
    if verbose:
        print("\n" + "="*80)
        print("COMPREHENSIVE CLEANUP PROCEDURE")
        print("="*80)

    start_time = datetime.now()

    # Snapshot taken before anything is released; compared against the final
    # snapshot below to report how much GPU memory was freed.
    initial_memory = get_memory_info()
    if verbose:
        print_memory_status("Initial Memory Status")

    # Perform deep distributed cleanup first
    deep_cleanup_distributed()

    # Give terminated worker processes a moment to actually exit
    print("\nWaiting for process termination...")
    time.sleep(3)

    # Standard memory cleanup
    print("\n" + "="*80)
    print("STANDARD MEMORY CLEANUP")
    print("="*80)

    print("\nRunning Python garbage collection...")
    collected = gc.collect()
    print(f"  Collected {collected} objects")

    if torch.cuda.is_available():
        print("\nClearing PyTorch CUDA cache...")
        for gpu_id in range(torch.cuda.device_count()):
            torch.cuda.set_device(gpu_id)
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
            print(f"  ✓ Cleared cache for GPU {gpu_id}")

    final_memory = get_memory_info()
    if verbose:
        print_memory_status("Final Memory Status")

    # Duration is fixed here, before the verification scan
    cleanup_duration = datetime.now() - start_time
    print(f"\nTotal cleanup time: {cleanup_duration.total_seconds():.2f} seconds")

    print("\n" + "="*80)
    print("CLEANUP VERIFICATION")
    print("="*80)

    # Look for processes whose command line still mentions vLLM or ray.
    # NOTE(review): this is a plain substring match, so 'ray' also matches
    # unrelated command lines (e.g. ones containing "array").
    remaining_processes = []
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            cmdline = ' '.join(proc.info['cmdline'] or [])
            if any(keyword in cmdline for keyword in ['VllmWorkerProcess', 'vllm', 'ray']):
                remaining_processes.append(proc.info)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    if remaining_processes:
        print(f"⚠ Warning: {len(remaining_processes)} vLLM-related processes still running")
        for proc in remaining_processes[:5]:  # Show first 5
            print(f"  - PID {proc['pid']}: {proc['name']}")
    else:
        print("✓ All vLLM processes successfully terminated")

    # Pair the two snapshots GPU-by-GPU. zip() stops at the shorter list, so a
    # snapshot with fewer GPUs than the other cannot raise IndexError.
    gpu_pairs = zip(initial_memory['gpu'], final_memory['gpu'])
    for i, (before, after) in enumerate(gpu_pairs):
        freed = after['free_memory_gb'] - before['free_memory_gb']
        print(f"✓ GPU {i}: Freed {freed:.2f} GB of memory")

    return cleanup_duration

def prepare_for_qwen():
    """Set single-node environment variables and report leftover vLLM state.

    Writes fixed values for the torch.distributed rendezvous variables
    (``MASTER_ADDR``, ``MASTER_PORT``, ``RANK``, ``WORLD_SIZE``,
    ``LOCAL_RANK``), several ``NCCL_*`` variables and ``CUDA_VISIBLE_DEVICES``
    into ``os.environ``. It then scans running processes and ``/dev/shm`` for
    anything that still looks vLLM/torch/NCCL related. Findings are only
    printed as warnings; nothing is terminated or deleted here.

    Returns:
        None
    """
    print("\n" + "="*80)
    print("PREPARING ENVIRONMENT FOR QWEN")
    print("="*80)

    # Rendezvous settings describing a one-process, local-only "cluster"
    print("\nSetting environment variables for single-node execution...")
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '29500'
    os.environ['RANK'] = '0'
    os.environ['WORLD_SIZE'] = '1'
    os.environ['LOCAL_RANK'] = '0'

    # Force NCCL to use local communication (loopback interface, no
    # InfiniBand, no peer-to-peer transport)
    os.environ['NCCL_SOCKET_IFNAME'] = 'lo'
    os.environ['NCCL_IB_DISABLE'] = '1'
    os.environ['NCCL_P2P_DISABLE'] = '1'

    # NCCL socket-transport tuning (thread / sockets-per-thread counts)
    os.environ['NCCL_SOCKET_NTHREADS'] = '2'
    os.environ['NCCL_NSOCKS_PERTHREAD'] = '4'

    # Set CUDA devices explicitly.
    # NOTE(review): presumably this only affects CUDA contexts created after
    # this point (e.g. in child processes) — confirm.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    print("✓ Environment configured for local execution")

    print("\nVerifying clean environment...")
    issues = []

    # Check for any remaining vLLM processes. Only the psutil errors expected
    # for vanished or inaccessible processes are skipped, so interrupts and
    # genuine bugs are no longer swallowed.
    for proc in psutil.process_iter(['name', 'cmdline']):
        try:
            cmdline = ' '.join(proc.info['cmdline'] or [])
            if 'vllm' in cmdline.lower():
                issues.append(f"vLLM process still running: PID {proc.pid}")
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    # Check shared memory for leftover torch/NCCL/vLLM segments
    if os.path.exists('/dev/shm'):
        shm_files = [f for f in os.listdir('/dev/shm') if any(p in f for p in ['torch', 'nccl', 'vllm'])]
        if shm_files:
            issues.append(f"Shared memory files still exist: {shm_files[:3]}")

    if issues:
        print("⚠ Warning: Found potential issues:")
        for issue in issues:
            print(f"  - {issue}")
    else:
        print("✓ Environment is clean and ready for QWEN")

    print("\n" + "="*80)
    print("QWEN PREPARATION COMPLETE")
    print("="*80)

# Helper functions from original script
def get_memory_info():
    """Snapshot host RAM and per-GPU memory figures into a plain dict.

    Returns:
        dict with three keys:
            ``'timestamp'``: local time formatted as ``YYYY-MM-DD HH:MM:SS``.
            ``'cpu'``: ``total_gb``, ``available_gb``, ``used_gb`` and
                ``percent_used`` taken from ``psutil.virtual_memory()``.
            ``'gpu'``: one dict per CUDA device (empty list when CUDA is
                unavailable) with ``id``, ``name``, ``total_memory_gb``,
                ``allocated_memory_gb``, ``reserved_memory_gb`` and
                ``free_memory_gb``. "Free" is the device total minus the
                amount reported by ``torch.cuda.memory_reserved``.
        All ``*_gb`` values are bytes divided by 1024**3.
    """
    bytes_per_gb = 1024**3
    taken_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    host = psutil.virtual_memory()
    snapshot = {
        'timestamp': taken_at,
        'cpu': {
            'total_gb': host.total / bytes_per_gb,
            'available_gb': host.available / bytes_per_gb,
            'used_gb': host.used / bytes_per_gb,
            'percent_used': host.percent
        },
        'gpu': []
    }

    if not torch.cuda.is_available():
        return snapshot

    for device_index in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(device_index)
        allocated_bytes = torch.cuda.memory_allocated(device_index)
        reserved_bytes = torch.cuda.memory_reserved(device_index)
        snapshot['gpu'].append({
            'id': device_index,
            'name': props.name,
            'total_memory_gb': props.total_memory / bytes_per_gb,
            'allocated_memory_gb': allocated_bytes / bytes_per_gb,
            'reserved_memory_gb': reserved_bytes / bytes_per_gb,
            'free_memory_gb': (props.total_memory - reserved_bytes) / bytes_per_gb
        })

    return snapshot

def print_memory_status(title="Memory Status"):
    """Print a banner followed by the current CPU and GPU memory figures.

    Args:
        title: Heading for the report; printed upper-cased between two
            80-character rules.

    The figures come from a fresh ``get_memory_info()`` call. The GPU section
    is omitted when that snapshot lists no GPUs.
    """
    rule = "="*80
    print("\n" + rule)
    print(title.upper())
    print(rule)

    snapshot = get_memory_info()

    host = snapshot['cpu']
    host_lines = (
        "\nCPU Memory:",
        f"  Total: {host['total_gb']:.2f} GB",
        f"  Used: {host['used_gb']:.2f} GB ({host['percent_used']:.1f}%)",
        f"  Available: {host['available_gb']:.2f} GB",
    )
    for line in host_lines:
        print(line)

    devices = snapshot['gpu']
    if not devices:
        return

    print("\nGPU Memory:")
    for device in devices:
        device_lines = (
            f"\n  GPU {device['id']} ({device['name']}):",
            f"    Total: {device['total_memory_gb']:.2f} GB",
            f"    Allocated: {device['allocated_memory_gb']:.2f} GB",
            f"    Reserved: {device['reserved_memory_gb']:.2f} GB",
            f"    Free: {device['free_memory_gb']:.2f} GB",
        )
        for line in device_lines:
            print(line)

# Main execution function
def main():
    """Run the cleanup, pause, configure the environment for QWEN, and report.

    Sequence: ``comprehensive_cleanup(verbose=True)``, a 5-second pause,
    ``prepare_for_qwen()``, then a final memory table.

    Returns:
        The dict produced by a final ``get_memory_info()`` call.
    """
    rule = "="*80

    print(rule)
    print("ENHANCED GPU MEMORY MANAGEMENT AND CLEANUP")
    print(rule)
    print(f"Execution started at: {datetime.now()}")

    # Stage 1: release memory and distributed state
    comprehensive_cleanup(verbose=True)

    # Stage 2: let the system settle before touching the environment
    print("\nWaiting for complete system stabilization...")
    time.sleep(5)

    # Stage 3: environment variables and leftover-state checks
    prepare_for_qwen()

    # Stage 4: final report
    print_memory_status("Final System Status")

    print("\n" + rule)
    print("SYSTEM READY FOR QWEN MODEL EXECUTION")
    print(rule)

    closing_snapshot = get_memory_info()
    return closing_snapshot

if __name__ == "__main__":
    # Run the whole cleanup/preparation sequence, then persist the resulting
    # memory snapshot as indented JSON in the current working directory.
    final_memory_info = main()
    with open('deep_cleanup_log.json', 'w') as f:
        f.write(json.dumps(final_memory_info, indent=2))

ENHANCED GPU MEMORY MANAGEMENT AND CLEANUP
Execution started at: 2025-07-28 05:41:09.353260

COMPREHENSIVE CLEANUP PROCEDURE

INITIAL MEMORY STATUS

CPU Memory:
  Total: 31.35 GB
  Used: 3.89 GB (40.1%)
  Available: 18.77 GB

GPU Memory:

  GPU 0 (Tesla T4):
    Total: 14.74 GB
    Allocated: 12.72 GB
    Reserved: 13.70 GB
    Free: 1.04 GB

  GPU 1 (Tesla T4):
    Total: 14.74 GB
    Allocated: 0.00 GB
    Reserved: 0.00 GB
    Free: 14.74 GB

DEEP DISTRIBUTED CLEANUP PROCEDURE

Step 1: Cleaning up distributed process groups...
  ✓ Destroyed distributed process group

Step 2: Terminating vLLM worker processes...
  - No vLLM worker processes found

Step 3: Cleaning up shared memory...

Step 4: Cleaning up temporary communication files...
  ✓ Cleaned 1 temporary files/directories

Step 5: Resetting CUDA context...
  ✓ Collected CUDA IPC memory
  ✓ Reset GPU 0
  ✓ Reset GPU 1

Step 6: Clearing distributed environment variables...
  ✓ Cleared CUDA_VISIBLE_DEVICES

Step 7: Running compreh

In [3]:
"""
Enhanced Qwen 7B Model for Jigsaw Agile Community Rules Competition
Fixed distributed computing issues and improved GPU memory management
"""

import os
import sys
import gc
import time
import subprocess
import numpy as np
import pandas as pd
import torch
import psutil
import signal
from typing import List, Dict, Optional, Any, Tuple
from datetime import datetime
import json
import warnings
warnings.filterwarnings('ignore')

# Tear down any existing torch.distributed process group (no OS processes are killed)
def kill_distributed_processes():
    """Destroy this process's ``torch.distributed`` process group, if any.

    Despite the name, no operating-system processes are terminated: the
    function only calls ``torch.distributed.destroy_process_group()`` when a
    group is currently initialised.

    Best-effort: any error raised by the teardown is printed and suppressed so
    callers can invoke this unconditionally. ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.

    Returns:
        None
    """
    try:
        dist = torch.distributed
        # is_available() guards builds without distributed support, where the
        # rest of the API may be missing.
        if dist.is_available() and dist.is_initialized():
            dist.destroy_process_group()
    except Exception as exc:
        print(f"⚠ Could not destroy distributed process group: {exc}")

class GPUMemoryManager:
    """Stateless helpers for inspecting and clearing CUDA memory via PyTorch.

    All methods are static; instances carry no state.
    """

    @staticmethod
    def get_gpu_memory_info() -> List[Dict[str, Any]]:
        """Return one memory record per visible CUDA device.

        The query does not change the process's current CUDA device: every
        PyTorch call is given the device index explicitly.

        Returns:
            A list (empty when CUDA is unavailable) of dicts with keys ``id``,
            ``name``, ``total_memory_gb``, ``reserved_memory_gb``,
            ``allocated_memory_gb``, ``free_memory_gb`` and
            ``utilization_percent``. Sizes are bytes divided by 1024**3.
            ``free_memory_gb`` is total minus the amount reported by
            ``torch.cuda.memory_reserved``, and ``utilization_percent`` is
            reserved / total * 100.
        """
        if not torch.cuda.is_available():
            return []

        bytes_per_gb = 1024**3
        gpu_info = []

        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)

            total_memory = props.total_memory / bytes_per_gb
            reserved_memory = torch.cuda.memory_reserved(i) / bytes_per_gb
            allocated_memory = torch.cuda.memory_allocated(i) / bytes_per_gb

            gpu_info.append({
                'id': i,
                'name': props.name,
                'total_memory_gb': total_memory,
                'reserved_memory_gb': reserved_memory,
                'allocated_memory_gb': allocated_memory,
                'free_memory_gb': total_memory - reserved_memory,
                'utilization_percent': (reserved_memory / total_memory) * 100
            })

        return gpu_info

    @staticmethod
    def reset_gpu(gpu_id: int, aggressive: bool = False) -> bool:
        """Empty the CUDA cache and synchronise one GPU.

        Args:
            gpu_id: Index of the CUDA device to reset.
            aggressive: When true, also reset the peak/accumulated memory
                statistics and repeat collect-and-empty three times with a
                0.5 s pause after each round.

        Returns:
            True on success; False if any step raised (the error is printed).
        """
        print(f"\nResetting GPU {gpu_id}...")

        try:
            # The context manager selects gpu_id for the duration of the
            # block and restores the previously selected device on exit, so
            # callers do not end up on a different device.
            with torch.cuda.device(gpu_id):
                torch.cuda.empty_cache()
                torch.cuda.synchronize(gpu_id)
                gc.collect()

                if aggressive:
                    torch.cuda.reset_peak_memory_stats(gpu_id)
                    torch.cuda.reset_accumulated_memory_stats(gpu_id)

                    for _ in range(3):
                        gc.collect()
                        torch.cuda.empty_cache()
                        time.sleep(0.5)

            return True

        except Exception as e:
            print(f"Failed to reset GPU {gpu_id}: {e}")
            return False

    @staticmethod
    def reset_all_gpus(aggressive: bool = False) -> None:
        """Call ``reset_gpu`` for every visible CUDA device.

        Args:
            aggressive: Forwarded to ``reset_gpu``.

        Does nothing when CUDA is unavailable. Individual failures are
        reported by ``reset_gpu`` and do not stop the loop.
        """
        if not torch.cuda.is_available():
            return

        for i in range(torch.cuda.device_count()):
            GPUMemoryManager.reset_gpu(i, aggressive)

class QwenModelRunner:
    """QWEN model runner with fixed distributed computing and GPU management"""
    
    def __init__(self, model_path: str, ensemble_dir: str = 'ensemble_files'):
        """Record configuration, create the output directory, set up the environment.

        Args:
            model_path: Model location later handed to ``vllm.LLM``.
            ensemble_dir: Directory created here (if missing) for output files.

        Side effects: tears down any torch.distributed process group, creates
        ``ensemble_dir``, and rewrites several environment variables via
        ``_setup_environment()``.
        """
        # Caller-supplied settings
        self.model_path, self.ensemble_dir = model_path, ensemble_dir

        # Filled in once the model has been initialised
        self.model = self.tokenizer = None

        self.start_time = datetime.now()
        self.gpu_manager = GPUMemoryManager()

        # Assumed footprint of the model weights in GB (author's estimate for
        # QWEN 7B); used when deciding how many GPUs are needed.
        self.model_size_gb = 13.0

        # GPU selection, decided later by _check_system_resources()
        self.gpu_config = self.num_gpus = None
        self.requires_multi_gpu = False

        # Start from a clean distributed state
        kill_distributed_processes()

        # Output directory for ensemble artefacts
        os.makedirs(self.ensemble_dir, exist_ok=True)

        # Environment variables for vLLM / CUDA
        self._setup_environment()
        
    def _setup_environment(self):
        """Pin runtime environment variables and drop distributed ones.

        Destroys any active torch.distributed process group, writes a fixed
        set of variables into ``os.environ`` (printing each), then removes the
        rendezvous/NCCL variables listed below if they are present (printing
        each removal).
        """
        rule = "="*50
        print("\n" + rule)
        print("ENVIRONMENT CONFIGURATION")
        print(rule)

        # Make sure no process group survives from an earlier run
        kill_distributed_processes()

        # Values forced on every run, applied in this order
        forced_settings = (
            ('CUDA_VISIBLE_DEVICES', '0,1'),
            ('OMP_NUM_THREADS', '1'),
            ('MKL_NUM_THREADS', '1'),
            ('TOKENIZERS_PARALLELISM', 'false'),
            ('VLLM_WORKER_MULTIPROC_METHOD', 'spawn'),
            # Chosen by the original author as more stable than FLASH_ATTN
            ('VLLM_ATTENTION_BACKEND', 'XFORMERS'),
        )
        for name, setting in forced_settings:
            os.environ[name] = setting
            print(f"  Set {name}={setting}")

        # Distributed-related variables that must not leak into this run
        stale_names = (
            'MASTER_ADDR', 'MASTER_PORT', 'RANK', 'WORLD_SIZE',
            'LOCAL_RANK', 'NCCL_SOCKET_IFNAME', 'NCCL_IB_DISABLE',
            'NCCL_P2P_DISABLE', 'NCCL_TREE_THRESHOLD',
        )
        for name in stale_names:
            if os.environ.pop(name, None) is not None:
                print(f"  Removed {name}")

        print("\n✓ Environment configured for single-node execution")
    
    def _analyze_gpu_configuration(self) -> Tuple[Optional[str], Optional[int], bool]:
        """Analyze and determine optimal GPU configuration"""
        print("\n" + "="*50)
        print("GPU CONFIGURATION ANALYSIS")
        print("="*50)
        
        gpu_info = self.gpu_manager.get_gpu_memory_info()
        
        if not gpu_info:
            return None, None, False
        
        # Display current GPU status
        print("\nCurrent GPU Status:")
        print("-" * 80)
        print(f"{'GPU':>4} {'Name':>20} {'Total':>8} {'Free':>8} {'Used':>8} {'Util%':>6}")
        print("-" * 80)
        
        for gpu in gpu_info:
            print(f"{gpu['id']:>4} {gpu['name']:>20} {gpu['total_memory_gb']:>8.1f} "
                  f"{gpu['free_memory_gb']:>8.1f} {gpu['allocated_memory_gb']:>8.1f} "
                  f"{gpu['utilization_percent']:>6.1f}")
        
        # Sort GPUs by free memory
        gpu_info.sort(key=lambda x: x['free_memory_gb'], reverse=True)
        
        # Estimate memory requirement with overhead
        required_memory = self.model_size_gb * 1.15  # Reduced overhead
        
        print(f"\nModel Memory Requirements:")
        print(f"  Base model size: {self.model_size_gb:.1f} GB")
        print(f"  Required (with overhead): {required_memory:.1f} GB")
        
        # First, try aggressive GPU reset to free memory
        print("\nPerforming aggressive GPU cleanup...")
        self.gpu_manager.reset_all_gpus(aggressive=True)
        time.sleep(2)
        
        # Re-check after cleanup
        gpu_info = self.gpu_manager.get_gpu_memory_info()
        gpu_info.sort(key=lambda x: x['free_memory_gb'], reverse=True)
        
        print("\nGPU Status After Cleanup:")
        for gpu in gpu_info:
            print(f"  GPU {gpu['id']}: {gpu['free_memory_gb']:.1f} GB free")
        
        # Determine configuration
        if gpu_info[0]['free_memory_gb'] >= required_memory:
            selected_gpu = str(gpu_info[0]['id'])
            num_gpus = 1
            multi_gpu = False
            print(f"\n✓ Single GPU Configuration: GPU {selected_gpu} ({gpu_info[0]['free_memory_gb']:.1f} GB free)")
            return selected_gpu, num_gpus, multi_gpu
            
        elif len(gpu_info) > 1:
            # Check if we can use multi-GPU
            combined_memory = sum(g['free_memory_gb'] for g in gpu_info[:2])
            if combined_memory >= required_memory:
                selected_gpu = '0,1'
                num_gpus = 2
                multi_gpu = True
                print(f"\n✓ Multi-GPU Configuration:")
                print(f"  Combined memory: {combined_memory:.1f} GB")
                print(f"  Will use tensor parallelism across GPUs 0,1")
                return selected_gpu, num_gpus, multi_gpu
        
        print(f"\n✗ Insufficient GPU memory. Required: {required_memory:.1f} GB")
        return None, None, False
    
    def _check_system_resources(self) -> bool:
        """Check host RAM and GPU capacity, and store the chosen GPU layout.

        Sets ``self.gpu_config``, ``self.num_gpus`` and
        ``self.requires_multi_gpu`` from ``_analyze_gpu_configuration()``; when
        CUDA is unavailable or no layout fits they become ``None``, ``0`` and
        ``False``.

        Returns:
            True when at least 10 GB of host memory is available and a GPU
            layout was found; False otherwise (each problem is printed).
        """
        rule = "="*50
        print("\n" + rule)
        print("SYSTEM RESOURCE CHECK")
        print(rule)

        problems = []

        # Host memory: at least 10 GB must be available
        free_ram_gb = psutil.virtual_memory().available / (1024**3)
        if free_ram_gb < 10:
            problems.append(f"Low CPU memory: {free_ram_gb:.1f} GB available")
        else:
            print(f"✓ CPU Memory: {free_ram_gb:.1f} GB available")

        # GPU layout: start from "nothing usable" and overwrite on success
        layout = (None, 0, False)
        if torch.cuda.is_available():
            print(f"✓ Found {torch.cuda.device_count()} GPU(s)")

            candidate = self._analyze_gpu_configuration()
            if candidate[0] is None:
                problems.append("Insufficient GPU memory for model")
            else:
                layout = candidate
        else:
            problems.append("No CUDA devices available")

        self.gpu_config, self.num_gpus, self.requires_multi_gpu = layout

        if not problems:
            print("\n✓ All system checks passed")
            return True

        print("\n⚠ System resource issues:")
        for problem in problems:
            print(f"  - {problem}")
        return False
    
    def _install_dependencies(self) -> bool:
        """Make sure ``logits_processor_zoo`` is importable, installing it if needed.

        If the import fails, runs ``pip install logits_processor_zoo --no-deps``
        with the current interpreter and a 60-second timeout. After a
        successful install the import-system caches are invalidated so the
        freshly installed package can be found by later imports in this same
        process.

        Returns:
            True if the package was already importable or pip exited with
            status 0; False if pip failed, timed out, or could not be run.
        """
        print("\n" + "="*50)
        print("DEPENDENCY INSTALLATION")
        print("="*50)

        try:
            from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor  # noqa: F401
        except ImportError:
            pass
        else:
            print("✓ logits_processor_zoo already installed")
            return True

        print("Installing logits_processor_zoo...")
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pip", "install", "logits_processor_zoo", "--no-deps"],
                capture_output=True,
                text=True,
                timeout=60
            )
        except Exception as e:
            # Includes subprocess.TimeoutExpired and failure to launch pip
            print(f"⚠ Installation error: {e}")
            return False

        if result.returncode != 0:
            print(f"⚠ Installation failed: {result.stderr}")
            return False

        # Packages installed while the interpreter is running may be missed by
        # cached path finders until the caches are invalidated.
        import importlib
        importlib.invalidate_caches()

        print("✓ Successfully installed")
        return True
    
    def _initialize_model_with_retry(self, max_attempts: int = 3) -> bool:
        """Initialize vLLM model with proper error handling"""
        print("\n" + "="*50)
        print("MODEL INITIALIZATION")
        print("="*50)
        
        if self.gpu_config is None:
            print("✗ No valid GPU configuration available")
            return False
        
        # Clean up any existing distributed state before each attempt
        kill_distributed_processes()
        
        # Get GPU configuration
        optimal_gpus = self.gpu_config
        num_gpus = self.num_gpus
        
        for attempt in range(max_attempts):
            print(f"\nAttempt {attempt + 1}/{max_attempts}")
            
            try:
                # Clean up previous attempt
                if hasattr(self, 'model') and self.model is not None:
                    del self.model
                    gc.collect()
                    torch.cuda.empty_cache()
                    time.sleep(2)
                
                # Kill distributed processes again
                kill_distributed_processes()
                
                # Set CUDA devices
                os.environ['CUDA_VISIBLE_DEVICES'] = optimal_gpus
                
                # Progressive parameters - adjust based on attempt
                if attempt == 0:
                    gpu_utilization = 0.90
                    max_model_len = 2048
                elif attempt == 1:
                    gpu_utilization = 0.85
                    max_model_len = 1536
                else:
                    gpu_utilization = 0.80
                    max_model_len = 1024
                
                print(f"Initializing with:")
                print(f"  - GPU(s): {optimal_gpus}")
                print(f"  - Tensor parallel size: {num_gpus}")
                print(f"  - GPU utilization: {gpu_utilization:.2f}")
                print(f"  - Max model length: {max_model_len}")
                
                if self.requires_multi_gpu:
                    print(f"  - Multi-GPU distribution: ACTIVE")
                
                # Import vLLM here to ensure clean state
                import vllm
                
                # Initialize model with simplified parameters
                self.model = vllm.LLM(
                    model=self.model_path,
                    tensor_parallel_size=num_gpus,
                    gpu_memory_utilization=gpu_utilization,
                    trust_remote_code=True,
                    dtype="half",
                    enforce_eager=True,
                    max_model_len=max_model_len,
                    disable_log_stats=True,
                    seed=42
                )
                
                print("✓ Model initialized successfully")
                
                # Get tokenizer
                self.tokenizer = self.model.get_tokenizer()
                print("✓ Tokenizer loaded")
                
                return True
                
            except Exception as e:
                print(f"✗ Initialization failed: {str(e)[:200]}")
                
                # Clean up
                if hasattr(self, 'model'):
                    del self.model
                gc.collect()
                torch.cuda.empty_cache()
                kill_distributed_processes()
                
                if attempt < max_attempts - 1:
                    wait_time = (attempt + 1) * 5
                    print(f"Waiting {wait_time} seconds before retry...")
                    time.sleep(wait_time)
                    
                    # On last attempt, try single GPU if using multi-GPU
                    if attempt == max_attempts - 2 and self.num_gpus > 1:
                        print("Switching to single GPU for final attempt...")
                        gpu_info = self.gpu_manager.get_gpu_memory_info()
                        gpu_info.sort(key=lambda x: x['free_memory_gb'], reverse=True)
                        if gpu_info and gpu_info[0]['free_memory_gb'] >= self.model_size_gb:
                            self.gpu_config = str(gpu_info[0]['id'])
                            self.num_gpus = 1
                            self.requires_multi_gpu = False
                            optimal_gpus = self.gpu_config
                            num_gpus = 1
                else:
                    print("✗ All initialization attempts failed")
                    return False
        
        return False
    
    def _create_logits_processor(self):
        """Create logits processor with fallback"""
        try:
            from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor
            processor = MultipleChoiceLogitsProcessor(self.tokenizer, choices=["Yes", "No"])
            print("✓ Using external MultipleChoiceLogitsProcessor")
            return processor, True
        except Exception as e:
            print(f"⚠ Using fallback logits processor: {e}")
            
            class FallbackProcessor:
                def __init__(self, tokenizer, choices):
                    self.tokenizer = tokenizer
                    self.choices = choices
                    self.allowed_ids = []
                    
                    for choice in choices:
                        tokens = tokenizer.encode(choice, add_special_tokens=False)
                        if tokens:
                            self.allowed_ids.append(tokens[0])
                
                def __call__(self, input_ids, scores):
                    mask = torch.full_like(scores, float('-inf'))
                    for token_id in self.allowed_ids:
                        if token_id < len(scores):
                            mask[token_id] = 0
                    return scores + mask
            
            processor = FallbackProcessor(self.tokenizer, ["Yes", "No"])
            return processor, False
    
    def run_inference(self, df: pd.DataFrame, is_training: bool = False) -> Optional[Dict[str, Any]]:
        """Run model inference"""
        
        # System checks
        if not self._check_system_resources():
            print("✗ System resource check failed")
            return None
        
        # Install dependencies
        external_processor = self._install_dependencies()
        
        # Initialize model
        if not self._initialize_model_with_retry():
            print("✗ Failed to initialize model")
            return None
        
        # Display GPU configuration
        print("\n" + "="*50)
        print("FINAL GPU CONFIGURATION")
        print("="*50)
        print(f"Active GPUs: {self.gpu_config}")
        print(f"Distribution: {'Multi-GPU' if self.requires_multi_gpu else 'Single-GPU'}")
        
        # Create prompts
        print("\n" + "="*50)
        print("DATASET PREPARATION")
        print("="*50)
        
        SYS_PROMPT = "You are given a comment on reddit. Your task is to classify if it violates the given rule. Only respond Yes/No."
        
        prompts = []
        for _, row in df.iterrows():
            text = f"""r/{row.subreddit}
Rule: {row.rule}

1) {row.positive_example_1}
Violation: Yes

2) {row.negative_example_1}
Violation: No

3) {row.negative_example_2}
Violation: No

4) {row.positive_example_2}
Violation: Yes

5) {row.body}
"""
            
            messages = [
                {"role": "system", "content": SYS_PROMPT},
                {"role": "user", "content": text}
            ]
            
            prompt = self.tokenizer.apply_chat_template(
                messages,
                add_generation_prompt=True,
                tokenize=False,
            ) + "Violation: "
            prompts.append(prompt)
        
        print(f"✓ Prepared {len(prompts)} prompts")
        
        # Create logits processor
        processor, using_external = self._create_logits_processor()
        
        # Run inference
        print("\n" + "="*50)
        print("INFERENCE PHASE")
        print("="*50)
        
        import vllm
        
        inference_start = datetime.now()
        
        try:
            # Adjust batch size based on GPU configuration
            batch_size = 32 if self.requires_multi_gpu else 64
            all_outputs = []
            
            for i in range(0, len(prompts), batch_size):
                batch_prompts = prompts[i:i+batch_size]
                batch_num = i//batch_size + 1
                total_batches = (len(prompts) + batch_size - 1)//batch_size
                
                print(f"Processing batch {batch_num}/{total_batches}")
                
                batch_outputs = self.model.generate(
                    batch_prompts,
                    vllm.SamplingParams(
                        seed=0,
                        skip_special_tokens=True,
                        max_tokens=1,
                        logits_processors=[processor] if processor else None,
                        logprobs=2,
                        temperature=0.0,
                        top_p=1.0
                    ),
                    use_tqdm=True
                )
                
                all_outputs.extend(batch_outputs)
                
                # Clear cache between batches
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            
            outputs = all_outputs
            inference_end = datetime.now()
            print(f"\n✓ Inference completed in: {inference_end - inference_start}")
            
        except Exception as e:
            print(f"\n✗ Inference failed: {str(e)[:500]}")
            import traceback
            traceback.print_exc()
            return None
        
        # Process results
        print("\n" + "="*50)
        print("RESULTS PROCESSING")
        print("="*50)
        
        predictions = self._process_outputs(outputs)
        
        if predictions is None:
            return None
        
        # Calculate metrics and save
        results = self._calculate_metrics_and_save(df, predictions, is_training, using_external)
        
        # Add GPU configuration info
        results['gpu_configuration'] = {
            'gpus_used': self.gpu_config,
            'num_gpus': self.num_gpus,
            'multi_gpu': self.requires_multi_gpu
        }
        
        return results
    
    def _process_outputs(self, outputs) -> Optional[np.ndarray]:
        """Process model outputs"""
        logprobs = []
        
        for i, output in enumerate(outputs):
            try:
                logprob_dict = {}
                logprob_data = output.outputs[0].logprobs[0]
                
                # Extract logprobs - handle different formats
                if hasattr(logprob_data, 'values'):
                    # New format
                    for lp in logprob_data.values():
                        if hasattr(lp, 'decoded_token') and hasattr(lp, 'logprob'):
                            logprob_dict[lp.decoded_token] = lp.logprob
                elif isinstance(logprob_data, dict):
                    # Old format
                    for token_id, lp in logprob_data.items():
                        if hasattr(lp, 'decoded_token') and hasattr(lp, 'logprob'):
                            logprob_dict[lp.decoded_token] = lp.logprob
                        elif isinstance(lp, dict) and 'decoded_token' in lp:
                            logprob_dict[lp['decoded_token']] = lp.get('logprob', -10)
                        elif isinstance(lp, (int, float)):
                            # Try to decode token ID
                            try:
                                decoded = self.tokenizer.decode([int(token_id)])
                                logprob_dict[decoded] = lp
                            except:
                                pass
                
                # Fallback based on generated text
                if not logprob_dict:
                    generated_text = output.outputs[0].text.strip()
                    if generated_text == "Yes":
                        logprob_dict = {"Yes": 0, "No": -10}
                    elif generated_text == "No":
                        logprob_dict = {"Yes": -10, "No": 0}
                    else:
                        logprob_dict = {"Yes": -5, "No": -5}
                
                logprobs.append(logprob_dict)
                
            except Exception as e:
                print(f"⚠ Error processing output {i}: {e}")
                # Use generated text as fallback
                try:
                    generated_text = output.outputs[0].text.strip()
                    if generated_text == "Yes":
                        logprobs.append({"Yes": 0, "No": -10})
                    elif generated_text == "No":
                        logprobs.append({"Yes": -10, "No": 0})
                    else:
                        logprobs.append({"Yes": -5, "No": -5})
                except:
                    logprobs.append({"Yes": -5, "No": -5})
        
        # Convert to probabilities
        try:
            predictions = []
            for logprob_dict in logprobs:
                yes_logprob = logprob_dict.get("Yes", -10)
                no_logprob = logprob_dict.get("No", -10)
                
                yes_prob = np.exp(yes_logprob)
                no_prob = np.exp(no_logprob)
                
                total = yes_prob + no_prob + 1e-10
                yes_prob_normalized = yes_prob / total
                
                predictions.append(yes_prob_normalized)
            
            predictions = np.array(predictions)
            print(f"✓ Processed {len(predictions)} predictions")
            print(f"  Mean: {predictions.mean():.4f}, Std: {predictions.std():.4f}")
            
            return predictions
            
        except Exception as e:
            print(f"✗ Failed to process predictions: {e}")
            return None
    
    def _calculate_metrics_and_save(self, df: pd.DataFrame, predictions: np.ndarray, 
                                   is_training: bool, using_external: bool) -> Dict[str, Any]:
        """Calculate metrics and save results"""
        from sklearn.metrics import roc_auc_score, classification_report
        
        results = {
            'predictions': predictions.tolist(),  # Convert to list for JSON serialization
            'num_predictions': len(predictions),
            'mean_prediction': float(predictions.mean()),
            'std_prediction': float(predictions.std()),
            'execution_time': str(datetime.now() - self.start_time)
        }
        
        # Calculate metrics if training
        if is_training and 'rule_violation' in df.columns:
            auc_score = roc_auc_score(df['rule_violation'], predictions)
            results['auc_score'] = float(auc_score)
            
            print(f"\n✓ AUC Score: {auc_score:.6f}")
            
            binary_predictions = (predictions > 0.5).astype(int)
            print("\nClassification Report:")
            print(classification_report(df['rule_violation'], binary_predictions,
                                      target_names=['No Violation', 'Violation']))
        
        # Save results
        model_name = 'qwen_7b'
        
        # Save predictions
        ensemble_predictions = pd.DataFrame({
            'row_id': df['row_id'],
            'rule_violation': predictions
        })
        ensemble_path = os.path.join(self.ensemble_dir, f'{model_name}_predictions.csv')
        ensemble_predictions.to_csv(ensemble_path, index=False)
        print(f"\n✓ Predictions saved to: {ensemble_path}")
        
        # Save metadata (exclude predictions array to keep file small)
        metadata = {
            'model': model_name,
            'timestamp': datetime.now().isoformat(),
            'model_path': self.model_path,
            'processor_type': 'external' if using_external else 'fallback',
            'num_predictions': results['num_predictions'],
            'mean_prediction': results['mean_prediction'],
            'std_prediction': results['std_prediction'],
            'execution_time': results['execution_time']
        }
        
        if 'auc_score' in results:
            metadata['auc_score'] = results['auc_score']
            
        metadata_path = os.path.join(self.ensemble_dir, f'{model_name}_metadata.json')
        with open(metadata_path, 'w') as f:
            json.dump(metadata, f, indent=2)
        print(f"✓ Metadata saved to: {metadata_path}")
        
        # Save submission
        submission_path = os.path.join(self.ensemble_dir, f'{model_name}_submission.csv')
        ensemble_predictions.to_csv(submission_path, index=False)
        print(f"✓ Submission saved to: {submission_path}")
        
        return results
    
    def cleanup(self):
        """Release the model and tokenizer, then tidy up GPU/process state.

        Drops the ``model`` and ``tokenizer`` attributes when they are set,
        runs the garbage collector, asks the GPU manager for a non-aggressive
        reset when CUDA is available, and finally kills leftover distributed
        processes.
        """
        rule = "=" * 50
        print("\n" + rule)
        print("CLEANUP")
        print(rule)

        # Remove heavyweight references so the collector can reclaim them.
        for attr_name in ('model', 'tokenizer'):
            if getattr(self, attr_name, None) is not None:
                delattr(self, attr_name)

        gc.collect()

        # Clean GPUs
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            self.gpu_manager.reset_all_gpus(aggressive=False)

        # Kill distributed processes
        kill_distributed_processes()

        print("✓ Resources cleaned up")


def main():
    """Run the Qwen 7B pipeline end to end and report the outcome.

    Picks the test set when ``KAGGLE_IS_COMPETITION_RERUN`` is set and the
    training set otherwise, runs inference through ``QwenModelRunner``,
    prints a success or failure summary, and always cleans the runner up.
    """
    wide_rule = "=" * 80
    narrow_rule = "=" * 50

    print(wide_rule)
    print("QWEN 7B MODEL - JIGSAW COMPETITION (FIXED)")
    print(wide_rule)
    print(f"\nExecution started at: {datetime.now()}")

    # Configuration
    model_path = "/kaggle/input/jigsaw-acrc-qwen7b-v01"
    rerun_flag = os.getenv('KAGGLE_IS_COMPETITION_RERUN')
    if rerun_flag:
        data_path = "/kaggle/input/jigsaw-agile-community-rules/test.csv"
    else:
        data_path = "/kaggle/input/jigsaw-agile-community-rules/train.csv"

    # Load data
    print("\n" + narrow_rule)
    print("DATA LOADING")
    print(narrow_rule)

    df = pd.read_csv(data_path)
    is_training = not rerun_flag
    environment_label = 'Development' if is_training else 'Competition'

    print(f"Environment: {environment_label}")
    print(f"Dataset shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")

    # Initialize and run model
    runner = QwenModelRunner(model_path)

    try:
        results = runner.run_inference(df, is_training)

        if not results:
            print("\n" + wide_rule)
            print("EXECUTION FAILED")
            print(wide_rule)
        else:
            print("\n" + wide_rule)
            print("EXECUTION COMPLETE - SUCCESS")
            print(wide_rule)
            print(f"Total execution time: {results['execution_time']}")
            print(f"Predictions generated: {results['num_predictions']}")
            if 'auc_score' in results:
                print(f"AUC Score: {results['auc_score']:.6f}")

            gpu_config = results.get('gpu_configuration', {})
            if gpu_config:
                multi_gpu_label = 'Yes' if gpu_config.get('multi_gpu', False) else 'No'
                print(f"\nGPU Configuration:")
                print(f"  GPUs used: {gpu_config.get('gpus_used', 'Unknown')}")
                print(f"  Multi-GPU: {multi_gpu_label}")

    except Exception as e:
        # Top-level boundary: report the failure and still fall through to cleanup.
        print(f"\n✗ Unexpected error: {e}")
        import traceback
        traceback.print_exc()

    finally:
        runner.cleanup()
        print(f"\nTotal runtime: {datetime.now() - runner.start_time}")


# Entry point: run the Qwen pipeline only when this module is executed
# directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

QWEN 7B MODEL - JIGSAW COMPETITION (FIXED)

Execution started at: 2025-07-28 05:41:25.830734

DATA LOADING
Environment: Development
Dataset shape: (2029, 9)
Columns: ['row_id', 'body', 'rule', 'subreddit', 'positive_example_1', 'positive_example_2', 'negative_example_1', 'negative_example_2', 'rule_violation']

ENVIRONMENT CONFIGURATION
  Set CUDA_VISIBLE_DEVICES=0,1
  Set OMP_NUM_THREADS=1
  Set MKL_NUM_THREADS=1
  Set TOKENIZERS_PARALLELISM=false
  Set VLLM_WORKER_MULTIPROC_METHOD=spawn
  Set VLLM_ATTENTION_BACKEND=XFORMERS
  Removed MASTER_ADDR
  Removed MASTER_PORT
  Removed RANK
  Removed WORLD_SIZE
  Removed LOCAL_RANK
  Removed NCCL_SOCKET_IFNAME
  Removed NCCL_IB_DISABLE
  Removed NCCL_P2P_DISABLE

✓ Environment configured for single-node execution

SYSTEM RESOURCE CHECK
✓ CPU Memory: 18.7 GB available
✓ Found 2 GPU(s)

GPU CONFIGURATION ANALYSIS

Current GPU Status:
--------------------------------------------------------------------------------
 GPU                 Name    To

2025-07-28 05:41:56.633407: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753681316.654516     319 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753681316.660903     319 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


INFO 07-28 05:42:02 [__init__.py:235] Automatically detected platform cuda.
[1;36m(VllmWorkerProcess pid=319)[0;0m INFO 07-28 05:42:02 [multiproc_worker_utils.py:226] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=319)[0;0m INFO 07-28 05:42:03 [cuda.py:326] Using XFormers backend.


[W728 05:42:14.504873230 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:42:14.993526194 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:42:24.513733327 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:42:34.524280357 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


✗ Initialization failed: Invariant encountered: value was None when it should not be
Waiting 5 seconds before retry...
[1;36m(VllmWorkerProcess pid=319)[0;0m ERROR 07-28 05:42:34 [multiproc_worker_utils.py:239] Exception in worker VllmWorkerProcess while processing method init_device.
[1;36m(VllmWorkerProcess pid=319)[0;0m ERROR 07-28 05:42:34 [multiproc_worker_utils.py:239] Traceback (most recent call last):
[1;36m(VllmWorkerProcess pid=319)[0;0m ERROR 07-28 05:42:34 [multiproc_worker_utils.py:239]   File "/usr/local/lib/python3.11/dist-packages/vllm/executor/multiproc_worker_utils.py", line 233, in _run_worker_process
[1;36m(VllmWorkerProcess pid=319)[0;0m ERROR 07-28 05:42:34 [multiproc_worker_utils.py:239]     output = run_method(worker, method, args, kwargs)
[1;36m(VllmWorkerProcess pid=319)[0;0m ERROR 07-28 05:42:34 [multiproc_worker_utils.py:239]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[1;36m(VllmWorkerProcess pid=319)[0;0m ERROR 07-28 05:42:34 [mu

2025-07-28 05:42:44.757634: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753681364.779634     338 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753681364.785936     338 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[rank1]:[W728 05:42:45.537392266 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[rank1]:[W728 05:42:35.527377954 TCPStore.cpp:125] [c10d] recvValue failed on SocketImpl(fd=48, addr=[fdff:ffff::fe:2204:f125:a757]:46717, remote=[fdff:ffff:0:0:5f00::]:15412): failed to recv, got 0 bytes
Exception raised from recvBytes at /pytorch/torch/csrc/distributed/c10d/Utils.hpp:678 (most recent call first):
fr

INFO 07-28 05:42:50 [__init__.py:235] Automatically detected platform cuda.
[1;36m(VllmWorkerProcess pid=338)[0;0m INFO 07-28 05:42:50 [multiproc_worker_utils.py:226] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=338)[0;0m INFO 07-28 05:42:51 [cuda.py:326] Using XFormers backend.


[W728 05:43:02.784833009 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:43:02.218842458 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:43:12.790541085 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[W728 05:43:22.799353063 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3


✗ Initialization failed: Invariant encountered: value was None when it should not be
Waiting 10 seconds before retry...
[1;36m(VllmWorkerProcess pid=338)[0;0m ERROR 07-28 05:43:23 [multiproc_worker_utils.py:239] Exception in worker VllmWorkerProcess while processing method init_device.
[1;36m(VllmWorkerProcess pid=338)[0;0m ERROR 07-28 05:43:23 [multiproc_worker_utils.py:239] Traceback (most recent call last):
[1;36m(VllmWorkerProcess pid=338)[0;0m ERROR 07-28 05:43:23 [multiproc_worker_utils.py:239]   File "/usr/local/lib/python3.11/dist-packages/vllm/executor/multiproc_worker_utils.py", line 233, in _run_worker_process
[1;36m(VllmWorkerProcess pid=338)[0;0m ERROR 07-28 05:43:23 [multiproc_worker_utils.py:239]     output = run_method(worker, method, args, kwargs)
[1;36m(VllmWorkerProcess pid=338)[0;0m ERROR 07-28 05:43:23 [multiproc_worker_utils.py:239]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[1;36m(VllmWorkerProcess pid=338)[0;0m ERROR 07-28 05:43:23 [m

[rank1]:[W728 05:43:33.810095118 socket.cpp:200] [c10d] The hostname of the client socket cannot be retrieved. err=-3
[rank1]:[W728 05:43:23.800675843 TCPStore.cpp:125] [c10d] recvValue failed on SocketImpl(fd=48, addr=[fdff:ffff::55:2ed7:a113:a937]:49473, remote=[fdff:ffff:0:0:5f00::]:12286): failed to recv, got 0 bytes
Exception raised from recvBytes at /pytorch/torch/csrc/distributed/c10d/Utils.hpp:678 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0x98 (0x7b9a913785e8 in /usr/local/lib/python3.11/dist-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x5ba8bfe (0x7b9a7aacdbfe in /usr/local/lib/python3.11/dist-packages/torch/lib/libtorch_cpu.so)
frame #2: <unknown function> + 0x5baaf40 (0x7b9a7aacff40 in /usr/local/lib/python3.11/dist-packages/torch/lib/libtorch_cpu.so)
frame #3: <unknown function> + 0x5bab84a (0x7b9a7aad084a in /usr/local/lib/python3.11/dist-pac

✗ Initialization failed: world group already initialized with a different world size
✗ All initialization attempts failed
✗ Failed to initialize model

EXECUTION FAILED

CLEANUP

Resetting GPU 0...

Resetting GPU 1...
✓ Resources cleaned up

Total runtime: 0:02:30.804686


In [4]:
"""
Ensemble Helper Function
This script loads all prediction files from the ensemble_files folder and creates a final submission
"""

import os
import pandas as pd
import numpy as np
import json
from datetime import datetime

def create_ensemble_submission(ensemble_dir='ensemble_files', output_file='submission.csv', method='average'):
    """
    Create final submission by combining all predictions in the ensemble directory

    Every ``*_predictions.csv`` file in ``ensemble_dir`` is one model. Files
    are processed in alphabetical order, so the first one deterministically
    provides the ``row_id`` list (and is the model used by ``'single'``).
    Rows a model has no prediction for are blended from the remaining models
    instead of turning the output into NaN.

    Parameters:
    -----------
    ensemble_dir : str
        Directory containing the prediction files
    output_file : str
        Path for the final submission file
    method : str
        Ensemble method ('average', 'weighted', or 'single'). 'weighted'
        weights each model by the ``auc_score`` of its metadata file and
        falls back to a plain average when any model lacks one. With a
        single prediction file the method is ignored.

    Returns:
    --------
    pd.DataFrame : The final submission dataframe

    Raises:
    -------
    ValueError
        If the directory is missing, holds no prediction files, the method
        is unknown, or a model has duplicate ``row_id`` values.
    """

    print("="*80)
    print("ENSEMBLE SUBMISSION CREATOR")
    print("="*80)
    print(f"Ensemble directory: {ensemble_dir}")
    print(f"Output file: {output_file}")
    print(f"Method: {method}")

    # Check if ensemble directory exists
    if not os.path.exists(ensemble_dir):
        raise ValueError(f"Ensemble directory '{ensemble_dir}' not found!")

    # Find all prediction files. os.listdir returns them in arbitrary order;
    # sorting makes the reference model reproducible across runs.
    prediction_files = sorted(
        f for f in os.listdir(ensemble_dir) if f.endswith('_predictions.csv')
    )

    if not prediction_files:
        raise ValueError(f"No prediction files found in '{ensemble_dir}'")

    print(f"\nFound {len(prediction_files)} prediction file(s):")
    for f in prediction_files:
        print(f"  - {f}")

    all_predictions, metadata_info = _load_ensemble_members(ensemble_dir, prediction_files)

    # Create ensemble based on method
    print(f"\n{'='*50}")
    print("CREATING ENSEMBLE")
    print(f"{'='*50}")

    if len(all_predictions) == 1 or method == 'single':
        # Single model case
        model_name = next(iter(all_predictions))
        print(f"Using single model: {model_name}")
        submission = all_predictions[model_name].copy()

    elif method in ('average', 'weighted'):
        weights = None
        if method == 'weighted':
            print("Creating weighted ensemble")
            weights = _auc_weights(all_predictions, metadata_info)
            if weights is None:
                # Fall through to the average below; nothing is re-read.
                print("Warning: Not all models have AUC scores, falling back to simple average")

        if weights is None:
            print("Creating average ensemble")

        submission = _blend_predictions(all_predictions, weights)

        if weights is None:
            print(f"Averaged {len(all_predictions)} model predictions")

    else:
        raise ValueError(f"Unknown ensemble method: {method}")

    # Ensure predictions are in valid range
    submission['rule_violation'] = submission['rule_violation'].clip(0, 1)

    # Save submission
    submission.to_csv(output_file, index=False)
    print(f"\n✓ Submission saved to: {output_file}")

    _print_submission_statistics(submission)

    print("\n" + "="*80)
    print("ENSEMBLE COMPLETE")
    print("="*80)

    return submission


def _load_ensemble_members(ensemble_dir, prediction_files):
    """Read each prediction CSV and its optional metadata JSON, printing a summary."""
    all_predictions = {}
    metadata_info = {}

    for file in prediction_files:
        model_name = file.replace('_predictions.csv', '')

        # Load predictions
        df = pd.read_csv(os.path.join(ensemble_dir, file))
        all_predictions[model_name] = df

        # Try to load metadata
        metadata_file = os.path.join(ensemble_dir, f'{model_name}_metadata.json')
        if os.path.exists(metadata_file):
            with open(metadata_file, 'r') as f:
                metadata_info[model_name] = json.load(f)

        print(f"\nLoaded {model_name}:")
        print(f"  - Shape: {df.shape}")
        print(f"  - Mean prediction: {df['rule_violation'].mean():.4f}")
        print(f"  - Std deviation: {df['rule_violation'].std():.4f}")

        meta = metadata_info.get(model_name)
        if meta is not None:
            if 'auc_score' in meta and meta['auc_score']:
                print(f"  - AUC score: {meta['auc_score']:.4f}")
            if 'execution_time' in meta:
                print(f"  - Execution time: {meta['execution_time']}")

    return all_predictions, metadata_info


def _auc_weights(all_predictions, metadata_info):
    """Return AUC-proportional weights per model, or None if any model lacks an AUC."""
    auc_scores = {
        model_name: meta['auc_score']
        for model_name, meta in metadata_info.items()
        if 'auc_score' in meta and meta['auc_score']
    }

    if len(auc_scores) < len(all_predictions):
        return None

    total_auc = sum(auc_scores.values())
    weights = {model: auc / total_auc for model, auc in auc_scores.items()}

    print("\nModel weights:")
    for model, weight in weights.items():
        print(f"  - {model}: {weight:.3f} (AUC: {auc_scores[model]:.4f})")

    return weights


def _blend_predictions(all_predictions, weights=None):
    """Blend model predictions on the first model's row_ids (equal weights when None)."""
    reference_df = next(iter(all_predictions.values()))
    submission = pd.DataFrame({'row_id': reference_df['row_id']})
    n_rows = len(submission)

    weighted_sum = np.zeros(n_rows)
    weight_total = np.zeros(n_rows)

    for model_name, df in all_predictions.items():
        # Ensure alignment by row_id
        merged = pd.merge(submission[['row_id']], df, on='row_id', how='left')
        if len(merged) != n_rows:
            raise ValueError(f"Model '{model_name}' has duplicate row_id values")

        values = merged['rule_violation'].to_numpy(dtype=float)
        present = ~np.isnan(values)
        n_missing = int(n_rows - present.sum())
        if n_missing:
            print(f"Warning: {model_name} has no prediction for {n_missing} row(s); "
                  "using the remaining models for those rows")

        weight = 1.0 if weights is None else weights[model_name]
        weighted_sum[present] += values[present] * weight
        weight_total[present] += weight

    # Renormalise per row so a missing model does not drag the score down;
    # rows no model covers stay NaN.
    blended = np.full(n_rows, np.nan)
    covered = weight_total > 0
    blended[covered] = weighted_sum[covered] / weight_total[covered]

    submission['rule_violation'] = blended
    return submission


def _print_submission_statistics(submission):
    """Print summary statistics, confidence buckets and a sample of the submission."""
    scores = submission['rule_violation']

    # Display statistics
    print(f"\n{'='*50}")
    print("SUBMISSION STATISTICS")
    print(f"{'='*50}")
    print(f"Number of predictions: {len(submission)}")
    print(f"Mean prediction: {scores.mean():.4f}")
    print(f"Std deviation: {scores.std():.4f}")
    print(f"Min prediction: {scores.min():.4f}")
    print(f"Max prediction: {scores.max():.4f}")

    # Show distribution
    print("\nPrediction distribution:")
    very_confident_yes = (scores > 0.9).sum()
    confident_yes = ((scores > 0.7) & (scores <= 0.9)).sum()
    uncertain = ((scores >= 0.3) & (scores <= 0.7)).sum()
    confident_no = ((scores >= 0.1) & (scores < 0.3)).sum()
    very_confident_no = (scores < 0.1).sum()

    total = len(submission)
    print(f"  Very confident YES (>0.9): {very_confident_yes} ({very_confident_yes/total*100:.1f}%)")
    print(f"  Confident YES (0.7-0.9): {confident_yes} ({confident_yes/total*100:.1f}%)")
    print(f"  Uncertain (0.3-0.7): {uncertain} ({uncertain/total*100:.1f}%)")
    print(f"  Confident NO (0.1-0.3): {confident_no} ({confident_no/total*100:.1f}%)")
    print(f"  Very confident NO (<0.1): {very_confident_no} ({very_confident_no/total*100:.1f}%)")

    # Display sample predictions
    print("\nSample predictions:")
    print(submission.head(10))


def quick_check_ensemble(ensemble_dir='ensemble_files'):
    """
    Quick function to check what's available in the ensemble directory

    Prints every file with its size, the number of ``*_predictions.csv`` and
    ``*_metadata.json`` files, and one ``model: AUC`` line per metadata file.
    An AUC that is absent or not a number is shown as ``N/A``. Returns
    nothing; a missing directory is reported and otherwise ignored.
    """
    print(f"\nChecking ensemble directory: {ensemble_dir}")

    if not os.path.exists(ensemble_dir):
        print(f"Directory '{ensemble_dir}' does not exist")
        return

    # Sorted once so both the listing and the summaries are deterministic.
    files = sorted(os.listdir(ensemble_dir))

    print(f"\nFiles in directory:")
    for file in files:
        file_path = os.path.join(ensemble_dir, file)
        size = os.path.getsize(file_path) / 1024  # Size in KB
        print(f"  - {file} ({size:.1f} KB)")

    # Check for prediction files
    prediction_files = [f for f in files if f.endswith('_predictions.csv')]
    print(f"\nFound {len(prediction_files)} prediction file(s)")

    # Check for metadata files
    metadata_files = [f for f in files if f.endswith('_metadata.json')]
    print(f"Found {len(metadata_files)} metadata file(s)")

    # Show model summary
    if metadata_files:
        print("\nModel summaries:")
        for meta_file in metadata_files:
            with open(os.path.join(ensemble_dir, meta_file), 'r') as f:
                meta = json.load(f)
            model_name = meta.get('model', 'Unknown')
            auc = meta.get('auc_score')
            # A JSON null or a non-numeric value cannot take the float
            # format spec, so only real numbers are formatted.
            if isinstance(auc, (int, float)):
                auc_text = f"{auc:.4f}"
            else:
                auc_text = 'N/A'
            print(f"  - {model_name}: AUC={auc_text}")


# Entry point for the ensemble cell: inspect the ensemble directory, then
# build the final submission.csv from the prediction files found there.
if __name__ == "__main__":
    # List the files, prediction/metadata counts and per-model AUC first
    quick_check_ensemble()
    
    # Create ensemble submission
    # With a single prediction file the method argument has no effect:
    # create_ensemble_submission takes its single-model branch.
    submission_df = create_ensemble_submission(
        ensemble_dir='ensemble_files',
        output_file='submission.csv',
        method='average'  # alternatives: 'weighted', or 'single' for explicit single model handling
    )


Checking ensemble directory: ensemble_files

Files in directory:
  - llama_8b_detailed_analysis.csv (2015.7 KB)
  - llama_8b_metadata.json (0.5 KB)
  - llama_8b_predictions.csv (46.7 KB)
  - llama_8b_submission.csv (46.7 KB)

Found 1 prediction file(s)
Found 1 metadata file(s)

Model summaries:
  - llama_8b: AUC=0.9565
ENSEMBLE SUBMISSION CREATOR
Ensemble directory: ensemble_files
Output file: submission.csv
Method: average

Found 1 prediction file(s):
  - llama_8b_predictions.csv

Loaded llama_8b:
  - Shape: (2029, 2)
  - Mean prediction: 0.5287
  - Std deviation: 0.3075
  - AUC score: 0.9565
  - Execution time: 0:08:38.347051

CREATING ENSEMBLE
Using single model: llama_8b

✓ Submission saved to: submission.csv

SUBMISSION STATISTICS
Number of predictions: 2029
Mean prediction: 0.5287
Std deviation: 0.3075
Min prediction: 0.0311
Max prediction: 0.9724

Prediction distribution:
  Very confident YES (>0.9): 215 (10.6%)
  Confident YES (0.7-0.9): 662 (32.6%)
  Uncertain (0.3-0.7): 456 