In [None]:
def _remove_quietly(path):
    """Delete *path* on a best-effort basis, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        # Temporary-file cleanup must never mask the classification result.
        pass


def _load_default_model():
    """
    Load the default voting classifier from the current directory.

    Tries 'voting_classifier.pkl' first; if that fails for any reason, falls
    back to the first '*.pkl' file in the current directory whose name
    contains 'voting' (case-insensitive).

    Returns:
        The loaded model, or None when no candidate file exists.
    """
    # NOTE(security): joblib.load unpickles arbitrary objects; only load
    # model files that come from a trusted source.
    try:
        return joblib.load('voting_classifier.pkl')
    except Exception:
        # Fall through to the directory scan below.
        pass

    for candidate in os.listdir('.'):
        if candidate.endswith('.pkl') and 'voting' in candidate.lower():
            return joblib.load(candidate)
    return None


def classify_ecg_file(file_path, model=None):
    """
    Classify an ECG file (PDF or DAT) as Normal or Abnormal.

    A PDF is first digitized with ``digitize_ecg_from_pdf``; any other file is
    split with ``split_dat_into_segments``. Each resulting segment is passed
    to ``classify_new_ecg`` and the final label is a majority vote over the
    segments that returned "Normal" or "Abnormal" (ties resolve to
    "Abnormal"). Temporary segment files, and the digitized DAT file produced
    for a PDF, are removed before returning, even when an error occurs.

    Args:
        file_path (str): Path to the ECG file (.pdf or .dat)
        model: The trained model for classification (optional, will try to
            load 'voting_classifier.pkl' or a similar file if None)

    Returns:
        str: Classification result ("Normal", "Abnormal", or an
        "Error: ..." message). This function does not raise for ordinary
        failures; they are reported through the returned string.
    """
    # Initialised up front so the ``finally`` clause can always read them,
    # even when the very first statement in the ``try`` block fails.
    is_pdf = False
    dat_path = None
    temp_segments = []

    try:
        # Determine file type
        is_pdf = file_path.lower().endswith('.pdf')

        # Load model if not provided
        if model is None:
            model = _load_default_model()
            if model is None:
                return "Error: Classification model not found"

        # Process the file
        if is_pdf:
            # Extract file name without extension for output
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            output_dat = f"{base_name}_digitized.dat"

            # Digitize the PDF to a DAT file and get segment files
            dat_path, produced = digitize_ecg_from_pdf(
                pdf_path=file_path,
                output_file=output_dat,
                debug=False,
                save_segments=True
            )
        else:
            # For DAT files, split into segments
            produced = split_dat_into_segments(file_path, debug=False)

            if not produced:
                # If splitting failed, try classifying the whole file
                return classify_new_ecg(file_path, model, debug=False)

        # Tolerate a helper returning None instead of an empty list.
        temp_segments = list(produced or [])

        # Classify each segment; the classifier is given the path without
        # its extension, as in the original implementation.
        segment_results = [
            classify_new_ecg(os.path.splitext(segment_file)[0], model, debug=False)
            for segment_file in temp_segments
        ]

        normal_count = segment_results.count("Normal")
        abnormal_count = segment_results.count("Abnormal")

        # Without at least one valid vote there is nothing to decide on;
        # reporting "Abnormal" here would be a fabricated diagnosis.
        if normal_count == 0 and abnormal_count == 0:
            return "Error: No valid segments to classify"

        # Majority vote; ties are resolved conservatively as "Abnormal".
        if abnormal_count >= normal_count:
            return "Abnormal"
        return "Normal"

    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Clean up temporary files
        for segment_file in temp_segments:
            _remove_quietly(segment_file)
        if is_pdf and dat_path and os.path.exists(dat_path):
            _remove_quietly(dat_path)


# Example run: classify "sample.pdf". No model is passed, so
# classify_ecg_file attempts to load one itself. The returned string is
# either a label ("Normal"/"Abnormal") or an "Error: ..." message.
result = classify_ecg_file("sample.pdf")
print(f"ECG Classification: {result}")

Successfully loaded classification model from voting_classifier.pkl

----- Testing PDF file classification with segment voting -----
Found test PDF file: sample4.pdf
Processing PDF file: sample4.pdf
Starting ECG digitization from PDF: sample4.pdf
Converted PDF to image: temp_ecg_image.jpg
Image dimensions: 2200x1700
Calibration parameters: {'seconds_per_pixel': 0.01015228426395939, 'mv_per_pixel': 0.012690355329949238}
Layer 1 boundaries: 600-879
Layer 2 boundaries: 880-1179
Layer 3 boundaries: 1180-1480
Processing layer 1...
  - Found 3917 contours
  - Selected contour with 2987 points
  - Layer 1 signal range: -0.66 mV to 0.93 mV
Processing layer 2...
  - Found 4824 contours
  - Selected contour with 2746 points
  - Layer 2 signal range: -0.26 mV to 0.61 mV
Processing layer 3...
  - Found 3948 contours
  - Selected contour with 2994 points
  - Layer 3 signal range: -0.22 mV to 0.69 mV
Saved segment 1 to sample4_digitized_segment1.dat
Saved segment 2 to sample4_digitized_segment2.dat
