In [3]:

import pandas as pd

In [2]:
# test_single_address.py

def test_single_address(
    model_path="models/address_parser_dev/final_model",
    test_address="36 - 49, chapel street, London, se45 6pq",
    use_fp16=True,
):
    """Smoke-test the address parser on one address and print a report.

    Builds an ``AddressParserInference``, runs ``predict_single_address`` on
    ``test_address`` and prints the entities and parsed components it returns.
    ``AddressParserInference`` is not imported by this function; it must
    already be available in the kernel namespace when the function is called.

    Args:
        model_path: Path handed to ``AddressParserInference`` as ``model_path``.
            Defaults to the development model path used previously.
        test_address: Address string to parse.
        use_fp16: Forwarded to ``AddressParserInference``. The original note
            on this flag suggests passing False while debugging to avoid GPU
            issues.

    Returns:
        None. Everything is reported through ``print``; any exception raised
        while building or running the parser is caught, summarised on stdout,
        and its traceback is printed via ``traceback.print_exc()``.
    """
    import traceback

    separator = "=" * 50

    try:
        print("Initializing parser...")
        parser = AddressParserInference(
            model_path=model_path,
            max_length=512,
            stride=50,
            use_fp16=use_fp16,
        )
        print("✓ Parser initialized successfully")

        print(f"\nTesting address: '{test_address}'")

        # row_index=0 because only a single address is being parsed here.
        result = parser.predict_single_address(test_address, row_index=0)

        print("\n" + separator)
        print("RESULTS:")
        print(separator)

        # The result dict carries an "error" key when parsing failed.
        if "error" in result:
            print(f"❌ ERROR: {result['error']}")
        else:
            print(f"✓ Original address: {result['original_address']}")
            entities = result['entities']
            print(f"✓ Number of entities found: {len(entities)}")

            if entities:
                print("\nEntities found:")
                for position, entity in enumerate(entities, start=1):
                    print(f"  {position}. {entity['type']}: '{entity['text']}' (confidence: {entity['confidence']:.3f})")

                print("\nParsed components:")
                for key, value in result['parsed_components'].items():
                    print(f"  {key}: {value}")
            else:
                print("⚠️  No entities found")

        print(separator)

    except Exception as e:
        # Deliberately broad: this is an interactive smoke test, so report the
        # failure with its traceback instead of aborting the notebook run.
        print(f"❌ Failed to initialize or run parser: {e}")
        traceback.print_exc()


# Run the smoke test; progress and results are reported via print().
test_single_address()

Initializing parser...
Loading model and tokenizer...
AddressParserInference initialized: cuda, FP16=True
✓ Parser initialized successfully

Testing address: '36 - 49, chapel street, London, se45 6pq'

RESULTS:
✓ Original address: 36 - 49, chapel street, London, se45 6pq
✓ Number of entities found: 3

Entities found:
  1. street_number: '36 - 49' (confidence: 1.000)
  2. street_name: 'chapel street' (confidence: 1.000)
  3. postcode: 'se45 6pq' (confidence: 1.000)

Parsed components:
  street_number: 36 - 49
  street_name: chapel street
  postcode: se45 6pq


In [5]:
# Load one processed OCOD history snapshot into a DataFrame
# (the file name suggests the 2015-10 release — TODO confirm).
test = pd.read_parquet("data/ocod_history_processed/OCOD_FULL_2015_10.parquet")

In [7]:
# Inspect the distinct building_name values. In the saved output these include
# None and entries with leading whitespace (e.g. ' london house').
test['building_name'].unique()

array([None, 'great titchfield house', ' london house', ...,
       'royal pavilion', ' the business village', 'afe'], dtype=object)