In [2]:
#v4 demo_myday

import pandas as pd
import re
from datetime import datetime


# Keywords recognised as a state/region or as a country inside an address.
_STATE_KEYWORDS = ['VIC', 'North VIC', 'South VIC', 'East VIC', 'Victoria', 'Kuala Lumpur', 'Beijing', 'Guangzhou', 'Guangdong']
_COUNTRY_KEYWORDS = ['Australia', 'China']

# Patterns are compiled once at import time instead of on every call.
_UNIT_PATTERN = re.compile(r'^(Unit\s*\d+/\d+|P\.O\.Box\s*\d+|RM\d+|Room\s*\d+|[A-Za-z]*\d+(\.\d+)?/[\dA-Za-z\-]+|\d+/\d+|[A-Za-z]*\d+-\d+)', re.IGNORECASE)
_POSTCODE_PATTERN = re.compile(r'\b\d{4,6}\b')
_STATE_PATTERN = re.compile(r'\b(?:' + '|'.join(map(re.escape, _STATE_KEYWORDS)) + r')\b', re.IGNORECASE)
_COUNTRY_PATTERN = re.compile(r'\b(?:' + '|'.join(map(re.escape, _COUNTRY_KEYWORDS)) + r')\b', re.IGNORECASE)
_STREET_PATTERN = re.compile(r'\b(?:street|st|road|rd|pl|place|avenue|ave|blvd|boulevard|lane|ln|drive|dr)\b', re.IGNORECASE)
_NUMBER_PATTERN = re.compile(r'^[\d\s/.-]+')
# Whitespace/commas at either end of a fragment (left over once neighbours are cut out).
_EDGE_SEPARATORS = re.compile(r'^[\s,]+|[\s,]+$')


def _cut_match(text, match):
    """Return *text* with only the span covered by *match* removed, then trimmed."""
    return (text[:match.start()] + text[match.end():]).strip()


def _trim_separators(text):
    """Strip whitespace and commas from both ends of *text*."""
    return _EDGE_SEPARATORS.sub('', text)


def parse_address(address):
    """Split a free-form address string into its components.

    Components are peeled off the string in a fixed order: unit prefix,
    postcode (first standalone run of 4-6 digits), state keyword, country
    keyword, leading street number, then the street (everything up to and
    including the first street-type word). Whatever text remains becomes the
    suburb.

    Each component is removed by cutting out exactly the span that matched.
    A plain ``str.replace`` would delete every occurrence of the same text,
    e.g. stripping the state "VIC" would also mangle "VICTORY Rd".

    Args:
        address: The raw address. Null values (``None``/``NaN``) are treated
            as an empty string; anything else is converted with ``str()``.

    Returns:
        A dict with the keys ``unit``, ``number``, ``street``, ``suburb``,
        ``postcode``, ``state`` and ``country``. Components that were not
        found are ``None``, except ``suburb`` which is the (possibly empty)
        leftover text.
    """
    address = str(address) if pd.notnull(address) else ""
    results = {
        'unit': None,
        'number': None,
        'street': None,
        'suburb': None,
        'postcode': None,
        'state': None,
        'country': None
    }

    # The unit pattern is anchored at the start, so it is always a prefix.
    unit_match = _UNIT_PATTERN.match(address)
    if unit_match:
        results['unit'] = unit_match.group(0)
        address = _cut_match(address, unit_match)

    # Postcode, state and country can sit anywhere in the remaining text.
    for key, pattern in (
        ('postcode', _POSTCODE_PATTERN),
        ('state', _STATE_PATTERN),
        ('country', _COUNTRY_PATTERN),
    ):
        found = pattern.search(address)
        if found:
            results[key] = found.group(0)
            address = _cut_match(address, found)

    # Leading run of digits and number punctuation, e.g. "12", "12-14", "3/7".
    number_match = _NUMBER_PATTERN.match(address)
    if number_match:
        results['number'] = number_match.group(0).strip()
        address = _cut_match(address, number_match)

    # The street runs from the current start through the first street-type word.
    street_match = _STREET_PATTERN.search(address)
    if street_match:
        street_end = street_match.end()
        results['street'] = _trim_separators(address[:street_end])
        address = address[street_end:]

    # Drop commas stranded at the edges by the components removed above.
    results['suburb'] = _trim_separators(address)

    return results


# --- Source workbook: the combined contacts export ---
input_file_path = r'C:\Users\MSI\Documents\Migration Tool New\DB Tables\Contacts DB\final_combined_contacts_data.xlsx'
df = pd.read_excel(input_file_path)

# Column order of the generated sheet.
columns = [
    'id', 'contact_id', 'building_name',
    'unit', 'number', 'street', 'suburb', 'postcode', 'state', 'country',
    'created_at', 'updated_at', 'contact_details_id',
]

# Output columns that are filled straight from the parse_address() result.
address_fields = ('unit', 'number', 'street', 'suburb', 'postcode', 'state', 'country')

# A single timestamp is taken up front and stamped on every generated row.
current_timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Ids are handed out sequentially, starting from this value.
id_counter = 1000001
output_data = []

for _, row in df.iterrows():
    address_data = parse_address(row.get('Inner_Physical Address', ''))

    # Fixed leading columns; 'contact_id' is read from the sheet's 'Row No' column.
    new_row = {
        'id': id_counter,
        'contact_id': row.get('Row No'),
        'building_name': None,
    }
    # Parsed address components, copied over field by field.
    new_row.update((field, address_data[field]) for field in address_fields)
    # Fixed trailing columns; 'contact_details_id' is written as the text 'NULL'.
    new_row.update(
        created_at=current_timestamp,
        updated_at=current_timestamp,
        contact_details_id='NULL',
    )

    output_data.append(new_row)
    id_counter += 1

output_df = pd.DataFrame(output_data, columns=columns)

# --- Destination workbook ---
output_file_path = r'C:\Users\MSI\Documents\Migration Tool New\DB Tables\Contacts DB\contact_physical_addresses.xlsx'
output_df.to_excel(output_file_path, index=False)

print(f"Processed data has been saved to {output_file_path}")


Processed data has been saved to C:\Users\MSI\Documents\Migration Tool New\DB Tables\Contacts DB\contact_physical_addresses.xlsx
