In [None]:
import json

# Step 1: Tokenization
def tokenize_input(input_text):
    return input_text.split()

# Step 2: Named Entity Recognition (NER)
def extract_entities(tokens):
    house_type = None
    rooms = {}
    spatial_relationships = []

    # Define room types and keywords
    room_types = ["bedroom", "kitchen", "living room", "bathroom"]
    spatial_keywords = ["next to", "on", "left", "right", "adjacent"]

    i = 0
    while i < len(tokens):
        token = tokens[i].lower()

        # Extract house type (e.g., 3BHK)
        if "bhk" in token:
            house_type = token

        # Extract room types and relationships
        if token in room_types:
            room_name = token
            if i + 1 < len(tokens) and tokens[i + 1].isdigit():  # e.g., Bedroom 2
                room_name += " " + tokens[i + 1]
                i += 1
            rooms[room_name] = {}

        # Extract spatial relationships
        if token in spatial_keywords:
            if token == "next" and i + 1 < len(tokens) and tokens[i + 1] == "to":
                spatial_relationships.append(("next to", tokens[i - 1], tokens[i + 2]))
                i += 2
            elif token == "on" and i + 2 < len(tokens) and tokens[i + 1] == "the":
                spatial_relationships.append(("position", tokens[i - 1], tokens[i + 2]))
                i += 2

        i += 1

    return house_type, rooms, spatial_relationships

# Step 3: Convert to Structured JSON Format
def create_json_structure(house_type, rooms, spatial_relationships):
    structured_data = {
        "house_type": house_type,
        "rooms": rooms
    }

    for relationship in spatial_relationships:
        rel_type, room1, room2 = relationship
        room1 = room1.lower()  # Ensure room names are lowercase
        room2 = room2.lower()  # Ensure room names are lowercase

        if room1 in structured_data["rooms"]:
            if rel_type == "next to":
                structured_data["rooms"][room1]["adjacent"] = room2
                if room2 in structured_data["rooms"]:
                    structured_data["rooms"][room2]["adjacent"] = room1
            elif rel_type == "position":
                structured_data["rooms"][room1]["position"] = room2

    return json.dumps(structured_data, indent=4)

# Main Function
def main():
    input_text = "5BHK house with a bathtroom next to Bedroom 2 and the living room on the left side."

    # Step 1: Tokenization
    tokens = tokenize_input(input_text)
    print("Tokens:", tokens)

    # Step 2: Named Entity Recognition (NER)
    house_type, rooms, spatial_relationships = extract_entities(tokens)
    print("House Type:", house_type)
    print("Rooms:", rooms)
    print("Spatial Relationships:", spatial_relationships)

    # Step 3: Convert to Structured JSON Format
    structured_json = create_json_structure(house_type, rooms, spatial_relationships)
    print("Structured JSON:\n", structured_json)

# Run the program
if __name__ == "__main__":
    main()

Tokens: ['5BHK', 'house', 'with', 'a', 'bathtroom', 'next', 'to', 'Bedroom', '2', 'and', 'the', 'living', 'room', 'on', 'the', 'left', 'side.']
House Type: 5bhk
Rooms: {'bedroom 2': {}}
Spatial Relationships: [('position', 'room', 'left')]
Structured JSON:
 {
    "house_type": "5bhk",
    "rooms": {
        "bedroom 2": {}
    }
}
