# Logic Grid Puzzle Solver - Test 100 Puzzles
This notebook solves logic grid puzzles (Zebra puzzles) using Constraint Satisfaction Problem (CSP) techniques.

## Features
- Backtracking search with MRV heuristic
- Forward checking
- Arc consistency (AC-3)
- Natural language clue parsing

In [287]:
# Import required libraries
import pandas as pd
import json
import re
from typing import Dict, List, Set, Tuple, Optional, Any
from collections import defaultdict, deque
import copy
import time

## CSP Solver Classes
Core CSP implementation with variables, constraints, and solver logic.

In [288]:
class CSPVariable:
    """A named CSP variable with a candidate domain and a current value.

    Names in this file follow the ``House<N>_<attribute>`` pattern
    (e.g. ``House1_color``, ``House2_pet``).
    """

    def __init__(self, name: str, domain: List[Any]):
        # The domain is stored as a set, so duplicate candidates collapse.
        self.name, self.domain, self.value = name, set(domain), None

    def is_assigned(self) -> bool:
        """Return True when the current value is anything other than None."""
        return not (self.value is None)

    def assign(self, value: Any):
        """Set the current value.

        Raises:
            ValueError: if ``value`` is not a member of the domain.
        """
        if value in self.domain:
            self.value = value
            return
        raise ValueError(f"Value {value} not in domain of {self.name}")

    def unassign(self):
        """Clear the current value (back to None)."""
        self.value = None


class Constraint:
    """Common base for every constraint.

    Holds the names of the variables the constraint touches and a counter
    (``constraint_checks``) that subclasses bump on each evaluation.
    """

    def __init__(self, variables: List[str]):
        self.constraint_checks = 0
        self.variables = variables

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Evaluate the constraint against ``assignment``; subclasses must override."""
        raise NotImplementedError

    def get_involved_variables(self) -> List[str]:
        """Return the list of variable names passed to the constructor."""
        involved = self.variables
        return involved


class AllDifferentConstraint(Constraint):
    """Require the assigned values of a group of variables to be pairwise distinct.

    When ``attribute`` is given, each assigned value is expected to be a dict
    and only ``value[attribute]`` is compared; values that are not dicts or
    lack that key are ignored.
    """

    def __init__(self, variables: List[str], attribute: str = None):
        super().__init__(variables)
        self.attribute = attribute

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return True when no two collected values are equal.

        Unassigned variables (missing from ``assignment`` or mapped to None)
        do not take part in the comparison.
        """
        self.constraint_checks += 1

        collected = []
        for var_name in self.variables:
            current = assignment.get(var_name)
            if current is None:
                continue
            if not self.attribute:
                collected.append(current)
            elif isinstance(current, dict) and self.attribute in current:
                collected.append(current[self.attribute])

        # A duplicate shrinks the set relative to the list.
        return len(set(collected)) == len(collected)


class UnaryConstraint(Constraint):
    """Pin one variable to one value.

    If the variable's assigned value is a dict, the comparison is made
    against ``value[attribute]``; otherwise the assigned value itself is
    compared.
    """

    def __init__(self, variable: str, attribute: str, value: Any):
        super().__init__([variable])
        self.variable, self.attribute, self.value = variable, attribute, value

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return True if the variable is unassigned or matches the required value."""
        self.constraint_checks += 1

        current = assignment.get(self.variable)
        if current is None:
            # Missing or None means "not decided yet", which cannot violate.
            return True

        observed = current.get(self.attribute) if isinstance(current, dict) else current
        return observed == self.value


class CSP:
    """Container for a constraint satisfaction problem.

    Keeps the variables (by name), the list of constraints, and a neighbour
    map linking every variable to the other variables it shares a constraint
    with.
    """

    def __init__(self):
        self.variables: Dict[str, CSPVariable] = {}
        self.constraints: List[Constraint] = []
        self.neighbors: Dict[str, Set[str]] = defaultdict(set)

    def add_variable(self, var: CSPVariable):
        """Register ``var`` under its own name (replacing any previous entry)."""
        self.variables[var.name] = var

    def add_constraint(self, constraint: Constraint):
        """Store ``constraint`` and connect its variables in the neighbour map."""
        self.constraints.append(constraint)

        scope = constraint.variables
        for var in scope:
            others = {candidate for candidate in scope if candidate != var}
            # Only touch the defaultdict when there is something to add, so
            # single-variable constraints do not create empty entries.
            if others:
                self.neighbors[var] |= others

    def get_constraints_for_variable(self, var_name: str) -> List[Constraint]:
        """Return the constraints whose variable list contains ``var_name``."""
        relevant = []
        for constraint in self.constraints:
            if var_name in constraint.variables:
                relevant.append(constraint)
        return relevant

    def is_consistent(self, var_name: str, value: Any, assignment: Dict[str, Any]) -> bool:
        """Check whether setting ``var_name`` to ``value`` violates any constraint.

        The check runs on a copy of ``assignment``; the caller's mapping is
        left untouched. Only constraints that mention ``var_name`` are
        evaluated, stopping at the first violation.
        """
        trial = assignment.copy()
        trial[var_name] = value
        return all(
            constraint.is_satisfied(trial)
            for constraint in self.get_constraints_for_variable(var_name)
        )

    def get_all_constraint_checks(self) -> int:
        """Return the sum of ``constraint_checks`` over all stored constraints."""
        total = 0
        for constraint in self.constraints:
            total += constraint.constraint_checks
        return total

In [289]:
def order_domain_values(self, var: CSPVariable, assignment: Dict[str, Any]) -> List[Any]:
    """Return the variable's domain as an ascending list.

    Sorting makes the value order deterministic so solutions are stable.
    ``assignment`` is accepted but not consulted.
    """
    ordered = list(var.domain)
    ordered.sort()
    return ordered


## Advanced Constraint Classes
Specialized constraints for parsing natural language clues.

In [290]:
class ImplicationConstraint(Constraint):
    """Link a value of one attribute to a value of another attribute.

    With ``exclude=False``: if ``House{house_num}_{attr1}`` is ``val1`` then
    ``House{target_house}_{attr2}`` must be ``val2`` (and, when
    ``bidirectional``, the reverse as well). With ``exclude=True`` the two
    values must NOT occur together; the exclusion is only enforced when both
    variables belong to the same house.
    """

    def __init__(self, house_num: int, attr1: str, val1: str, attr2: str, val2: str, target_house: int = None, exclude: bool = False, bidirectional: bool = True):
        self.house_num = house_num
        self.attr1, self.val1 = attr1, val1
        self.attr2, self.val2 = attr2, val2
        # Default to the same house when no separate target is given.
        self.target_house = house_num if target_house is None else target_house
        self.exclude = exclude  # True: val1 implies NOT val2
        self.bidirectional = bidirectional  # False: only val1 -> val2
        self.variables = [f"House{house_num}_{attr1}", f"House{self.target_house}_{attr2}"]
        self.constraint_checks = 0

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return False only when the current assignment contradicts the rule.

        A variable counts as assigned when its name is a key of
        ``assignment``; partial assignments that cannot yet be judged pass.
        """
        self.constraint_checks += 1

        first_var = self.variables[0]
        second_var = self.variables[1]
        has_first = first_var in assignment
        has_second = second_var in assignment

        if not (has_first or has_second):
            return True

        same_house = self.house_num == self.target_house
        first_hit = has_first and assignment[first_var] == self.val1
        second_hit = has_second and assignment[second_var] == self.val2

        if self.exclude:
            # Exclusion across different houses is never enforced.
            if not same_house:
                return True
            if first_hit:
                return assignment[second_var] != self.val2 if has_second else True
            if self.bidirectional and second_hit:
                return assignment[first_var] != self.val1 if has_first else True
            return True

        # Forward implication: val1 -> val2 (same for both house layouts).
        if first_hit:
            return assignment[second_var] == self.val2 if has_second else True

        # Reverse implication: val2 -> val1, only when bidirectional.
        if self.bidirectional and second_hit:
            return assignment[first_var] == self.val1 if has_first else True

        # Contrapositive of the forward rule; evaluated for the same-house,
        # bidirectional case only.
        if (same_house and self.bidirectional and has_second
                and assignment[second_var] != self.val2):
            return assignment[first_var] != self.val1 if has_first else True

        return True

    def get_involved_variables(self) -> List[str]:
        """Return the two variable names this constraint reads."""
        return self.variables


class PositionalConstraint(Constraint):
    """Relate the house holding ``val1`` to the house holding ``val2``.

    Supported ``constraint_type`` values are ``'left_of'``, ``'right_of'``,
    ``'adjacent'`` and ``'distance'``; any other type is never violated by
    the pairwise check.
    """

    def __init__(self, constraint_type: str, val1: str, type1: str, val2: str, type2: str, distance: int = None, num_houses: int = 5):
        self.constraint_type = constraint_type
        self.val1, self.type1 = val1, type1
        self.val2, self.type2 = val2, type2
        self.distance = distance
        self.num_houses = num_houses

        house_numbers = range(1, num_houses + 1)
        first_scope = [f"House{n}_{type1}" for n in house_numbers]
        second_scope = [f"House{n}_{type2}" for n in house_numbers]
        self.variables = first_scope + second_scope
        self.constraint_checks = 0

        # Start from "anywhere" and remove the end houses that the relation
        # makes impossible.
        self.valid_positions_val1 = set(house_numbers)
        self.valid_positions_val2 = set(house_numbers)
        if constraint_type == 'left_of':
            self.valid_positions_val1 -= {num_houses}
            self.valid_positions_val2 -= {1}
        elif constraint_type == 'right_of':
            self.valid_positions_val1 -= {1}
            self.valid_positions_val2 -= {num_houses}

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return False when the placed values break the positional relation.

        Each value is located by scanning every house; if a value appears in
        several houses, the highest-numbered one is used. For ``'distance'``
        the houses must be ``distance + 1`` apart.
        """
        self.constraint_checks += 1

        pos1 = None
        pos2 = None
        for number in range(1, self.num_houses + 1):
            key1 = f"House{number}_{self.type1}"
            key2 = f"House{number}_{self.type2}"
            if key1 in assignment and assignment[key1] == self.val1:
                pos1 = number
            if key2 in assignment and assignment[key2] == self.val2:
                pos2 = number

        if pos1 is not None and pos1 not in self.valid_positions_val1:
            return False
        if pos2 is not None and pos2 not in self.valid_positions_val2:
            return False

        kind = self.constraint_type
        if pos1 is not None and pos2 is not None:
            if kind == 'left_of':
                return pos1 < pos2
            if kind == 'right_of':
                return pos1 > pos2
            if kind == 'adjacent':
                return abs(pos1 - pos2) == 1
            if kind == 'distance':
                return abs(pos1 - pos2) == self.distance + 1

        # End-of-row checks for a single placed value.
        if kind == 'left_of' and (pos1 == self.num_houses or pos2 == 1):
            return False
        if kind == 'right_of' and (pos1 == 1 or pos2 == self.num_houses):
            return False

        return True

    def get_involved_variables(self) -> List[str]:
        """Return every ``type1`` and ``type2`` variable name across all houses."""
        return self.variables


class NotInPositionConstraint(Constraint):
    """Forbid a value from appearing in one specific house."""

    def __init__(self, val: str, val_type: str, house_num: int):
        self.val, self.val_type, self.house_num = val, val_type, house_num
        self.variables = [f"House{house_num}_{val_type}"]
        self.constraint_checks = 0

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return True unless the house's variable is assigned the forbidden value."""
        self.constraint_checks += 1

        target = self.variables[0]
        # An absent key means the variable is still undecided.
        return target not in assignment or assignment[target] != self.val

    def get_involved_variables(self) -> List[str]:
        """Return the single variable name this constraint reads."""
        return self.variables


class CoLocationConstraint(Constraint):
    """Global constraint: ``val1`` and ``val2`` must end up in the same house."""

    def __init__(self, val1: str, type1: str, val2: str, type2: str, num_houses: int):
        self.val1, self.type1 = val1, type1
        self.val2, self.type2 = val2, type2
        self.num_houses = num_houses
        house_numbers = range(1, num_houses + 1)
        self.variables = (
            [f"House{n}_{type1}" for n in house_numbers]
            + [f"House{n}_{type2}" for n in house_numbers]
        )
        self.constraint_checks = 0

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return False only when both values are placed in different houses."""
        self.constraint_checks += 1

        def locate(attr_type, wanted):
            # Lowest-numbered house whose variable currently holds ``wanted``.
            return next(
                (
                    n for n in range(1, self.num_houses + 1)
                    if f"House{n}_{attr_type}" in assignment
                    and assignment[f"House{n}_{attr_type}"] == wanted
                ),
                None,
            )

        first_home = locate(self.type1, self.val1)
        second_home = locate(self.type2, self.val2)

        # With at most one value placed there is nothing to compare yet.
        if first_home is None or second_home is None:
            return True
        return first_home == second_home

    def get_involved_variables(self) -> List[str]:
        """Return every ``type1`` and ``type2`` variable name across all houses."""
        return self.variables


class CoLocationExclusionConstraint(Constraint):
    """Global constraint: ``val1`` and ``val2`` must NOT share a house."""

    def __init__(self, val1: str, type1: str, val2: str, type2: str, num_houses: int):
        self.val1, self.type1 = val1, type1
        self.val2, self.type2 = val2, type2
        self.num_houses = num_houses
        house_numbers = range(1, num_houses + 1)
        self.variables = (
            [f"House{n}_{type1}" for n in house_numbers]
            + [f"House{n}_{type2}" for n in house_numbers]
        )
        self.constraint_checks = 0

    def is_satisfied(self, assignment: Dict[str, Any]) -> bool:
        """Return False only when both values are placed in the same house."""
        self.constraint_checks += 1

        def locate(attr_type, wanted):
            # Lowest-numbered house whose variable currently holds ``wanted``.
            return next(
                (
                    n for n in range(1, self.num_houses + 1)
                    if f"House{n}_{attr_type}" in assignment
                    and assignment[f"House{n}_{attr_type}"] == wanted
                ),
                None,
            )

        first_home = locate(self.type1, self.val1)
        second_home = locate(self.type2, self.val2)

        # With at most one value placed there is nothing to compare yet.
        if first_home is None or second_home is None:
            return True
        return first_home != second_home

    def get_involved_variables(self) -> List[str]:
        """Return every ``type1`` and ``type2`` variable name across all houses."""
        return self.variables

## Clue Parser
Parses natural language clues into CSP constraints.

In [291]:
class ClueParser:
    """Parse natural language clues into CSP constraints.

    Most clue patterns are returned as plain tuples whose first element names
    the relation (``'same_house'``, ``'not_in_position'``, ``'directly_left'``,
    ``'name_owns_attr'``, ...). "Value is in house N" clues are returned as
    ``UnaryConstraint`` objects built by :meth:`create_position_constraint`.

    Args:
        entities: attribute type -> set of possible values.
        num_houses: number of houses in the puzzle (positions are 1-based).
        names: optional set of person names to look for in clues. They are
            compared against the lower-cased clue text.
    """

    # Ordinal word -> 1-based house number.
    _ORDINALS = {'first': 1, 'second': 2, 'third': 3, 'fourth': 4, 'fifth': 5}

    # "... in house 3" / "... lives in house 3".
    _HOUSE_NUM_RE = re.compile(r'(?:in|lives in) house (\d+)')

    # A house-number phrase that is directly negated, e.g. "not in house 2",
    # "does not live in house 2", "doesn't live in house 2",
    # "is never in house 2". Group 1 is the house number.
    _NEGATED_HOUSE_RE = re.compile(r"(?:not|n't|never)\s+(?:\w+\s+)?in house (\d+)")

    def __init__(self, entities: Dict[str, set], num_houses: int, names: Set[str] = None):
        self.entities = entities
        self.num_houses = num_houses
        self.names = names if names else set()

        # Reverse lookup: lower-cased value -> attribute type.
        self.value_to_type = {}
        for attr_type, values in entities.items():
            for value in values:
                self.value_to_type[value.lower()] = attr_type

    def parse_clues(self, puzzle_text: str) -> List[Any]:
        """Parse every numbered clue in ``puzzle_text`` into constraints.

        Everything after the first "Clue"/"Clues" marker is treated as the
        clue section and split on ``<number>.`` prefixes. Returns an empty
        list when no clue section is found.
        """
        constraints = []

        clue_match = re.search(r'Clues?:?\s*(.*?)$', puzzle_text, re.DOTALL | re.IGNORECASE)
        if not clue_match:
            return constraints

        clues_text = clue_match.group(1)

        # Each clue runs from its "N." prefix to the next one (or the end).
        clue_lines = re.findall(r'\d+\.\s*(.+?)(?=\n\d+\.|\Z)', clues_text, re.DOTALL)

        for clue in clue_lines:
            parsed = self.parse_single_clue(clue.strip())
            if parsed:
                constraints.extend(parsed)

        return constraints

    def _find_mentioned_name(self, clue_lower: str) -> Optional[str]:
        """Return the known name that appears earliest in the clue, or None.

        Names must match as whole words, so e.g. 'eve' is not found inside
        'however'. Choosing the earliest match (longest first on ties) makes
        the result independent of set iteration order.
        """
        candidates = []
        for name in self.names:
            if not name:
                continue
            hit = re.search(rf'\b{re.escape(name)}\b', clue_lower)
            if hit:
                candidates.append((hit.start(), -len(name), name))
        return min(candidates)[2] if candidates else None

    def _parse_immediate_neighbor(self, clue_lower: str, clue_values: List[Tuple[str, str]],
                                  phrase: str, tag: str) -> Optional[tuple]:
        """Build a ``(tag, val1, type1, val2, type2)`` tuple for "X <phrase> Y" clues.

        ``val1`` is taken from the text before ``phrase`` and ``val2`` from
        the text after it; returns None when either side has no known value.
        """
        parts = clue_lower.split(phrase)
        if len(parts) != 2:
            return None

        before, after = parts
        before_values = [v for v, _ in clue_values if v in before]
        after_values = [v for v, _ in clue_values if v in after]
        if not (before_values and after_values):
            return None

        val1, val2 = before_values[-1], after_values[0]
        type1 = self.value_to_type.get(val1)
        type2 = self.value_to_type.get(val2)
        if val1 != val2 and type1 and type2:
            return (tag, val1, type1, val2, type2)
        return None

    def parse_single_clue(self, clue: str) -> List[Any]:
        """Parse a single clue into a list with zero or one constraint.

        Patterns are tried in priority order and the first one that matches
        wins. An empty list means the clue was not understood.
        """
        constraints = []
        clue_lower = clue.lower()

        # All known attribute values mentioned in the clue (longest first).
        clue_values = self.extract_values_from_text(clue_lower)

        mentioned_name = self._find_mentioned_name(clue_lower)

        # A negated house phrase ("not in house 2") must never be read as a
        # positive placement, so positive house-number parsing is disabled
        # for such clues and they fall through to the negative handlers.
        negated_house_match = self._NEGATED_HOUSE_RE.search(clue_lower)
        house_num_match = None if negated_house_match else self._HOUSE_NUM_RE.search(clue_lower)

        # PRIORITY 1: "NAME lives in house n"
        if mentioned_name and house_num_match:
            house_num = int(house_num_match.group(1))
            if 1 <= house_num <= self.num_houses:
                constraints.append(('name_in_position', mentioned_name, house_num))
                return constraints

        # PRIORITY 2: "x is in house n" / "x lives in house n" (one value is enough)
        if house_num_match and clue_values:
            house_num = int(house_num_match.group(1))
            val, val_type = clue_values[0]
            if 1 <= house_num <= self.num_houses:
                constraints.append(self.create_position_constraint(val, val_type, house_num))
                return constraints

        # Pattern: "x is in the first/second/third house"
        for word, num in self._ORDINALS.items():
            if f'in the {word}' in clue_lower and 'not' not in clue_lower:
                if clue_values and num <= self.num_houses:
                    val, val_type = clue_values[0]
                    constraints.append(self.create_position_constraint(val, val_type, num))
                    return constraints

        # Pattern: "house n is painted X" or "house n is X"
        house_color_match = re.search(r'house (\d+) is (?:painted )?(\w+)', clue_lower)
        if house_color_match:
            house_num = int(house_color_match.group(1))
            color = house_color_match.group(2)
            if color in self.value_to_type:
                color_type = self.value_to_type[color]
                if 1 <= house_num <= self.num_houses:
                    constraints.append(self.create_position_constraint(color, color_type, house_num))
                    return constraints

        # Pattern: "the person in house N owns X"
        person_in_house_match = re.search(r'(?:person|friend) in house (\d+) owns? (?:the )?(\w+)', clue_lower)
        if person_in_house_match:
            house_num = int(person_in_house_match.group(1))
            item = person_in_house_match.group(2)
            if item in self.value_to_type:
                item_type = self.value_to_type[item]
                if 1 <= house_num <= self.num_houses:
                    constraints.append(self.create_position_constraint(item, item_type, house_num))
                    return constraints

        # PRIORITY 3: negative constraints (checked before the positive name patterns)
        # Pattern: "NAME does not live in the Y house" where Y is a color/attribute
        if mentioned_name and 'does not live in' in clue_lower and 'house' in clue_lower:
            for val, val_type in clue_values:
                if val_type != 'name' and val_type in self.entities:
                    if val:
                        constraints.append(('name_not_with_attr', mentioned_name, val, val_type))
                        return constraints
                    break

        # Positive links between a name and an attribute.
        if mentioned_name:
            name_pattern = re.escape(mentioned_name)

            # Pattern: "NAME owns X"
            name_owns_match = re.search(rf'\b{name_pattern}\b.*?owns? (?:the )?(\w+)', clue_lower)
            if name_owns_match:
                item = name_owns_match.group(1)
                if item in self.value_to_type:
                    item_type = self.value_to_type[item]
                    constraints.append(('name_owns_attr', mentioned_name, item, item_type))
                    return constraints

            # Pattern: "NAME lives in the X house" where X is a color/attribute
            name_lives_match = re.search(rf'\b{name_pattern}\b.*?lives? in the (\w+) house', clue_lower)
            if name_lives_match:
                attr = name_lives_match.group(1)
                if attr in self.value_to_type:
                    attr_type = self.value_to_type[attr]
                    constraints.append(('name_has_attr', mentioned_name, attr, attr_type))
                    return constraints

        # Pattern: "x is not in house y" or "x does not live in house y"
        if ('not in' in clue_lower or 'does not live in house' in clue_lower
                or negated_house_match):
            not_house_match = (
                re.search(r'not (?:live )?in (?:house )?(\d+)', clue_lower)
                or negated_house_match
            )
            if not_house_match and clue_values:
                house_num = int(not_house_match.group(1))
                val, val_type = clue_values[0]
                if 1 <= house_num <= self.num_houses:
                    constraints.append(('not_in_position', val, val_type, house_num))
                    return constraints

            # Negated ordinal positions ("not in the first house").
            for word, num in self._ORDINALS.items():
                if f'not in the {word}' in clue_lower or f'not live in the {word}' in clue_lower:
                    if clue_values and num <= self.num_houses:
                        val, val_type = clue_values[0]
                        constraints.append(('not_in_position', val, val_type, num))
                        return constraints

        # Every remaining pattern relates two values.
        if len(clue_values) < 2:
            return constraints

        # PRIORITY 4: "X house is immediately to the left/right of Y house"
        for phrase, tag in (
            ('immediately to the left of', 'directly_left'),
            ('immediately to the right of', 'directly_right'),
        ):
            if phrase in clue_lower:
                neighbor = self._parse_immediate_neighbor(clue_lower, clue_values, phrase, tag)
                if neighbor:
                    constraints.append(neighbor)
                    return constraints

        val1, type1 = clue_values[0]
        val2, type2 = clue_values[1]

        # PRIORITY 5: "X house contains Y"
        if 'contains the' in clue_lower or 'house contains' in clue_lower:
            if type1 != type2:
                constraints.append(self.create_same_house_constraint(val1, type1, val2, type2))
                return constraints

        # Default: two values of different types share a house, unless the
        # clue is an unparsed negative statement, in which case nothing is
        # emitted rather than guessing.
        if 'not' in clue_lower or "doesn't" in clue_lower:
            return constraints

        if type1 != type2:
            constraints.append(self.create_same_house_constraint(val1, type1, val2, type2))

        return constraints

    def extract_values_from_text(self, text: str) -> List[Tuple[str, str]]:
        """Return ``(value, attribute_type)`` for each known value found in ``text``.

        Values are searched as substrings, longest first, and a shorter value
        is not reported at a position already covered by a longer match. The
        result is ordered by decreasing value length, not by text position.
        """
        found_values = []
        matched_positions = set()

        for value in sorted(self.value_to_type, key=len, reverse=True):
            start = 0
            while True:
                pos = text.find(value, start)
                if pos == -1:
                    break

                span = range(pos, pos + len(value))
                if matched_positions.isdisjoint(span):
                    found_values.append((value, self.value_to_type[value]))
                    matched_positions.update(span)
                    break

                # Overlaps an earlier (longer) match; look further right.
                start = pos + 1

        return found_values

    def create_same_house_constraint(self, val1: str, type1: str, val2: str, type2: str):
        """Return the tuple form of a "both values in the same house" clue."""
        return ('same_house', val1, type1, val2, type2)

    def create_adjacent_constraint(self, val1: str, type1: str, val2: str, type2: str):
        """Return the tuple form of a "values in neighbouring houses" clue."""
        return ('adjacent', val1, type1, val2, type2)

    def create_position_constraint(self, val: str, val_type: str, position: int):
        """Return a ``UnaryConstraint`` fixing ``House{position}_{val_type}`` to ``val``."""
        var_name = f"House{position}_{val_type}"
        return UnaryConstraint(var_name, None, val)


## Puzzle Parser
Parses puzzle text and extracts entities, then builds the CSP.

In [292]:
# Replace the PuzzleParser class definition with this corrected version:

class PuzzleParser:
    """
    Build a CSP from the text of a logic grid puzzle.

    Attribute values are gathered from explicit "Label: a, b, c" lists in the
    puzzle text and from a dataset-specific knowledge base of names, colors
    and pets. Clue sentences are handed to ClueParser and the resulting
    constraint descriptions are turned into Constraint objects here.
    Puzzles without a usable size string are treated as 3 houses x 3 features.
    """

    # Dataset-specific vocabulary used to recognise values in puzzle text.
    KNOWLEDGE_BASE = {
        'name': {
            'bob', 'grace', 'carol', 'niaj', 'judy', 'eve', 'frank',
            'mallory', 'alice', 'ivan', 'heidi', 'david'
        },
        'color': {
            'white', 'red', 'yellow', 'orange', 'blue', 'purple', 'green'
        },
        'pet': {
            'bird', 'turtle', 'fish', 'rabbit', 'hamster', 'dog', 'cat'
        }
    }

    # (num_houses, num_features) used when the size is missing or unparseable.
    DEFAULT_SIZE = (3, 3)

    # Categories that must exist in every puzzle and are sized to num_houses.
    REQUIRED_CATEGORIES = ('name', 'color', 'pet')

    @staticmethod
    def _parse_size(size: Any) -> Tuple[int, int]:
        """Parse a "houses*features" size value; fall back to DEFAULT_SIZE."""
        try:
            num_houses, num_features = (int(part) for part in str(size).split('*'))
        except ValueError:
            # Covers a missing size, a non-numeric part, or a wrong part count.
            return PuzzleParser.DEFAULT_SIZE
        return num_houses, num_features

    def parse_puzzle(self, puzzle_data: Dict[str, Any]) -> "Tuple[CSP, Dict[str, Any]]":
        """Parse one puzzle record into a CSP plus descriptive metadata.

        Args:
            puzzle_data: mapping read for the keys 'size' (expected form
                "houses*features") and 'puzzle' (the puzzle text).

        Returns:
            ``(csp, metadata)`` where ``metadata`` holds 'num_houses',
            'num_features', 'entities', 'names' and 'puzzle_text'.
        """
        # 1. Parse size (3 houses x 3 features when unspecified or malformed)
        num_houses, num_features = self._parse_size(puzzle_data.get('size'))
        puzzle_text = puzzle_data.get('puzzle', '')

        # 2. Extract entities (explicit lists + knowledge base)
        entities = self.extract_entities(puzzle_text, num_houses)

        # 3. Create one variable per house and attribute,
        #    e.g. House1_color, House1_pet, House1_name
        csp = CSP()
        for i in range(1, num_houses + 1):
            for attr_type, attr_values in entities.items():
                csp.add_variable(CSPVariable(f"House{i}_{attr_type}", list(attr_values)))

        # 4A. Within one attribute, every house must hold a different value
        for attr_type in entities:
            house_vars = [f"House{i}_{attr_type}" for i in range(1, num_houses + 1)]
            csp.add_constraint(AllDifferentConstraint(house_vars))

        # 4B. Parse the natural-language clues; the names are passed along so
        #     the clue parser can recognise them in the text
        all_names = entities.get('name', set())
        clue_parser = ClueParser(entities, num_houses, all_names)

        for constraint_data in clue_parser.parse_clues(puzzle_text):
            if isinstance(constraint_data, UnaryConstraint):
                csp.add_constraint(constraint_data)
                continue

            # Tuple descriptions expand to one constraint, a list, or None
            created = self.create_constraint_from_tuple(constraint_data, num_houses, entities)
            if not isinstance(created, list):
                created = [created]
            for constraint in created:
                if constraint:
                    csp.add_constraint(constraint)

        metadata = {
            'num_houses': num_houses,
            'num_features': num_features,
            'entities': entities,
            'names': all_names,
            'puzzle_text': puzzle_text
        }

        return csp, metadata

    def extract_entities(self, puzzle_text: str, num_houses: int) -> Dict[str, Set[str]]:
        """Collect the candidate values of every attribute category.

        Values come from explicit lists in the text ("Colors: red, blue") and
        from whole-word knowledge-base hits anywhere in the text. The
        categories in REQUIRED_CATEGORIES are then forced to exactly
        ``num_houses`` values: short categories are padded with
        ``unknown_<category>_<n>`` placeholders, long ones are trimmed keeping
        knowledge-base values first, explicitly listed values second, with
        alphabetical order as the final tie-break so the result is
        reproducible. Any other category found by the regexes is returned
        unchanged.

        Args:
            puzzle_text: full puzzle text.
            num_houses: number of values each required category must have.

        Returns:
            Mapping of category name to its set of lower-case values.
        """
        entities = defaultdict(set)
        puzzle_lower = puzzle_text.lower()

        # --- STRATEGY 1: explicit lists ---
        patterns = [
            r'(\w+):\s*([^.\n]+)',         # "Colors: Red, Blue"
            r'The (\w+) are:?\s*([^.\n]+)' # "The colors are: Red..."
        ]
        for pattern in patterns:
            matches = re.findall(pattern, puzzle_text, re.MULTILINE | re.IGNORECASE)
            for attr_name, values_str in matches:
                clean_values = self._split_value_list(values_str)
                if clean_values:
                    entities[self._normalize_category(attr_name)].update(clean_values)

        # Remember what was listed explicitly; used to rank values when trimming.
        explicit = {category: set(values) for category, values in entities.items()}

        # --- STRATEGY 2: knowledge-base scan (whole words only) ---
        for category, known_values in self.KNOWLEDGE_BASE.items():
            found_values = {
                val for val in known_values
                if re.search(rf'\b{re.escape(val)}\b', puzzle_lower)
            }
            if found_values:
                entities[category].update(found_values)

        # --- STRATEGY 3 + 4: required categories exist and hold num_houses values ---
        for category in self.REQUIRED_CATEGORIES:
            current_vals = entities[category]  # defaultdict creates a missing category

            if len(current_vals) < num_houses:
                missing_count = num_houses - len(current_vals)
                for i in range(missing_count):
                    current_vals.add(f"unknown_{category}_{i+1}")

            elif len(current_vals) > num_houses:
                kb_set = self.KNOWLEDGE_BASE.get(category, set())
                explicit_set = explicit.get(category, set())
                # False sorts before True, so KB / explicit members come first;
                # the value itself breaks ties deterministically.
                ranked = sorted(
                    current_vals,
                    key=lambda v: (v not in kb_set, v not in explicit_set, v),
                )
                entities[category] = set(ranked[:num_houses])

        return dict(entities)

    @staticmethod
    def _normalize_category(attr_name: str) -> str:
        """Map a list label to its category key ('Friends' -> 'name', 'Colors' -> 'color')."""
        label = attr_name.lower().strip()
        if label in ('friends', 'people', 'names', 'person'):
            return 'name'
        if label.endswith('s'):
            return label[:-1]
        return label

    @staticmethod
    def _split_value_list(values_str: str) -> List[str]:
        """Split the text after a list label into cleaned, lower-case values."""
        # Whole-word "and"/"or" become commas; \b keeps the "or" in "orange" intact.
        separated = re.sub(r'\b(and|or)\b', ',', values_str, flags=re.IGNORECASE)

        tokens = []
        for part in separated.split(','):
            part = part.strip()
            if ' ' in part:
                # Space-separated list ("Red Blue Green"): every word is a value.
                tokens.extend(part.split())
            else:
                tokens.append(part)

        cleaned = []
        for token in tokens:
            token = token.strip().lower()
            token = token.replace('`', '').replace('"', '').replace("'", '').strip()
            # Empty strings and single characters (e.g. a clue number) are dropped.
            if len(token) > 1:
                cleaned.append(token)
        return cleaned

    def create_constraint_from_tuple(self, constraint_data: tuple, num_houses: int, entities: Optional[Dict] = None):
        """Turn a tuple constraint description into Constraint object(s).

        ``constraint_data[0]`` names the kind; the remaining items depend on it:
          * 'same_house', 'directly_left', 'directly_right', 'left_of',
            'right_of', 'adjacent': (val1, type1, val2, type2)
          * 'distance': (val1, type1, val2, type2, distance)
          * 'name_in_position': (name, house_num)
          * 'name_owns_attr', 'name_has_attr', 'name_not_with_attr':
            (name, attr_value, attr_type)
          * 'not_in_position': (val, val_type, house_num)

        Args:
            constraint_data: the tuple description.
            num_houses: number of houses in the puzzle.
            entities: accepted for interface compatibility; not used here.

        Returns:
            A single constraint, a list of constraints, or None when the kind
            is not recognised.
        """
        constraint_type = constraint_data[0]

        if constraint_type == 'same_house':
            _, val1, type1, val2, type2 = constraint_data
            # One implication per house, all within the same house.
            return [
                ImplicationConstraint(i, type1, val1, type2, val2)
                for i in range(1, num_houses + 1)
            ]

        elif constraint_type == 'directly_left':
            _, val1, type1, val2, type2 = constraint_data
            # val2 is excluded from the first house and val1 from the last.
            constraints_list = [
                NotInPositionConstraint(val2, type2, 1),
                NotInPositionConstraint(val1, type1, num_houses),
            ]
            # Implications in both directions tie val1 at house i to val2 at house i+1.
            for i in range(1, num_houses):
                constraints_list.append(ImplicationConstraint(i, type1, val1, type2, val2, target_house=i+1))
                constraints_list.append(ImplicationConstraint(i+1, type2, val2, type1, val1, target_house=i))
            return constraints_list

        elif constraint_type == 'directly_right':
            _, val1, type1, val2, type2 = constraint_data
            # val1 is excluded from the first house and val2 from the last.
            constraints_list = [
                NotInPositionConstraint(val1, type1, 1),
                NotInPositionConstraint(val2, type2, num_houses),
            ]
            # Implications in both directions tie val1 at house i to val2 at house i-1.
            for i in range(2, num_houses + 1):
                constraints_list.append(ImplicationConstraint(i, type1, val1, type2, val2, target_house=i-1))
                constraints_list.append(ImplicationConstraint(i-1, type2, val2, type1, val1, target_house=i))
            return constraints_list

        elif constraint_type in ('left_of', 'right_of', 'adjacent'):
            _, val1, type1, val2, type2 = constraint_data
            return PositionalConstraint(constraint_type, val1, type1, val2, type2, num_houses=num_houses)

        elif constraint_type == 'distance':
            _, val1, type1, val2, type2, distance = constraint_data
            return PositionalConstraint('distance', val1, type1, val2, type2, distance=distance, num_houses=num_houses)

        elif constraint_type == 'name_in_position':
            _, name, house_num = constraint_data
            return UnaryConstraint(f"House{house_num}_name", None, name)

        elif constraint_type in ('name_owns_attr', 'name_has_attr'):
            # Both kinds produce the same co-location constraint.
            _, name, attr_value, attr_type = constraint_data
            return CoLocationConstraint(name, 'name', attr_value, attr_type, num_houses)

        elif constraint_type == 'name_not_with_attr':
            _, name, attr_value, attr_type = constraint_data
            return CoLocationExclusionConstraint(name, 'name', attr_value, attr_type, num_houses)

        elif constraint_type == 'not_in_position':
            _, val, val_type, house_num = constraint_data
            return NotInPositionConstraint(val, val_type, house_num)

        return None

## Solution Formatting
Format CSP solutions into the expected JSON structure.

In [293]:
def format_grid_solution(assignment: Dict[str, str], metadata: Dict[str, Any] = None, puzzle_text: str = '') -> Dict[str, Any]:
    """Format a CSP assignment as a ``{'headers': [...], 'grid': [...]}`` table.

    Each grid row is ``[house_number, name, <other attributes...>]``.

    Args:
        assignment: mapping of variable names of the form
            ``House<N>_<attribute>`` to their assigned values.
        metadata: optional parse metadata. Its 'entities' mapping fixes the
            attribute columns and their order; 'num_houses' fixes the row
            count. When metadata is empty or missing, both are inferred from
            the assignment keys and attribute columns are sorted by name so
            the layout is reproducible. A metadata dict without 'num_houses'
            falls back to the inferred house count.
        puzzle_text: unused; kept so existing callers keep working.

    Returns:
        Dict with 'headers' (always starting with 'House', 'Name') and 'grid'.
        Names starting with "unknown" become '', other string names are
        capitalized; remaining attribute values are passed through as-is.
    """
    # Scan the keys once; both fallbacks below rely on this.
    house_nums = []
    inferred_attrs = set()
    for key in assignment:
        if key.startswith('House') and '_' in key:
            prefix, attr = key.split('_', 1)
            try:
                house_nums.append(int(prefix.replace('House', '')))
            except ValueError:
                continue
            inferred_attrs.add(attr)
    inferred_houses = max(house_nums, default=0)

    if metadata:
        num_houses = metadata.get('num_houses')
        if num_houses is None:
            num_houses = inferred_houses
        entities = metadata.get('entities') or {}
        attr_types = [t for t in entities if t != 'name']
    else:
        num_houses = inferred_houses
        # Sets iterate in arbitrary order; sort for a stable column layout.
        attr_types = sorted(inferred_attrs - {'name'})

    headers = ['House', 'Name'] + [t.capitalize() for t in attr_types]

    grid = []
    for i in range(1, num_houses + 1):
        row = [str(i)]

        name_value = assignment.get(f"House{i}_name", '')
        if name_value and str(name_value).lower().startswith('unknown'):
            row.append('')
        elif isinstance(name_value, str):
            row.append(name_value.capitalize())
        else:
            row.append(name_value)

        for attr_type in attr_types:
            row.append(assignment.get(f"House{i}_{attr_type}", ''))

        grid.append(row)

    return {
        'headers': headers,
        'grid': grid
    }


In [None]:
def solve_puzzle(puzzle_data: Dict[str, Any]) -> Dict[str, Any]:
    """Solve a single logic grid puzzle via ``solve_puzzle_text`` from solver.py.

    Args:
        puzzle_data: mapping read for the keys 'puzzle' (puzzle text) and
            'size'.

    Returns:
        On success a dict with 'headers', 'grid', 'steps',
        'forward_check_steps', 'decision_steps' and 'num_solutions'. Each grid
        row is ``[house, name, *attributes]`` with the house number as a
        string, "unknown..." names blanked and real names capitalized; rows
        may carry any number of attribute columns. On failure (solver status
        other than 'ok', or any exception) a dict with a single 'error' key.
    """
    try:
        from newww.newCode.solver import solve_puzzle_text

        puzzle_text = puzzle_data.get('puzzle', '')
        size_raw = str(puzzle_data.get('size', '')).strip()

        result = solve_puzzle_text(puzzle_text, size_raw)
        if result.get('status') != 'ok':
            return {'error': result.get('error', 'No solution found')}

        grid_solution = result.get('grid_solution', {})
        headers = grid_solution.get('header', [])

        # Post-process names: blank unknowns, capitalize real names.
        # Star-unpacking keeps every attribute column, whatever the puzzle width.
        rows_out = []
        for house, name, *attributes in grid_solution.get('rows', []):
            if isinstance(name, str):
                name = '' if name.lower().startswith('unknown') else name.capitalize()
            rows_out.append([str(house), name, *attributes])

        return {
            'headers': headers,
            'grid': rows_out,
            'steps': result.get('steps'),
            'forward_check_steps': result.get('forward_check_steps'),
            'decision_steps': result.get('decision_steps'),
            'num_solutions': result.get('num_solutions')
        }

    except Exception as e:
        # Notebook boundary: report the failure per puzzle instead of aborting the batch.
        return {'error': f'Error solving puzzle: {str(e)}'}


## Main Execution
Load Test_100_Puzzles.parquet and solve all puzzles.

In [295]:
# Read the puzzle dataset into a DataFrame and print a short summary
df = pd.read_parquet('Test_100_Puzzles.parquet')

puzzle_total = len(df)
column_names = list(df.columns)
print(f"Loaded {puzzle_total} puzzles from Test_100_Puzzles.parquet")
print(f"Columns: {column_names}")

# Preview: first 500 characters of the first puzzle's text
first_puzzle_text = df.iloc[0]['puzzle']
print("\nFirst puzzle:")
print(first_puzzle_text[:500] + "...")

Loaded 100 puzzles from Test_100_Puzzles.parquet
Columns: ['id', 'size', 'puzzle', 'created_at']

First puzzle:
Three friends live in three houses in a row, numbered 1 to 3. Each house is painted a different color and each friend owns a different pet.

Colors: orange, blue, green.
Pets: cat, turtle, dog.

Clues:
1. Mallory lives in the blue house.
2. Alice lives in house 3.
3. The orange house contains the turtle.
4. House 1 is painted orange.
5. Bob does not live in the blue house.
6. Mallory does not live in the orange house....


In [296]:
# Run solve_puzzle on every dataset row and collect the outcomes by puzzle id
results = {}

for idx, row in df.iterrows():
    puzzle_id = row['id']
    puzzle_data = {field: row[field] for field in ('id', 'size', 'puzzle')}

    result = solve_puzzle(puzzle_data)
    results[puzzle_id] = result

# A puzzle counts as solved when its result carries a 'headers' entry
total_puzzles = len(results)
solved_count = sum(
    'headers' in outcome for outcome in results.values() if outcome is not None
)
if total_puzzles > 0:
    success_rate = solved_count / total_puzzles * 100
else:
    success_rate = 0

print(f"\nSolved {solved_count}/{total_puzzles} puzzles ({success_rate:.1f}%)")


Solved 100/100 puzzles (100.0%)


In [297]:
# Save results to results.json
output_file = 'results.json'

# Explicit encoding so the file does not depend on the platform's default.
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

print(f"\nResults saved to {output_file}")
print(f"File contains {len(results)} puzzle solutions")


Results saved to results.json
File contains 100 puzzle solutions


In [298]:
# Display a sample solution (first solved puzzle) as an aligned text table
for puzzle_id, solution in results.items():
    if solution is not None and 'headers' in solution:
        print(f"\nSample Solution for {puzzle_id}:")
        print("=" * 80)

        headers = solution['headers']
        rows = solution['grid']

        # Column width = widest cell in that column (header included).
        # Rows wider than the header list get extra columns instead of failing.
        col_widths = [len(str(h)) for h in headers]
        for row in rows:
            for i, cell in enumerate(row):
                if i < len(col_widths):
                    col_widths[i] = max(col_widths[i], len(str(cell)))
                else:
                    col_widths.append(len(str(cell)))

        # Print table
        header_str = " | ".join(str(h).ljust(col_widths[i]) for i, h in enumerate(headers))
        print(f"| {header_str} |")
        # "|-" / "-|" mirror the "| " / " |" padding of the data rows so the
        # separator is exactly as wide as the rows.
        print("|-" + "-+-".join("-" * w for w in col_widths) + "-|")

        for row in rows:
            row_str = " | ".join(str(cell).ljust(col_widths[i]) for i, cell in enumerate(row))
            print(f"| {row_str} |")

        print("=" * 80)
        break


Sample Solution for test-3x3-001:
| House | Name    | Color  | Pet    |
|------+---------+--------+-------|
| 1     | Bob     | orange | turtle |
| 2     | Mallory | blue   | cat    |
| 3     | Alice   | green  | dog    |


In [299]:
# TEST: solve test-3x3-002 and confirm the result includes a Name column
test_id = 'test-3x3-002'
test_row = df[df['id'] == test_id]

if test_row.empty:
    print(f"Puzzle {test_id} not found in dataset")
else:
    record = test_row.iloc[0]
    test_data = {
        'id': record['id'],
        'size': record['size'],
        'puzzle': record['puzzle']
    }

    # Show the first 400 characters of the puzzle before solving it
    print(f"Testing {test_id}...")
    print("=" * 80)
    print("Puzzle:")
    print(test_data['puzzle'][:400])
    print("\n" + "-" * 80)

    test_result = solve_puzzle(test_data)

    if 'error' in test_result:
        print(f"\n✗ FAILED TO SOLVE: {test_result['error']}")
    else:
        print("\n✓ SOLVED!")
        print(f"\nHeaders: {test_result['headers']}")
        for row in test_result['grid']:
            print(f"  {row}")

        # Check if name column is present
        has_name_column = 'Name' in test_result['headers']
        if not has_name_column:
            print("\n⚠ WARNING: Name column is still missing")
        else:
            print("\n✓✓✓ SUCCESS: Name column is now present! ✓✓✓")

Testing test-3x3-002...
Puzzle:
Three friends live in three houses in a row, numbered 1 to 3. Each house is painted a different color and each friend owns a different pet.

Colors: red, blue, orange.
Pets: fish, bird, dog.

Clues:
1. The person in house 3 owns the bird.
2. The red house is immediately to the left of the orange house.
3. The blue house contains the dog.
4. The person in house 2 owns the fish.
5. Niaj does not liv

--------------------------------------------------------------------------------

✓ SOLVED!

Headers: ['House', 'Name', 'Color', 'Pet']
  ['1', 'Bob', 'blue', 'dog']
  ['2', 'Niaj', 'red', 'fish']
  ['3', '', 'orange', 'bird']

✓✓✓ SUCCESS: Name column is now present! ✓✓✓


In [300]:
# DIAGNOSTIC: solve one puzzle through solve_puzzle and print the solver statistics

test_id = 'test-3x3-001'
test_row = df[df['id'] == test_id]

if not test_row.empty:
    record = test_row.iloc[0]
    puzzle_data = {
        'id': record['id'],
        'size': record['size'],
        'puzzle': record['puzzle']
    }

    print(f"=== DIAGNOSTIC FOR {test_id} ===\n")
    print("PUZZLE TEXT:")
    print(puzzle_data['puzzle'])
    print("\n" + "="*80 + "\n")

    diag_result = solve_puzzle(puzzle_data)

    if 'error' not in diag_result:
        total_steps = diag_result.get('steps')
        forward_steps = diag_result.get('forward_check_steps')
        decision_steps = diag_result.get('decision_steps')

        print("✓ SOLVED!")
        print(f"Steps: {total_steps} (forward: {forward_steps}, decision: {decision_steps})")
        print(f"Num solutions considered: {diag_result.get('num_solutions')}")
        print("\nSOLUTION:")
        for row in diag_result['grid']:
            print(f"  {row}")
    else:
        print(f"✗ NO SOLUTION FOUND: {diag_result['error']}")
else:
    print(f"Puzzle {test_id} not found in dataset")


=== DIAGNOSTIC FOR test-3x3-001 ===

PUZZLE TEXT:
Three friends live in three houses in a row, numbered 1 to 3. Each house is painted a different color and each friend owns a different pet.

Colors: orange, blue, green.
Pets: cat, turtle, dog.

Clues:
1. Mallory lives in the blue house.
2. Alice lives in house 3.
3. The orange house contains the turtle.
4. House 1 is painted orange.
5. Bob does not live in the blue house.
6. Mallory does not live in the orange house.


✓ SOLVED!
Steps: 5 (forward: 4, decision: 1)
Num solutions considered: 2

SOLUTION:
  ['1', 'Bob', 'orange', 'turtle']
  ['2', 'Mallory', 'blue', 'cat']
  ['3', 'Alice', 'green', 'dog']


In [301]:
# Check what clues are being parsed
print("\n" + "="*80)
print("CHECKING CLUE PARSING:")
print("="*80 + "\n")

# Ensure metadata exists (parse the first puzzle if missing)
if 'metadata' not in globals():
    parser = PuzzleParser()
    sample_row = df.iloc[0]
    sample_data = {'size': sample_row['size'], 'puzzle': sample_row['puzzle']}
    _, metadata = parser.parse_puzzle(sample_data)

clue_parser = ClueParser(metadata['entities'], metadata['num_houses'], metadata['names'])
clues_text = """1. Mallory lives in the blue house.
2. Alice lives in house 3.
3. The orange house contains the turtle.
4. House 1 is painted orange.
5. Bob does not live in the blue house.
6. Mallory does not live in the orange house."""

for clue in clues_text.split('\n'):
    clue = clue.strip()
    if not clue:
        continue
    # Strip the leading "N. " numbering; the regex also handles clue numbers
    # with more than one digit, which a fixed-width slice would not.
    clue_body = re.sub(r'^\d+\.\s*', '', clue)
    parsed = clue_parser.parse_single_clue(clue_body)
    print(f"{clue}")
    print(f"  -> Parsed as: {parsed}\n")


CHECKING CLUE PARSING:

1. Mallory lives in the blue house.
  -> Parsed as: [('name_has_attr', 'mallory', 'blue', 'color')]

2. Alice lives in house 3.
  -> Parsed as: [('name_in_position', 'alice', 3)]

3. The orange house contains the turtle.
  -> Parsed as: [('same_house', 'orange', 'color', 'turtle', 'pet')]

4. House 1 is painted orange.
  -> Parsed as: [<__main__.UnaryConstraint object at 0x000002058E89FA70>]

5. Bob does not live in the blue house.
  -> Parsed as: [('name_not_with_attr', 'bob', 'blue', 'color')]

6. Mallory does not live in the orange house.
  -> Parsed as: [('name_not_with_attr', 'mallory', 'orange', 'color')]

