In [13]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# --- 1. Dataset Loading and Preparation ---
def load_data(file_name='dataset.csv'):
    """Load the animal/feature table, falling back to a small built-in dataset.

    Args:
        file_name: Path of the CSV file to read. It must contain an
            ``Animal`` column; every other column is treated by the rest of
            the program as a 0/1 feature.

    Returns:
        A DataFrame with one row per distinct animal, no missing values and
        a fresh ``0..n-1`` index.
    """
    try:
        df = pd.read_csv(file_name)
        # Without the label column the table is unusable; treat it like any
        # other load failure so the fallback below kicks in.
        if 'Animal' not in df.columns:
            raise ValueError("missing required 'Animal' column")
        print(f"Dataset '{file_name}' loaded successfully.")
    except Exception as e:
        # Deliberately broad: any problem reading the file (missing file,
        # parse error, bad encoding, ...) should still leave the game playable.
        print(f"Warning: Could not load '{file_name}' ({e}). Using fallback dataset.")
        # Minimal fallback dataset
        data = {
            'Animal': ['Dog', 'Cat', 'Lion', 'Eagle', 'Shark', 'Elephant', 'Frog', 'Bat'],
            'IsMammal': [1, 1, 1, 0, 0, 1, 0, 1],
            'CanFly': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsAquatic': [0, 0, 0, 0, 1, 0, 1, 0],
            'IsPet': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsCarnivore': [1, 1, 1, 1, 1, 0, 0, 0],
            'IsFoundInAfrica': [0, 0, 1, 0, 0, 1, 0, 0],
            'IsLarge': [0, 0, 1, 0, 1, 1, 0, 0],
            'HasFur': [1, 1, 1, 0, 0, 0, 0, 1],
            'CanBeDomesticated': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsDangerous': [0, 0, 1, 0, 1, 0, 0, 0],
            'IsHerbivore': [0, 0, 0, 0, 0, 1, 1, 0],
            'HasWings': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsNocturnal': [0, 1, 1, 0, 0, 0, 1, 1],
        }
        df = pd.DataFrame(data)

    df = df.drop_duplicates(subset=['Animal']).dropna()
    # Dropping rows leaves gaps in the index; renumber so that positional
    # labels such as len(df) never collide with an existing row.
    return df.reset_index(drop=True)

# --- 2. Model Training ---
def train_model(X, y):
    """Fit a decision tree on the feature table and return the fitted model.

    Args:
        X: Feature matrix, one row per animal.
        y: Labels (animal names), one per row of ``X``.

    Returns:
        The fitted ``DecisionTreeClassifier``. ``random_state`` is pinned to
        42 so repeated runs build the same tree.
    """
    classifier = DecisionTreeClassifier(random_state=42)
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(X, y)

# --- 3. Ask yes/no question ---
def ask_question(question):
    """Prompt on stdin until the user gives a yes/no answer.

    Args:
        question: Text shown to the user; ``" (y/n): "`` is appended.

    Returns:
        1 for ``y``/``yes``/``1`` and 0 for ``n``/``no``/``0``
        (case-insensitive, surrounding whitespace ignored).
    """
    verdicts = {'y': 1, 'yes': 1, '1': 1, 'n': 0, 'no': 0, '0': 0}
    prompt = f"\n{question} (y/n): "
    while True:
        reply = input(prompt).strip().lower()
        verdict = verdicts.get(reply)
        if verdict is not None:
            return verdict
        print("Invalid input. Please answer with 'y' or 'n'.")

# --- 4. Interactive Game ---
def _format_question(feature_name):
    """Turn a feature column name into a readable yes/no question.

    CamelCase names with an ``Is``/``Can``/``Has`` prefix (``IsMammal``,
    ``CanFly``, ``HasFur``) are expanded into a sentence. Names that already
    read as free text (they contain whitespace or end in ``?``) are returned
    as written, so a column such as ``Is it eight-legged`` is not mangled.
    """
    import re  # local import: only this helper needs it

    text = str(feature_name).strip()
    if text.endswith('?') or any(ch.isspace() for ch in text):
        return text.rstrip('?').rstrip() + '?'

    lead_ins = (('Is', 'Is it'), ('Can', 'Can it'), ('Has', 'Does it have'))
    # Fragments whose natural wording is not just the lower-cased words.
    special_phrases = {
        'Mammal': 'a mammal',
        'Pet': 'a pet',
        'Carnivore': 'carnivorous',
        'Herbivore': 'herbivorous',
        'FoundInAfrica': 'found in Africa',
    }
    for prefix, lead_in in lead_ins:
        rest = text[len(prefix):]
        # Require an upper-case letter right after the prefix so words that
        # merely start with the same letters (e.g. "Island") are left alone.
        if text.startswith(prefix) and rest[:1].isupper():
            phrase = special_phrases.get(rest)
            if phrase is None:
                # Split CamelCase into lower-case words.
                phrase = re.sub(r'(?<=[a-z0-9])(?=[A-Z])', ' ', rest).lower()
            return f"{lead_in} {phrase}?"
    return text + '?'


def _guess_from_tree(model, feature_names, answers):
    """Walk the fitted decision tree from the root to a leaf.

    Each split asks the player about its feature unless ``answers`` already
    holds a reply, in which case the stored reply is reused. New replies are
    recorded in ``answers`` (feature name -> 0/1). Returns the class label
    with the highest count at the leaf that is reached.
    """
    tree = model.tree_
    node = 0
    # The original code identifies a leaf by its two child ids being equal.
    while tree.children_left[node] != tree.children_right[node]:
        feature_name = feature_names[tree.feature[node]]
        if feature_name not in answers:
            answers[feature_name] = ask_question(_format_question(feature_name))
        if answers[feature_name] > tree.threshold[node]:
            node = tree.children_right[node]
        else:
            node = tree.children_left[node]
    return model.classes_[np.argmax(tree.value[node][0])]


def _pick_next_guess(features, remaining_animals, last_wrong, answers):
    """Choose the most plausible animal among those not yet guessed.

    Candidates are ranked first by how many of the player's recorded answers
    they contradict, then by how many features differ from the animal that
    was just rejected. Ties keep the order of ``remaining_animals``.
    """
    candidates = features.loc[remaining_animals].to_numpy()
    columns = features.columns.tolist()
    asked_cols = [columns.index(name) for name in answers]
    given = np.array([answers[name] for name in answers], dtype=int)
    contradictions = (candidates[:, asked_cols] != given).sum(axis=1)
    distance = (candidates != features.loc[last_wrong].to_numpy()).sum(axis=1)
    best = min(
        range(len(remaining_animals)),
        key=lambda i: (contradictions[i], distance[i]),
    )
    return remaining_animals[best]


def _learn_new_animal(df, model, answers):
    """Ask for the player's animal, store it in ``df`` and refit ``model``.

    ``df`` is modified in place: an optional new feature column is added
    (0 for every existing animal) and the animal's row is appended. Features
    the player was never asked about default to 0.
    """
    print("\nI couldn't guess your animal. Let's add it to my knowledge!")
    new_animal = input("Enter the name of your animal: ").strip()
    new_feature = input("Enter a distinguishing feature/question for this animal: ").strip()

    if not new_animal:
        print("No animal name was entered, so there is nothing to learn.")
        return

    # Keep what the player already told us instead of recording all zeros.
    known = dict(answers)
    if new_feature and new_feature != 'Animal':
        if new_feature not in df.columns:
            df[new_feature] = 0
        known[new_feature] = ask_question(f"Does your animal have the feature '{new_feature}'?")

    matches = df.index[df['Animal'] == new_animal]
    if len(matches):
        # The animal is already listed: refresh only the values we know.
        for column, value in known.items():
            df.loc[matches[0], column] = value
    else:
        row = {column: 0 for column in df.columns}
        row.update(known)
        row['Animal'] = new_animal
        # Pick a label that is not in use; len(df) alone can collide with an
        # existing label when the index has gaps.
        label = len(df)
        while label in df.index:
            label += 1
        # Build the row by column name so the position of 'Animal' is irrelevant.
        df.loc[label] = [row[column] for column in df.columns]

    # Refit in place so the caller's model reflects the updated table.
    model.fit(df.drop('Animal', axis=1).astype(int), df['Animal'])
    print("Thanks! I have updated my knowledge. You can play again now.")


# --- 4. Interactive Game ---
def start_game(df, model):
    """
    Interactive Akinator-style game with multi-guess logic and learning new features/animals.

    The first guess comes from walking the decision tree and asking one
    question per split. After a wrong guess, the remaining animals are tried
    one at a time, best match to the recorded answers first. If every animal
    is rejected, the player is asked to teach the program their animal.

    Args:
        df: Table with an ``Animal`` column plus feature columns that can be
            cast to int. Modified in place when a new animal is learned.
        model: Fitted decision tree exposing ``tree_`` and ``classes_``.
            Refitted in place when a new animal is learned.

    Returns:
        None.
    """
    X_df = df.drop('Animal', axis=1).astype(int)
    feature_names = X_df.columns.tolist()

    # Feature rows addressable by animal name, one row per name.
    features = X_df.copy()
    features.index = df['Animal'].to_numpy()
    features = features[~features.index.duplicated(keep='first')]
    remaining_animals = features.index.tolist()

    print("\n--- Welcome to the Animal Akinator Game! ---")
    print(f"I know {len(df)} animals! Think of one, and let's play.\n")

    answers = {}
    if remaining_animals:
        guessed = _guess_from_tree(model, feature_names, answers)
        if guessed not in remaining_animals:
            # The model can name a class that is missing from df; fall back
            # to an animal we can actually look up.
            guessed = remaining_animals[0]
        announcement = f"\n--- My Guess ---\nI think the animal is a {guessed}!"

        while True:
            print(announcement)
            if ask_question("Was my guess correct?"):
                print("Hooray! I guessed it!")
                return
            remaining_animals.remove(guessed)
            if not remaining_animals:
                break
            guessed = _pick_next_guess(features, remaining_animals, guessed, answers)
            announcement = f"\nI was wrong. Let's try another guess: {guessed}"

    # If no correct guesses left, allow user to add new animal
    _learn_new_animal(df, model, answers)

# --- 5. Main Execution ---
if __name__ == "__main__":
    # Load the table (CSV or built-in fallback), then split it into the
    # label column and the integer feature matrix the classifier trains on.
    df = load_data()
    y_df = df['Animal']
    X_df = df.drop(columns=['Animal']).astype(int)

    # Train once and hand both the table and the model to the game loop.
    model = train_model(X_df, y_df)
    start_game(df, model)


Dataset 'dataset.csv' loaded successfully.

--- Welcome to the Animal Akinator Game! ---
I know 78 animals! Think of one, and let's play.




Is it  it eight-legged? (y/n):  n

Is it carnivorous? (y/n):  y


KeyboardInterrupt: Interrupted by user