In [6]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import io

# --- 1. Dataset Loading and Preparation ---

def load_data():
    """
    Loads the animal dataset. Assumes 'expanded_animal_data.csv' is available
    and its content is accessible via the runtime environment.
    """
    # Using the latest and largest dataset file name
    FILE_NAME = 'dataset.csv'
    csv_data = None
    df = None
    try:
        # Load the uploaded file content using the correct file name.
        # NOTE: This uses the correct mechanism for reading uploaded files in this environment.
        with open(FILE_NAME, 'r') as f:
            csv_data = f.read()

        df = pd.read_csv(io.StringIO(csv_data))
        print(f"Dataset '{FILE_NAME}' loaded successfully.")

    except Exception as e:
        print(f"Warning: Could not load '{FILE_NAME}' ({e}). Using an expanded fallback dataset.")
        # Expanded fallback dataset with 14 features
        data = {
            'Animal': ['Dog', 'Cat', 'Lion', 'Eagle', 'Shark', 'Elephant', 'Frog', 'Bat'],
            'IsMammal': [1, 1, 1, 0, 0, 1, 0, 1],
            'CanFly': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsAquatic': [0, 0, 0, 0, 1, 0, 1, 0],
            'IsPet': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsCarnivore': [1, 1, 1, 1, 1, 0, 0, 0],
            'IsFoundInAfrica': [0, 0, 1, 0, 0, 1, 0, 0],
            'IsLarge': [0, 0, 1, 0, 1, 1, 0, 0],
            'HasFur': [1, 1, 1, 0, 0, 0, 0, 1],
            'CanBeDomesticated': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsDangerous': [0, 0, 1, 0, 1, 0, 0, 0],
            'IsHerbivore': [0, 0, 0, 0, 0, 1, 1, 0],
            'HasWings': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsNocturnal': [0, 1, 1, 0, 0, 0, 1, 1],
        }
        df = pd.DataFrame(data)

    # Clean up any potential duplicates or empty rows
    df = df.drop_duplicates(subset=['Animal']).dropna()

    # Define features (X) and target (y)
    y = df['Animal']
    X = df.drop('Animal', axis=1)

    # Ensure all features are binary (0 or 1)
    X = X.astype(int)

    # Store feature names for later use
    feature_names = X.columns.tolist()
    class_names = y.unique().tolist()

    return X, y, feature_names, class_names

# --- 2. Model Training ---

def train_model(X, y):
    """
    Trains the Decision Tree Classifier.
    """
    # *** FIX: Removed max_depth=5 to allow the tree to grow until it perfectly separates all 78 animals ***
    # This prevents the model from stopping on ambiguous nodes and guessing incorrectly.
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X, y)
    print("Decision Tree Model trained successfully.")
    return model

# --- 3. Interactive Game Logic (Akinator Style) ---

def ask_question(question):
    """
    Prompts the user with a question and gets a valid binary response (y/n).
    """
    while True:
        # Prompt for 'y' (yes) or 'n' (no)
        response = input(f"\n{question} (y/n): ").strip().lower()
        if response in ['y', 'yes', '1']:
            return 1
        elif response in ['n', 'no', '0']:
            return 0
        else:
            print("Invalid input. Please answer with 'y' (yes) or 'n' (no).")

def start_game(model, feature_names, class_names):
    """
    Initiates and manages the interactive guessing game by traversing the decision tree.
    """
    tree = model.tree_
    current_node = 0  # Start at the root node
    print("\n--- Welcome to the Animal Akinator Game! ---")
    print(f"I know {len(class_names)} different animals! Think of one, and let's play.\n")

    # The recursive traversal function
    def traverse_tree(node_index):
        nonlocal current_node
        current_node = node_index

        # Check if we reached a leaf node (a guess)
        if tree.children_left[node_index] == tree.children_right[node_index]:
            # Leaf node: the prediction is the class with the highest sample count
            class_counts = tree.value[node_index][0]
            predicted_index = np.argmax(class_counts)
            predicted_animal = class_names[predicted_index]

            print(f"\n--- My Guess ---")
            print(f"Based on your answers, I guess the animal is a {predicted_animal}!")
            
            # Simple check if the game was correct
            correct = ask_question(f"Was my guess correct?")
            
            if correct == 1:
                print("Hooray! I guessed it!")
            else:
                print("Aww, I missed it. Maybe the model needs more data to distinguish that animal.")
            
            return

        # Not a leaf node: Ask the question corresponding to the feature at this node
        feature_index = tree.feature[node_index]
        feature_name = feature_names[feature_index]
        
        # Format the question better for the user
        try:
            question = feature_name
            if question.startswith('Is'):
                question = question.replace('Is', 'Is it ')
            elif question.startswith('Can'):
                question = question.replace('Can', 'Can it ')
            elif question.startswith('Has'):
                question = question.replace('Has', 'Does it have ')
            
            # Clean up common fragments
            question = question.replace('BeDomesticated', 'be domesticated')
            question = question.replace('Aquatic', 'aquatic')
            question = question.replace('Carnivore', 'carnivorous')
            question = question.replace('Herbivore', 'herbivorous')
            question = question.replace('Dangerous', 'dangerous')
            question = question.replace('Mammal', 'a mammal')
            question = question.replace('FoundInAfrica', 'found in Africa')
            question = question.replace('Fur', 'fur')
            question = question.replace('Wings', 'wings')
            question = question.replace('Nocturnal', 'nocturnal')
            
            question = question.strip() + '?'
        except:
            # Fallback if formatting fails
            question = feature_name + '?'


        # Get user's answer
        user_answer = ask_question(question)

        # Determine the next node based on the user's answer and the tree's threshold
        # For binary features (0 or 1), the threshold is usually 0.5.
        threshold = tree.threshold[node_index]

        if user_answer <= threshold:
            # Answer '0' (No) goes to the left child
            next_node = tree.children_left[node_index]
        else:
            # Answer '1' (Yes) goes to the right child
            next_node = tree.children_right[node_index]

        # Recursively move to the next node
        traverse_tree(next_node)

    # Start the traversal from the root
    traverse_tree(current_node)

# --- 4. Main Execution ---

if __name__ == "__main__":
    try:
        # Load and prepare data
        X, y, feature_names, class_names = load_data()

        # Train the model
        model = train_model(X, y)

        # Start the interactive game
        start_game(model, feature_names, class_names)

    except Exception as e:
        print(f"\nAn error occurred during execution: {e}")
        print("Please ensure your dataset is correctly formatted with an 'Animal' column and binary feature columns (0/1).")


Dataset 'dataset.csv' loaded successfully.
Decision Tree Model trained successfully.

--- Welcome to the Animal Akinator Game! ---
I know 78 different animals! Think of one, and let's play.




Is it Pet? (y/n):  n

Does it have fur? (y/n):  n

Is it a mammal? (y/n):  n

Is it nocturnal? (y/n):  n

Is it dangerous? (y/n):  y

Is it found in Africa? (y/n):  y

Is it Large? (y/n):  y



--- My Guess ---
Based on your answers, I guess the animal is a Eagle!



Was my guess correct? (y/n):  y


Hooray! I guessed it!
