In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import io

# --- 1. Dataset Loading and Preparation ---

def load_data(path='dataset.csv'):
    """
    Loads the animal dataset and splits it into features and target.

    The CSV is expected to contain an 'Animal' column (the target) and one
    column per feature; every feature column must be convertible to int.
    If the file cannot be read, a small built-in dataset is used instead so
    the game can still be demonstrated.

    Args:
        path: CSV file to read. Defaults to 'dataset.csv', the file this
            function has always read.

    Returns:
        A tuple (X, y, feature_names, class_names) where
        X is the feature DataFrame (int dtype),
        y is the 'Animal' Series,
        feature_names is the list of feature column names, and
        class_names is the sorted list of distinct animals. The list is
        sorted so that position i matches class index i of a scikit-learn
        classifier fitted on y (its classes_ attribute is sorted).

    Raises:
        KeyError: if the loaded data has no 'Animal' column.
        ValueError: if a feature column cannot be converted to int.
    """
    try:
        df = pd.read_csv(path)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; pandas' parser and
        # empty-data errors (and decoding errors) are ValueError subclasses.
        print(f"Warning: Could not access or load '{path}' ({e}). Using a fallback dataset.")
        # Fallback dataset for demonstration if file access fails
        df = pd.DataFrame({
            'Animal': ['Dog', 'Cat', 'Eagle', 'Dolphin', 'Shark'],
            'IsMammal': [1, 1, 0, 1, 0],
            'CanFly': [0, 0, 1, 0, 0],
            'IsAquatic': [0, 0, 0, 1, 1],
            'IsPet': [1, 1, 0, 0, 0],
            'IsCarnivore': [1, 1, 1, 1, 1],
            'IsLarge': [0, 0, 0, 0, 0],
            'HasFur': [1, 1, 0, 0, 0],
            'IsDangerous': [0, 0, 0, 0, 1],
        })
    else:
        # Report the file that was actually read.
        print(f"Dataset '{path}' loaded successfully.")

    # Keep one row per animal and discard rows with missing values
    df = df.drop_duplicates(subset=['Animal']).dropna()

    # Target is the 'Animal' column; every other column is a feature
    y = df['Animal']
    X = df.drop('Animal', axis=1).astype(int)

    feature_names = X.columns.tolist()
    # Sorted (not first-appearance) order, so indices line up with the
    # class axis of a classifier trained on y.
    class_names = sorted(y.unique().tolist())

    return X, y, feature_names, class_names

# --- 2. Model Training ---

def train_model(X, y, max_depth=5, random_state=42):
    """
    Trains the Decision Tree Classifier used to drive the guessing game.

    Args:
        X: Feature table (one row per animal, one column per question).
        y: Target labels (the animal names), aligned with the rows of X.
        max_depth: Maximum depth of the tree, i.e. the largest number of
            questions asked before a guess. Defaults to 5 to keep the game
            short; pass None to remove the depth limit.
        random_state: Seed passed to the classifier so repeated runs build
            the same tree. Defaults to 42.

    Returns:
        The fitted DecisionTreeClassifier.
    """
    model = DecisionTreeClassifier(random_state=random_state, max_depth=max_depth)
    model.fit(X, y)
    print("Decision Tree Model trained successfully.")
    return model

# --- 3. Interactive Game Logic (Akinator Style) ---

def ask_question(question):
    """
    Asks the user a yes/no question until a recognisable answer is given.

    Accepted answers (case-insensitive, surrounding whitespace ignored) are
    'y', 'yes', '1' for yes and 'n', 'no', '0' for no.

    Args:
        question: Text shown to the user; " (y/n): " is appended to it.

    Returns:
        1 for a yes answer, 0 for a no answer.
    """
    # Every accepted spelling mapped to its binary value
    accepted = {'y': 1, 'yes': 1, '1': 1, 'n': 0, 'no': 0, '0': 0}
    prompt = f"\n{question} (y/n): "

    reply = input(prompt).strip().lower()
    while reply not in accepted:
        print("Invalid input. Please answer with 'y' (yes) or 'n' (no).")
        reply = input(prompt).strip().lower()
    return accepted[reply]

def format_question(feature_name):
    """
    Turns a CamelCase feature name into a readable yes/no question.

    The name is split into words first, so a leading 'Is' / 'Can' / 'Has'
    is only treated as a verb when it is a whole word, and the remaining
    words are separated by spaces. Examples:
        'IsDangerous'       -> 'Is it dangerous?'
        'CanBeDomesticated' -> 'Can it be domesticated?'
        'HasFur'            -> 'Does it have fur?'
        'LivesInAfrica'     -> 'Lives in Africa?'

    Args:
        feature_name: Column name of the feature being asked about.

    Returns:
        The question text, ending with '?'.
    """
    import re  # local import: 're' is not among the file's top-level imports

    # Words are: acronym runs (DNA), Capitalised/lowercase words, digit runs.
    words = re.findall(r'[A-Z]+(?![a-z])|[A-Z]?[a-z]+|\d+', str(feature_name))
    if not words:
        return f"{feature_name}?"

    proper_nouns = {'Africa'}

    def tidy(word):
        # Keep proper nouns and acronyms as written; lowercase everything else.
        if word in proper_nouns or (len(word) > 1 and word.isupper()):
            return word
        return word.lower()

    openers = {'is': 'Is it', 'can': 'Can it', 'has': 'Does it have'}
    first = words[0]
    rest = [tidy(word) for word in words[1:]]

    opener = openers.get(first.lower())
    if opener is None:
        # No recognised verb prefix: keep the first word, capitalised.
        opener = first[:1].upper() + first[1:]

    return ' '.join([opener] + rest) + '?'


def start_game(model, feature_names, class_names):
    """
    Runs the interactive guessing game by walking the fitted decision tree.

    Starting at the root, each internal node asks the user about that node's
    feature and follows the left child when the answer is <= the node's
    threshold, otherwise the right child. At a leaf, the class with the
    largest value is announced as the guess and the user is asked whether
    it was correct.

    Args:
        model: Fitted decision tree classifier exposing `tree_` (and
            normally `classes_`).
        feature_names: Feature column names, in the column order the model
            was trained on.
        class_names: Animal names. Only used when the model has no
            `classes_` attribute; in that case it must be ordered like the
            tree's class axis. `model.classes_` is preferred because
            scikit-learn stores classes in sorted order, which generally
            differs from the order of appearance in the dataset.

    Returns:
        None. All interaction happens through input() and print().
    """
    tree = model.tree_

    # Labels must follow the tree's own class ordering, otherwise the
    # argmax index below points at the wrong animal.
    labels = getattr(model, 'classes_', None)
    if labels is None:
        labels = class_names

    print("\n--- Welcome to the Animal Akinator Game! ---")
    print("Think of an animal from the dataset, and I will try to guess it based on your answers.\n")

    node = 0  # Start at the root node
    # A leaf is the only node whose two child pointers are equal.
    while tree.children_left[node] != tree.children_right[node]:
        feature_name = feature_names[tree.feature[node]]
        user_answer = ask_question(format_question(feature_name))

        # The tree sends samples with feature <= threshold to the left.
        # For 0/1 features the threshold is typically 0.5, so "no" (0)
        # goes left and "yes" (1) goes right.
        if user_answer <= tree.threshold[node]:
            node = tree.children_left[node]
        else:
            node = tree.children_right[node]

    # Leaf reached: guess the class with the highest value at this node
    predicted_index = int(np.argmax(tree.value[node][0]))
    predicted_animal = labels[predicted_index]

    print("\n--- My Guess ---")
    print(f"Based on your answers, I guess the animal is a {predicted_animal}!")

    if ask_question("Was my guess correct?") == 1:
        print("Hooray! I guessed it!")
    else:
        print("Aww, I missed it. Maybe the model needs more data to distinguish that animal.")

# --- 4. Main Execution ---

if __name__ == "__main__":
    # Script entry point: load the data, fit the tree, then play one game.
    try:
        # Load and prepare data
        X, y, feature_names, class_names = load_data()

        # Train the model
        model = train_model(X, y)

        # Start the interactive game
        start_game(model, feature_names, class_names)

    except (EOFError, KeyboardInterrupt):
        # The input stream was closed or the user pressed Ctrl-C while being
        # prompted. This is not a dataset problem, so skip the format hint.
        print("\nGame interrupted. Goodbye!")

    except Exception as e:
        # Top-level boundary: report the failure instead of a raw traceback.
        print(f"\nAn error occurred during execution: {e}")
        print("Please ensure your dataset is correctly formatted with an 'Animal' column and binary feature columns (0/1).")


Dataset 'animal_guessing_dataset.csv' loaded successfully.
Decision Tree Model trained successfully.

--- Welcome to the Animal Akinator Game! ---
Think of an animal from the dataset, and I will try to guess it based on your answers.




Can itBeDomesticated? (y/n):  y

Can itFly? (y/n):  y



--- My Guess ---
Based on your answers, I guess the animal is a Bear_3!



Was my guess correct? (y/n):  n


Aww, I missed it. Maybe the model needs more data to distinguish that animal.
