In [5]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import io

# --- 1. Dataset Loading and Preparation ---
def load_data(file_name='dataset.csv'):
    """Load the animal/feature table, falling back to a small built-in one.

    Args:
        file_name: Path of the CSV file to read. Defaults to ``'dataset.csv'``,
            the same file the game writes its updates to.

    Returns:
        A DataFrame with an ``'Animal'`` column plus one column per yes/no
        feature. Rows with a repeated ``'Animal'`` value (first occurrence is
        kept) and rows containing any missing value are removed.

    The built-in fallback table is used when the file cannot be read or
    parsed, or when it has no ``'Animal'`` column. A message describing which
    source was used is printed in every case.
    """
    df = None
    try:
        # Passing the path lets pandas open the file with its default UTF-8
        # encoding, matching how DataFrame.to_csv writes the file back.
        df = pd.read_csv(file_name)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: covers pandas'
        # EmptyDataError/ParserError and UnicodeDecodeError.
        print(f"Warning: Could not load '{file_name}' ({e}). Using fallback dataset.")
    else:
        if 'Animal' in df.columns:
            print(f"Dataset '{file_name}' loaded successfully.")
        else:
            # Without the label column the table is unusable for the game.
            print(f"Warning: '{file_name}' has no 'Animal' column. Using fallback dataset.")
            df = None

    if df is None:
        # Minimal fallback dataset
        df = pd.DataFrame({
            'Animal': ['Dog', 'Cat', 'Lion', 'Eagle', 'Shark', 'Elephant', 'Frog', 'Bat'],
            'IsMammal': [1, 1, 1, 0, 0, 1, 0, 1],
            'CanFly': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsAquatic': [0, 0, 0, 0, 1, 0, 1, 0],
            'IsPet': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsCarnivore': [1, 1, 1, 1, 1, 0, 0, 0],
            'IsFoundInAfrica': [0, 0, 1, 0, 0, 1, 0, 0],
            'IsLarge': [0, 0, 1, 0, 1, 1, 0, 0],
            'HasFur': [1, 1, 1, 0, 0, 0, 0, 1],
            'CanBeDomesticated': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsDangerous': [0, 0, 1, 0, 1, 0, 0, 0],
            'IsHerbivore': [0, 0, 0, 0, 0, 1, 1, 0],
            'HasWings': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsNocturnal': [0, 1, 1, 0, 0, 0, 1, 1],
        })

    return df.drop_duplicates(subset=['Animal']).dropna()

# --- 2. Model Training ---
def train_model(df):
    """Fit a decision tree that predicts the animal from its feature columns.

    Args:
        df: Table with an ``'Animal'`` label column; every other column is
            used as a feature and is cast to ``int`` before fitting.

    Returns:
        The fitted ``DecisionTreeClassifier`` (built with ``random_state=42``
        so repeated fits on the same data give the same tree).
    """
    features = df.drop(columns='Animal').astype(int)
    labels = df['Animal']
    classifier = DecisionTreeClassifier(random_state=42)
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(features, labels)

# --- 3. Ask yes/no question ---
def ask_question(question):
    """Prompt with a yes/no question until a recognisable answer is typed.

    Args:
        question: Text shown to the player; ``" (y/n): "`` is appended.

    Returns:
        ``1`` for ``y``/``yes``/``1`` and ``0`` for ``n``/``no``/``0``.
        Answers are compared after stripping whitespace and lower-casing;
        anything else prints a hint and asks again.
    """
    affirmative = ('y', 'yes', '1')
    negative = ('n', 'no', '0')
    prompt = f"\n{question} (y/n): "

    while True:
        reply = input(prompt).strip().lower()
        if reply in affirmative:
            return 1
        if reply in negative:
            return 0
        print("Invalid input. Please answer with 'y' or 'n'.")

# --- 4. Interactive Game ---
def _feature_to_question(feature_name):
    """Turn a feature column name into the question shown to the player."""
    feature_name = str(feature_name)
    prefixes = (('Is', 'Is it '), ('Can', 'Can it '), ('Has', 'Does it have '))
    for prefix, phrase in prefixes:
        remainder = feature_name[len(prefix):]
        # Only rewrite CamelCase identifiers such as 'IsMammal'. Checking the
        # prefix (instead of str.replace on the whole name) keeps words like
        # 'Island' and player-typed questions such as 'Is it eight-legged?'
        # from being mangled.
        if feature_name.startswith(prefix) and remainder[:1].isupper():
            return f"{phrase}{remainder}?"
    # Free-text feature (typed by a player): show as-is with a single '?'.
    return feature_name.rstrip('?').rstrip() + '?'


def _prompt_nonempty(prompt, reserved=()):
    """Read a stripped line from the player, re-asking until it is usable."""
    while True:
        text = input(prompt).strip()
        if text and text not in reserved:
            return text
        print("Invalid input. Please type an answer.")


def _learn_from_mistake(df, X_df, predicted_animal, answers):
    """Ask the player for the right animal and a new feature; return the updated table."""
    is_predicted = df['Animal'] == predicted_animal

    # Show closest guesses: Hamming distance between each animal's feature
    # vector and that of the (wrong) guess.
    pred_features = X_df.loc[is_predicted].values[0]
    distances = (X_df.values != pred_features).sum(axis=1)
    similarity_df = pd.DataFrame({'Animal': df['Animal'], 'Distance': distances})
    similarity_df = similarity_df[similarity_df['Animal'] != predicted_animal]
    closest_animals = similarity_df.sort_values('Distance').head(5)['Animal'].tolist()
    if closest_animals:
        print("Here are some animals similar to my guess:")
        print(", ".join(map(str, closest_animals)))

    correct_animal = _prompt_nonempty("Which animal were you thinking of? ")
    # 'Animal' is the label column and must not be reused as a feature name.
    new_feature = _prompt_nonempty(
        f"Please give me a yes/no question that distinguishes a {correct_animal} "
        f"from a {predicted_animal}: ",
        reserved=('Animal',),
    )

    feature_is_new = new_feature not in df.columns
    if feature_is_new:
        # NOTE: this adds the column to the caller's DataFrame in place.
        df[new_feature] = 0

    if correct_animal not in df['Animal'].values:
        # The player's answers led to the same leaf as the guessed animal, so
        # start from that animal's features and overlay the answers actually
        # given; an all-zero row would contradict what the player just said.
        new_row = df.loc[is_predicted].iloc[0].to_dict()
        new_row.update(answers)
        new_row['Animal'] = correct_animal
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    value = ask_question(
        f"For {correct_animal}, is it true that {new_feature.rstrip('?')}?"
    )
    df.loc[df['Animal'] == correct_animal, new_feature] = value
    if feature_is_new and correct_animal != predicted_animal:
        # The question is meant to tell the two animals apart, so the guessed
        # animal gets the opposite answer. Existing columns are left alone to
        # avoid overwriting previously recorded knowledge.
        df.loc[df['Animal'] == predicted_animal, new_feature] = 1 - value

    return df


def start_game(df, model):
    """Play one round of the guessing game by walking the fitted decision tree.

    At every internal node the player is asked the yes/no question for that
    node's feature; at a leaf the most frequent class is offered as the guess.
    If the guess is wrong, the player names the animal and supplies a
    distinguishing question, the table is extended and written to
    ``'dataset.csv'``, and a model is retrained on the updated table.

    Args:
        df: Table with an ``'Animal'`` column and integer-castable feature
            columns, in the same column order the model was trained on.
        model: Fitted decision tree exposing ``tree_`` and ``classes_``.

    Returns:
        None. Side effects: console I/O; on a wrong guess, ``'dataset.csv'``
        is overwritten and ``df`` may gain a column in place. The retrained
        model is not handed back, so callers should reload and retrain to
        use the new knowledge.
    """
    tree = model.tree_
    X_df = df.drop('Animal', axis=1).astype(int)
    feature_names = X_df.columns.tolist()

    print("\n--- Welcome to the Animal Akinator Game! ---")

    # Walk from the root; a leaf is a node whose two child ids are equal.
    answers = {}
    node_index = 0
    while tree.children_left[node_index] != tree.children_right[node_index]:
        feature_name = feature_names[tree.feature[node_index]]
        answer = ask_question(_feature_to_question(feature_name))
        answers[feature_name] = answer
        if answer <= tree.threshold[node_index]:
            node_index = tree.children_left[node_index]
        else:
            node_index = tree.children_right[node_index]

    predicted_index = np.argmax(tree.value[node_index][0])
    predicted_animal = model.classes_[predicted_index]
    print(f"\n--- My Guess ---\nI think the animal is a {predicted_animal}!")

    if ask_question("Was my guess correct?"):
        print("Hooray! I guessed it!")
        return

    print("Oops! Let's improve my knowledge.")
    df = _learn_from_mistake(df, X_df, predicted_animal, answers)

    # Save dataset
    df.to_csv('dataset.csv', index=False)
    print("Dataset updated with new animal/feature!")

    # Retrain to confirm the updated table is still trainable; the result is
    # local to this call (see the docstring).
    train_model(df)

# --- 5. Main Execution ---
if __name__ == "__main__":
    # Script entry point: build the table, fit the tree, play one round.
    # load_data() reads 'dataset.csv' or falls back to its built-in table.
    df = load_data()
    # Fit the decision tree on the loaded table.
    model = train_model(df)
    # Run a single interactive round; start_game returns nothing.
    start_game(df, model)


Dataset 'dataset.csv' loaded successfully.

--- Welcome to the Animal Akinator Game! ---



Is it  it eight-legged? (y/n):  n

Is it Carnivore? (y/n):  y

Can it Fly? (y/n):  n

Is it Aquatic? (y/n):  n

Is it FoundInAfrica? (y/n):  n

Is it Dangerous? (y/n):  y

Is it Large? (y/n):  n



--- My Guess ---
I think the animal is a Wolf!



Was my guess correct? (y/n):  y


Hooray! I guessed it!
