In [10]:
#4-11-2025
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import io
import joblib

# --- 1. Dataset Loading and Preparation ---
def load_data():
    FILE_NAME = 'dataset.csv'
    df = None
    try:
        with open(FILE_NAME, 'r') as f:
            df = pd.read_csv(f)
        print(f"Dataset '{FILE_NAME}' loaded successfully.")
    except Exception as e:
        print(f"Warning: Could not load '{FILE_NAME}' ({e}). Using fallback dataset.")
        # Minimal fallback dataset
        data = {
            'Animal': ['Dog', 'Cat', 'Lion', 'Eagle', 'Shark', 'Elephant', 'Frog', 'Bat'],
            'IsMammal': [1, 1, 1, 0, 0, 1, 0, 1],
            'CanFly': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsAquatic': [0, 0, 0, 0, 1, 0, 1, 0],
            'IsPet': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsCarnivore': [1, 1, 1, 1, 1, 0, 0, 0],
            'IsFoundInAfrica': [0, 0, 1, 0, 0, 1, 0, 0],
            'IsLarge': [0, 0, 1, 0, 1, 1, 0, 0],
            'HasFur': [1, 1, 1, 0, 0, 0, 0, 1],
            'CanBeDomesticated': [1, 1, 0, 0, 0, 0, 0, 0],
            'IsDangerous': [0, 0, 1, 0, 1, 0, 0, 0],
            'IsHerbivore': [0, 0, 0, 0, 0, 1, 1, 0],
            'HasWings': [0, 0, 0, 1, 0, 0, 0, 1],
            'IsNocturnal': [0, 1, 1, 0, 0, 0, 1, 1],
        }
        df = pd.DataFrame(data)

    df = df.drop_duplicates(subset=['Animal']).dropna()
    return df

# --- 2. Model Training ---
def train_model(df):
    X = df.drop('Animal', axis=1).astype(int)
    y = df['Animal']
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X, y)
    return model

# --- 3. Ask yes/no question ---
def ask_question(question):
    while True:
        response = input(f"\n{question} (y/n): ").strip().lower()
        if response in ['y','yes','1']: return 1
        elif response in ['n','no','0']: return 0
        else: print("Invalid input. Please answer with 'y' or 'n'.")
            
def reverse_question(question):
    """
    Given a question, reverse it to map it back to the original column name.
    """
    question = question.lower().strip()
    
    # Reverse the question construction logic
    if question.startswith('is it '):
        feature_name = 'Is' + question[5:].capitalize()
    elif question.startswith('can it '):
        feature_name = 'Can' + question[7:].capitalize()
    elif question.startswith('does it have '):
        feature_name = 'Has' + question[13:].capitalize()
    else:
        raise ValueError(f"Invalid question format: {question}")
    feature_name=feature_name.strip('?')
    return feature_name


# --- 4. Interactive Game ---
def start_game(df, model):
    tree = model.tree_
    X_df = df.drop('Animal', axis=1).astype(int)
    feature_names = X_df.columns.tolist()

    # Feature-to-question dictionary
    feature_questions = {f: f.replace('Is','Is it ').replace('Can','Can it ').replace('Has','Does it have ')+'?' for f in feature_names}

    print("\n--- Welcome to Guessify! ---")
    
    def traverse_tree(node_index):
        # Leaf node check
        nonlocal model,df,X_df
        if tree.children_left[node_index] == tree.children_right[node_index]:
            predicted_index = np.argmax(tree.value[node_index][0])
            predicted_animal = model.classes_[predicted_index]
            print(f"\n--- My Guess ---\nI think the animal is a {predicted_animal}!")

            correct = ask_question("Was my guess correct?")
            if correct:
                print("Hooray! I guessed it!")
            else:
                print("Hmm, I might need a bit more info before learning something new.")
                
                # --- Try asking more questions before giving up ---
                asked_features = set()
                user_features = {}
            
                # Fill user_features with all features answered so far
                for f in feature_names:
                    user_features[f] = None
            
                # Keep track of which features we already asked (optional)
                # You could pass this in recursively if you want persistent tracking.
            
                remaining_features = [f for f in feature_names if f not in asked_features]
            
                # Ask up to 3 additional questions that weren't asked yet
                followup_limit = 3
                for feature in remaining_features[:followup_limit]:
                    question = feature_questions.get(feature, feature + '?')
                    answer = ask_question(question)
                    user_features[feature] = answer
                    asked_features.add(feature)
            
                # Convert answers to a prediction input
                x_input = np.array([[user_features.get(f, 0) for f in feature_names]])
                new_guess = model.predict(x_input)[0]
            
                print(f"\nLet me try again... Is it a {new_guess}?")
                correct2 = ask_question("Was my second guess correct?")
                if correct2:
                    print("Yay! Got it this time!")
                    return
                else:
                    print("Oops! Let's improve my knowledge.")
                    # Show closest guesses
                    pred_features = X_df.loc[df['Animal'] == predicted_animal].values[0]
                    distances = ((X_df.values - pred_features) != 0).sum(axis=1)
                    similarity_df = pd.DataFrame({'Animal': df['Animal'], 'Distance': distances})
                    similarity_df = similarity_df[similarity_df['Animal'] != predicted_animal]
                    closest_animals = similarity_df.sort_values('Distance').head(5)['Animal'].tolist()
                    print("Here are some animals similar to my guess:")
                    print(", ".join(closest_animals))
                    
                    # Get the correct animal
                    correct_animal = input("Which animal were you thinking of? ").strip()
                    animal_exists = correct_animal in df['Animal'].values
                    
                    # --- Nested function for distinguishing question ---
                    def ask_distinguish():
                        new_feature = input(
                            f"Please give me a yes/no question that distinguishes a {correct_animal} from a {predicted_animal}: "
                        ).strip()
                    
                        try:
                            feature_name = reverse_question(new_feature)
                        except ValueError as e:
                            print(f"Error: {e}. Please ensure the question starts with 'Is it ', 'Can it ', or 'Does it have '.")
                            return None, None  # gracefully handle bad input
                    
                        # Add new feature column if needed
                        if feature_name not in df.columns:
                            df[feature_name] = 0
                    
                        return new_feature, feature_name
                    
                    # --- Existing animal case ---
                    if animal_exists:
                        new_feature, feature_name = ask_distinguish()
                        if not new_feature:
                            return  # invalid input handled in ask_distinguish()
                    
                    # --- New animal case ---
                    else:
                        print("This animal doesn't exist in our dataset! Please fill out its attributes.")
                        new_row = {col: 0 for col in df.columns if col != 'Animal'}
                        new_row['Animal'] = correct_animal
                    
                        for feature in feature_names:
                            value = ask_question(f"Please provide the answer for {feature} (1 for Yes, 0 for No):")
                            new_row[feature] = value
                    
                        new_feature, feature_name = ask_distinguish()
                        if not new_feature:
                            return  # invalid input handled in ask_distinguish()
                        
                        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
                    
                    # Ask for the value of the new distinguishing feature
                    value = ask_question(f"For {correct_animal}, is it true that {new_feature}?")
                    df.loc[df['Animal'] == correct_animal, feature_name] = value
                    df[feature_name] = df[feature_name].fillna(0).astype(int)
                    # Save dataset and retrain
                    df.to_csv('dataset.csv', index=False)
                    print("Dataset updated with new animal/feature!")
                    model = train_model(df)

                
            return

        # Non-leaf node: ask question
        feature_index = tree.feature[node_index]
        feature_name = feature_names[feature_index]
        question = feature_questions.get(feature_name, feature_name + '?')
        answer = ask_question(question)
        threshold = tree.threshold[node_index]

        if answer <= threshold:
            traverse_tree(tree.children_left[node_index])
        else:
            traverse_tree(tree.children_right[node_index])

    # Start from root
    traverse_tree(0)

# --- 5. Main Execution ---
if __name__ == "__main__":
    df = load_data()
    model = train_model(df)
    joblib.dump(model, "tree.joblib")
    start_game(df, model)


Dataset 'dataset.csv' loaded successfully.

--- Welcome to Guessify! ---



Is it Mammal? (y/n):  y

Is it Nocturnal? (y/n):  n

Is it Aquatic? (y/n):  y

Is it Large? (y/n):  y

Is it FoundInAfrica? (y/n):  n

Is it Herbivore? (y/n):  n



--- My Guess ---
I think the animal is a Dolphin!



Was my guess correct? (y/n):  n


Hmm, I might need a bit more info before learning something new.



Is it Mammal? (y/n):  y

Can it Fly? (y/n):  n

Is it Aquatic? (y/n):  y





Let me try again... Is it a Walrus?



Was my second guess correct? (y/n):  n


Oops! Let's improve my knowledge.
Here are some animals similar to my guess:
Seal, Walrus, Shark, Otter, Platypus


Which animal were you thinking of?  Whale


This animal doesn't exist in our dataset! Please fill out its attributes.



Please provide the answer for IsMammal (1 for Yes, 0 for No): (y/n):  1

Please provide the answer for CanFly (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for IsAquatic (1 for Yes, 0 for No): (y/n):  y

Please provide the answer for IsPet (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for IsCarnivore (1 for Yes, 0 for No): (y/n):  y

Please provide the answer for IsFoundInAfrica (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for IsLarge (1 for Yes, 0 for No): (y/n):  y

Please provide the answer for HasFur (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for CanBeDomesticated (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for IsDangerous (1 for Yes, 0 for No): (y/n):  y

Please provide the answer for IsHerbivore (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for HasWings (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for IsNocturnal (1 for Yes, 0 for No): (y/n):  n

Please provide the answer for

Dataset updated with new animal/feature!


In [5]:
print(model.tree_.feature)  # This prints the feature indices at each node

[ 0  8  4  1 12 10  2 -2 -2  5 -2 -2 10 -2  2 -2 -2 12 10  9 -2 -2 -2 -2
  9  2 12  6 -2 -2 -2 11 -2 -2 14  6 13 -2 -2  5 -2 -2 -2  1 12  5 -2 -2
 -2 10 -2 -2 12  2  9  6 10 -2  7 -2  8 -2 -2  5  3 -2 -2  7 -2  8 -2 -2
  5 -2 -2  6  7 -2 -2  5 10 -2 -2 -2  7 -2  6  5  9  4  1  3  8 -2 -2 -2
 -2  3 -2 -2 -2 -2  5 -2 -2]
