In [1]:
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

# Dataset, written one record per row and then transposed into a
# column-name -> list-of-values mapping.
_COLUMNS = ("Color", "Legs", "Height", "Smelly", "Species")
_ROWS = [
    ("White", 3, "Short", "Yes", "M"),
    ("Green", 2, "Tall", "No", "M"),
    ("Green", 3, "Short", "Yes", "M"),
    ("White", 3, "Short", "Yes", "M"),
    ("Green", 2, "Short", "No", "H"),
    ("White", 2, "Tall", "No", "H"),
    ("White", 2, "Tall", "No", "H"),
    ("White", 2, "Short", "Yes", "H"),
]
# zip(*_ROWS) yields one tuple per column; list() keeps the values as lists.
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}

# Function to encode data
def encode_data(data, encoders=None):
    """Label-encode every attribute of ``data`` into one numeric matrix.

    Args:
        data: Mapping of attribute name to a sequence of raw values.
        encoders: Optional mapping of attribute name to an already fitted
            encoder. When ``None``, a new ``LabelEncoder`` is fitted for
            each attribute of ``data``.

    Returns:
        A ``(encoded_data, encoders)`` tuple. ``encoded_data`` is the result
        of ``np.column_stack`` over the encoded attributes, one column per
        attribute in the iteration order of ``data``. ``encoders`` is the
        mapping that was used (the one passed in, or the newly fitted one).
    """
    if encoders is None:
        encoders = {}
        for name in data:
            fitted = LabelEncoder()
            fitted.fit(data[name])
            encoders[name] = fitted

    columns = []
    for name in data:
        columns.append(encoders[name].transform(data[name]))

    return np.column_stack(columns), encoders

# Build the feature matrix from every column except the "Species" label
X, encoders = encode_data(
    {name: values for name, values in data.items() if name != "Species"}
)

# The label column gets its own encoder (y_encoder) so that predicted codes
# can be mapped back to species labels with inverse_transform
y_encoder = LabelEncoder()
y_encoder.fit(data["Species"])
y = y_encoder.transform(data["Species"])

# Fit Gaussian Naive Bayes on the encoded features and labels
model = GaussianNB().fit(X, y)

# Function to predict new entity
def predict_new_entity(model, new_entity, encoders, y_encoder):
    """Predict the decoded class label for a single entity.

    Args:
        model: Fitted classifier exposing ``predict``.
        new_entity: Mapping of feature name to raw (un-encoded) value. Keys
            may appear in any order; keys that have no encoder are ignored.
        encoders: Mapping of feature name to its fitted encoder. Its
            iteration order is used as the column order of the encoded row,
            which matches the training matrix when the mapping comes from
            ``encode_data``.
        y_encoder: Fitted target encoder, used to decode the prediction.

    Returns:
        The first element of ``y_encoder.inverse_transform`` applied to the
        model's prediction for the encoded row.

    Raises:
        ValueError: If ``new_entity`` lacks a feature present in ``encoders``.
    """
    missing = [attr for attr in encoders if attr not in new_entity]
    if missing:
        raise ValueError(f"new_entity is missing feature(s): {missing!r}")

    # Iterate the encoders, not new_entity: the row must follow the column
    # order the model was trained on, regardless of the caller's dict order.
    new_entity_encoded = [
        encoders[attr].transform([new_entity[attr]])[0] for attr in encoders
    ]
    predicted_species_encoded = model.predict([new_entity_encoded])
    predicted_species = y_encoder.inverse_transform(predicted_species_encoded)
    return predicted_species[0]

# Classify one unseen entity and print its decoded species label
new_entity = dict(Color="Green", Legs=2, Height="Tall", Smelly="No")
predicted_species = predict_new_entity(model, new_entity, encoders, y_encoder)
print("Predicted Species:", predicted_species)


Predicted Species: H
