In [7]:
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np

# 训练数据集
data = [
    ['Kristina', 'F', 1.6, 'Short'],
    ['Jim', 'M', 2.0, 'Tall'],
    ['Maggie', 'F', 1.9, 'Medium'],
    ['Martha', 'F', 1.88, 'Medium'],
    ['Stephanie', 'F', 1.7, 'Short'],
    ['Bob', 'M', 1.85, 'Medium'],
    ['Kathy', 'F', 1.6, 'Short'],
    ['Dave', 'M', 1.7, 'Short'],
    ['Worth', 'M', 2.2, 'Tall'],
    ['Steven', 'M', 2.1, 'Tall'],
    ['Debbie', 'F', 1.8, 'Medium'],
    ['Todd', 'M', 1.95, 'Medium'],
    ['Kim', 'F', 1.9, 'Medium'],
    ['Amy', 'F', 1.8, 'Medium'],
    ['Wynette', 'F', 1.75, 'Medium']
]

# 提取特征和标签
X_train = np.array([[row[1], row[2]] for row in data])
y_train = np.array([row[3] for row in data])

# 将性别特征进行独热编码
gender_encoder = OneHotEncoder()
X_gender = gender_encoder.fit_transform(X_train[:, 0].reshape(-1, 1)).toarray()

# 将身高特征添加到独热编码后的特征数组中
X_train = np.hstack((X_gender, X_train[:, 1].reshape(-1, 1).astype(float)))

# 将标签变量转化为数值表示
le_output = LabelEncoder()
y_train = le_output.fit_transform(y_train)

# 创建并拟合高斯朴素贝叶斯模型
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# 待预测的示例
t = ['Adam', 'M', 1.95]
X_test = np.array([[0, 1, t[2]]])  # 对待预测示例进行独热编码处理

# 进行预测
predicted_class = nb_model.predict(X_test)
predicted_label = le_output.inverse_transform(predicted_class)
print("X_train:", X_train)
print("y_train:", y_train)
print("独热编码后的性别特征：", X_gender)
print("合并后的特征数组：", X_train)
print("待预测示例的特征：", X_test)

print("预测结果:", predicted_label[0])


X_train: [[1.   0.   1.6 ]
 [0.   1.   2.  ]
 [1.   0.   1.9 ]
 [1.   0.   1.88]
 [1.   0.   1.7 ]
 [0.   1.   1.85]
 [1.   0.   1.6 ]
 [0.   1.   1.7 ]
 [0.   1.   2.2 ]
 [0.   1.   2.1 ]
 [1.   0.   1.8 ]
 [0.   1.   1.95]
 [1.   0.   1.9 ]
 [1.   0.   1.8 ]
 [1.   0.   1.75]]
y_train: [1 2 0 0 1 0 1 1 2 2 0 0 0 0 0]
独热编码后的性别特征： [[1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]]
合并后的特征数组： [[1.   0.   1.6 ]
 [0.   1.   2.  ]
 [1.   0.   1.9 ]
 [1.   0.   1.88]
 [1.   0.   1.7 ]
 [0.   1.   1.85]
 [1.   0.   1.6 ]
 [0.   1.   1.7 ]
 [0.   1.   2.2 ]
 [0.   1.   2.1 ]
 [1.   0.   1.8 ]
 [0.   1.   1.95]
 [1.   0.   1.9 ]
 [1.   0.   1.8 ]
 [1.   0.   1.75]]
待预测示例的特征： [[0.   1.   1.95]]
预测结果: Tall


In [2]:
import numpy as np

# 训练数据集
heights = np.array([1.6, 2.0, 1.9, 1.88, 1.7, 1.85, 1.6, 1.7, 2.2, 2.1, 1.8, 1.95, 1.9, 1.8, 1.75])
labels = np.array(['Short', 'Tall', 'Medium', 'Medium', 'Short', 'Medium', 'Short', 'Short', 'Tall', 'Tall', 'Medium', 'Medium', 'Medium', 'Medium', 'Medium'])

# 计算每个类别的均值和标准差
means = {}
stds = {}

for label in np.unique(labels):
    heights_for_label = heights[labels == label]
    means[label] = np.mean(heights_for_label)
    stds[label] = np.std(heights_for_label)

# 显示均值和标准差
print("Means:", means)
print("Stds:", stds)

# 计算后验概率
def calculate_probability(x, mean, std):
    exponent = np.exp(-((x - mean) ** 2 / (2 * std ** 2)))
    return (1 / (np.sqrt(2 * np.pi) * std)) * exponent

x = 1.95
probabilities = {}

for label in means:
    prior = len(labels[labels == label]) / len(labels)
    conditional = calculate_probability(x, means[label], stds[label])
    probabilities[label] = prior * conditional

# 显示后验概率
print("Probabilities:", probabilities)

# 预测结果
predicted_class = max(probabilities, key=probabilities.get)
print("预测结果:", predicted_class)


Means: {'Medium': 1.8537499999999998, 'Short': 1.6500000000000001, 'Tall': 2.1}
Stds: {'Medium': 0.06203577596838775, 'Short': 0.04999999999999993, 'Tall': 0.08164965809277268}
Probabilities: {'Medium': 1.0293045257737876, 'Short': 3.240470853239009e-08, 'Tall': 0.1807647533000548}
预测结果: Medium
