In [None]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis # QDA
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier # KNN
from sklearn.model_selection import train_test_split

In [33]:
# 导入数据
train_data = pd.read_csv('traindd.CSV',encoding='utf-8',usecols=[0,2,3,4,5])
test_data = pd.read_csv('testdd.CSV',encoding='utf-8',usecols=[1,2,3,4])
train_data

Unnamed: 0,develop,GDP,life,literacy,education
0,A,41890,77.9,99.5,93.3
1,A,29461,79.1,99.2,88.0
2,A,23381,78.9,96.0,99.0
3,A,29663,79.4,92.5,87.3
4,A,28529,80.3,98.4,90.6
5,A,22029,77.9,99.0,96.0
6,B,6000,77.7,99.8,87.6
7,B,9060,71.9,97.3,76.8
8,B,8402,71.7,88.6,87.5
9,B,8677,69.6,92.6,71.2


In [34]:
# 转换类别变量
# pd.get_dummies 转换成多列矩阵
# pd.factorize 转换成单列数值
train_data['Category'] = pd.factorize(train_data['develop'])[0]
train_data = train_data.loc[:,'GDP':'Category']
train_data

Unnamed: 0,GDP,life,literacy,education,Category
0,41890,77.9,99.5,93.3,0
1,29461,79.1,99.2,88.0,0
2,23381,78.9,96.0,99.0,0
3,29663,79.4,92.5,87.3,0
4,28529,80.3,98.4,90.6,0
5,22029,77.9,99.0,96.0,0
6,6000,77.7,99.8,87.6,1
7,9060,71.9,97.3,76.8,1
8,8402,71.7,88.6,87.5,1
9,8677,69.6,92.6,71.2,1


In [35]:
# 特征矩阵和目标向量
X = train_data.drop('Category', axis=1).values
y = train_data['Category'].values
X_test = test_data.values
X

array([[4.1890e+04, 7.7900e+01, 9.9500e+01, 9.3300e+01],
       [2.9461e+04, 7.9100e+01, 9.9200e+01, 8.8000e+01],
       [2.3381e+04, 7.8900e+01, 9.6000e+01, 9.9000e+01],
       [2.9663e+04, 7.9400e+01, 9.2500e+01, 8.7300e+01],
       [2.8529e+04, 8.0300e+01, 9.8400e+01, 9.0600e+01],
       [2.2029e+04, 7.7900e+01, 9.9000e+01, 9.6000e+01],
       [6.0000e+03, 7.7700e+01, 9.9800e+01, 8.7600e+01],
       [9.0600e+03, 7.1900e+01, 9.7300e+01, 7.6800e+01],
       [8.4020e+03, 7.1700e+01, 8.8600e+01, 8.7500e+01],
       [8.6770e+03, 6.9600e+01, 9.2600e+01, 7.1200e+01],
       [5.1370e+03, 7.1000e+01, 9.2600e+01, 8.1100e+01],
       [8.4070e+03, 7.1400e+01, 8.7400e+01, 6.8700e+01],
       [1.5500e+03, 6.2600e+01, 4.8600e+01, 5.8100e+01],
       [1.1280e+03, 4.6500e+01, 6.9100e+01, 5.6200e+01],
       [2.2990e+03, 4.9800e+01, 6.7900e+01, 6.2300e+01],
       [2.3700e+03, 6.4600e+01, 4.9900e+01, 4.0000e+01],
       [3.0710e+03, 7.3700e+01, 9.0300e+01, 6.3900e+01],
       [3.8430e+03, 6.9700e+01,

In [36]:
# 标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test = scaler.fit_transform(X_test)
X_test

array([[ 1.67647912,  1.29214016,  1.10676723,  1.43175391],
       [-0.89691934, -0.31278852, -1.57630485, -1.21609223],
       [-0.59114613,  0.44653258,  0.53484924, -0.58108841],
       [-0.18841366, -1.42588422, -0.06531162,  0.36542673]])

In [37]:
# LDA
# 创建LDA模型并训练
discriminant = LinearDiscriminantAnalysis()
discriminant.fit(X_scaled, y)
# 预测
y_pred = discriminant.predict(X_test)
y_pred

array([0, 2, 1, 1])

In [38]:
# KNN
cov = np.cov(X_scaled.T)
inv = np.linalg.inv(cov)
# 创建KNN模型并训练（使用欧氏距离，k=3）
knn = KNeighborsClassifier(n_neighbors=3, 
                           metric='mahalanobis',
                           metric_params={'VI':inv})
knn.fit(X_scaled, y)
# 预测
y_pred = knn.predict(X_test)
y_pred

array([0, 2, 2, 2])

In [41]:
# QDA
# 创建 QDA 模型并训练
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_scaled, y)

# 预测并评估
y_pred = qda.predict(X_test)
y_pred

array([0, 2, 1, 1])