In [6]:
# 加载 Iris 数据集
iris = datasets.load_iris()
X = iris.data  # 特征
y = iris.target  # 标签

from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score, hamming_loss, confusion_matrix

def evaluate_model(y_true, y_pred):
    # Accuracy
    acc = accuracy_score(y_true, y_pred)
    
    # Precision (Macro-average: treats all classes equally)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=1)
    
    # Recall (Macro-average: treats all classes equally)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=1)
    
    # F1 Score (Macro-average: treats all classes equally)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=1)
    
    # Hamming Loss
    h_loss = hamming_loss(y_true, y_pred)
    
    # Confusion Matrix
    conf_matrix = confusion_matrix(y_true, y_pred)
    
    # Print results
    print(f'Accuracy: {acc:.2f}')
    print(f'Precision (macro): {precision:.2f}')
    print(f'Recall (macro): {recall:.2f}')
    print(f'F1 Score (macro): {f1:.2f}')
    print(f'Hamming Loss: {h_loss:.2f}')
    print('Confusion Matrix:')
    print(conf_matrix)
    
    return acc, precision, recall, f1, h_loss, conf_matrix

# 标准化函数
def standardize_data(X_train, X_test):
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    X_test_std = sc.transform(X_test)
    return X_train_std, X_test_std


In [17]:
# linear 2 class 2 feature 
X_2_features = X[y != 2, :2]  # 选择前两个特征 feature
y_2_features = y[y != 2]      # 选择类别0和1   label (0：Iris-setosa； 1：Iris-versicolor)

# 分割数据集为训练集和测试集   Split the data set into a training set and a test set
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_2_features, y_2_features, test_size=0.3, random_state=1)

# 标准化数据 standardized data
X_train_2_std, X_test_2_std = standardize_data(X_train_2, X_test_2)

# 训练并评估 Perceptron 模型
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X_train_2_std, y_train_2)
y_pred_ppn_2 = ppn.predict(X_test_2_std)

print('Perceptron Results (2 features, linear):')
evaluate_model(y_test_2, y_pred_ppn_2)

# 训练并评估 Adaline 模型
ada = AdalineGD(eta=0.01, n_iter=50)
ada.fit(X_train_2_std, y_train_2)
y_pred_ada_2 = ada.predict(X_test_2_std)

print('Adaline Results (2 features, linear):')
evaluate_model(y_test_2, y_pred_ada_2)


Perceptron Results (2 features, linear):
Accuracy: 0.53
Precision (macro): 0.67
Recall (macro): 0.67
F1 Score (macro): 0.33
Hamming Loss: 0.47
Confusion Matrix:
[[ 0  0  0]
 [14  0  0]
 [ 0  0 16]]
Adaline Results (2 features, linear):
Accuracy: 0.53
Precision (macro): 0.55
Recall (macro): 0.67
F1 Score (macro): 0.26
Hamming Loss: 0.47
Confusion Matrix:
[[ 0  0  0]
 [ 5  0  9]
 [ 0  0 16]]


(0.5333333333333333,
 np.float64(0.5466666666666667),
 np.float64(0.6666666666666666),
 np.float64(0.2601626016260163),
 0.4666666666666667,
 array([[ 0,  0,  0],
        [ 5,  0,  9],
        [ 0,  0, 16]]))

In [18]:
# 选2 class 3 feature 
X_3_features = X[y != 2, :3]  # 选择前三个特征 feature
y_3_features = y[y != 2]      # 选择类别0和1  (0：Iris-setosa； 1：Iris-versicolor)

# 分割数据集为训练集和测试集 Split the data set into a training set and a test set
X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(X_3_features, y_3_features, test_size=0.3, random_state=1)

# 标准化数据  standardized data
X_train_3_std, X_test_3_std = standardize_data(X_train_3, X_test_3)

# 训练并评估 Perceptron 模型
ppn.fit(X_train_3_std, y_train_3)
y_pred_ppn_3 = ppn.predict(X_test_3_std)

print('Perceptron Results (3 features, linear):')
evaluate_model(y_test_3, y_pred_ppn_3)

# 训练并评估 Adaline 模型
ada.fit(X_train_3_std, y_train_3)
y_pred_ada_3 = ada.predict(X_test_3_std)

print('Adaline Results (3 features, linear):')
evaluate_model(y_test_3, y_pred_ada_3)


Perceptron Results (3 features, linear):
Accuracy: 0.53
Precision (macro): 0.67
Recall (macro): 0.67
F1 Score (macro): 0.33
Hamming Loss: 0.47
Confusion Matrix:
[[ 0  0  0]
 [14  0  0]
 [ 0  0 16]]
Adaline Results (3 features, linear):
Accuracy: 0.53
Precision (macro): 0.57
Recall (macro): 0.67
F1 Score (macro): 0.27
Hamming Loss: 0.47
Confusion Matrix:
[[ 0  0  0]
 [ 7  0  7]
 [ 0  0 16]]


(0.5333333333333333,
 np.float64(0.5652173913043478),
 np.float64(0.6666666666666666),
 np.float64(0.2735042735042735),
 0.4666666666666667,
 array([[ 0,  0,  0],
        [ 7,  0,  7],
        [ 0,  0, 16]]))

In [19]:
# 2 class 4 feature   
X_4_features = X[y != 2, :]  # 选择所有四个特征  4feature
y_4_features = y[y != 2]     # 选择类别0和1  label  (0：Iris-setosa； 1：Iris-versicolor)

# 分割数据集为训练集和测试集
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4_features, y_4_features, test_size=0.3, random_state=1)

# 标准化数据
X_train_4_std, X_test_4_std = standardize_data(X_train_4, X_test_4)

# 训练并评估 Perceptron 模型
ppn.fit(X_train_4_std, y_train_4)
y_pred_ppn_4 = ppn.predict(X_test_4_std)

print('Perceptron Results (4 features, linear):')
evaluate_model(y_test_4, y_pred_ppn_4)

# 训练并评估 Adaline 模型
ada.fit(X_train_4_std, y_train_4)
y_pred_ada_4 = ada.predict(X_test_4_std)

print('Adaline Results (4 features, linear):')
evaluate_model(y_test_4, y_pred_ada_4)


Perceptron Results (4 features, linear):
Accuracy: 0.53
Precision (macro): 0.77
Recall (macro): 0.50
F1 Score (macro): 0.35
Hamming Loss: 0.47
Confusion Matrix:
[[ 0 14]
 [ 0 16]]
Adaline Results (4 features, linear):
Accuracy: 0.00
Precision (macro): 0.33
Recall (macro): 0.33
F1 Score (macro): 0.00
Hamming Loss: 1.00
Confusion Matrix:
[[ 0  0  0]
 [ 0  0 14]
 [16  0  0]]


(0.0,
 np.float64(0.3333333333333333),
 np.float64(0.3333333333333333),
 np.float64(0.0),
 1.0,
 array([[ 0,  0,  0],
        [ 0,  0, 14],
        [16,  0,  0]]))

In [24]:
# not linear (Iris-versicolor (1); Iris-virginica (2))

In [25]:
# linear 2 class 2 feature 
X_2_features = X[y != 2, :2]  # 选择前两个特征 feature
y_2_features = y[y != 0]      # 选择类别1和2   label (Iris-versicolor (1); Iris-virginica (2))

# 分割数据集为训练集和测试集   Split the data set into a training set and a test set
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_2_features, y_2_features, test_size=0.3, random_state=1)

# 标准化数据 standardized data
X_train_2_std, X_test_2_std = standardize_data(X_train_2, X_test_2)

# 训练并评估 Perceptron 模型
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X_train_2_std, y_train_2)
y_pred_ppn_2 = ppn.predict(X_test_2_std)

print('Perceptron Results (2 features, not linear):')
evaluate_model(y_test_2, y_pred_ppn_2)

# 训练并评估 Adaline 模型
ada = AdalineGD(eta=0.01, n_iter=50)
ada.fit(X_train_2_std, y_train_2)
y_pred_ada_2 = ada.predict(X_test_2_std)

print('Adaline Results (2 features, not linear):')
evaluate_model(y_test_2, y_pred_ada_2)

Perceptron Results (2 features, not linear):
Accuracy: 0.43
Precision (macro): 0.48
Recall (macro): 0.64
F1 Score (macro): 0.20
Hamming Loss: 0.57
Confusion Matrix:
[[ 0  0  0]
 [ 1 13  0]
 [ 0 16  0]]
Adaline Results (2 features, not linear):
Accuracy: 0.47
Precision (macro): 0.73
Recall (macro): 0.50
F1 Score (macro): 0.32
Hamming Loss: 0.53
Confusion Matrix:
[[14  0]
 [16  0]]


(0.4666666666666667,
 np.float64(0.7333333333333334),
 np.float64(0.5),
 np.float64(0.3181818181818182),
 0.5333333333333333,
 array([[14,  0],
        [16,  0]]))

In [26]:
# linear 2 class 3 feature 
X_2_features = X[y != 2, :3]  # 选择前3个特征 feature
y_2_features = y[y != 0]      # 选择类别1和2   label (Iris-versicolor (1); Iris-virginica (2))

# 分割数据集为训练集和测试集   Split the data set into a training set and a test set
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_2_features, y_2_features, test_size=0.3, random_state=1)

# 标准化数据 standardized data
X_train_2_std, X_test_2_std = standardize_data(X_train_2, X_test_2)

# 训练并评估 Perceptron 模型
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X_train_2_std, y_train_2)
y_pred_ppn_2 = ppn.predict(X_test_2_std)

print('Perceptron Results (3 features,  not linear):')
evaluate_model(y_test_2, y_pred_ppn_2)

# 训练并评估 Adaline 模型
ada = AdalineGD(eta=0.01, n_iter=50)
ada.fit(X_train_2_std, y_train_2)
y_pred_ada_2 = ada.predict(X_test_2_std)

print('Adaline Results (3 features, not linear):')
evaluate_model(y_test_2, y_pred_ada_2)

Perceptron Results (3 features,  not linear):
Accuracy: 0.40
Precision (macro): 0.48
Recall (macro): 0.62
F1 Score (macro): 0.19
Hamming Loss: 0.60
Confusion Matrix:
[[ 0  0  0]
 [ 2 12  0]
 [ 0 16  0]]
Adaline Results (3 features, not linear):
Accuracy: 0.47
Precision (macro): 0.73
Recall (macro): 0.50
F1 Score (macro): 0.32
Hamming Loss: 0.53
Confusion Matrix:
[[14  0]
 [16  0]]


(0.4666666666666667,
 np.float64(0.7333333333333334),
 np.float64(0.5),
 np.float64(0.3181818181818182),
 0.5333333333333333,
 array([[14,  0],
        [16,  0]]))

In [27]:
# linear 2 class 4 feature 
X_2_features = X[y != 2, :]  # 选择前4个特征 feature
y_2_features = y[y != 0]      # 选择类别1和2   label (Iris-versicolor (1); Iris-virginica (2))

# 分割数据集为训练集和测试集   Split the data set into a training set and a test set
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_2_features, y_2_features, test_size=0.3, random_state=1)

# 标准化数据 standardized data
X_train_2_std, X_test_2_std = standardize_data(X_train_2, X_test_2)

# 训练并评估 Perceptron 模型
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X_train_2_std, y_train_2)
y_pred_ppn_2 = ppn.predict(X_test_2_std)

print('Perceptron Results (4 features, not linear):')
evaluate_model(y_test_2, y_pred_ppn_2)

# 训练并评估 Adaline 模型
ada = AdalineGD(eta=0.01, n_iter=50)
ada.fit(X_train_2_std, y_train_2)
y_pred_ada_2 = ada.predict(X_test_2_std)

print('Adaline Results (4 features, not linear):')
evaluate_model(y_test_2, y_pred_ada_2)

Perceptron Results (4 features, not linear):
Accuracy: 0.37
Precision (macro): 0.47
Recall (macro): 0.60
F1 Score (macro): 0.18
Hamming Loss: 0.63
Confusion Matrix:
[[ 0  0  0]
 [ 3 11  0]
 [ 0 16  0]]
Adaline Results (4 features, not linear):
Accuracy: 0.47
Precision (macro): 0.67
Recall (macro): 0.67
F1 Score (macro): 0.33
Hamming Loss: 0.53
Confusion Matrix:
[[ 0  0  0]
 [ 0 14  0]
 [16  0  0]]


(0.4666666666666667,
 np.float64(0.6666666666666666),
 np.float64(0.6666666666666666),
 np.float64(0.3333333333333333),
 0.5333333333333333,
 array([[ 0,  0,  0],
        [ 0, 14,  0],
        [16,  0,  0]]))

In [28]:
'''
experiment result and analyze:
1. Linear Classification (Linearly Separable Data)
1.1 Perceptron vs Adaline with 2 Features
In the case of two linearly separable features, both models performed similarly in terms of accuracy. 
However, Perceptron had a slight edge in precision and F1 score, while Adaline was better at handling class imbalance. 
Perceptron failed to correctly classify any of the samples from class 1, whereas Adaline managed to classify a portion of class 1 correctly, suggesting that Adaline is slightly more robust in handling imbalanced data.

1.2 Perceptron vs Adaline with 3 Features
With three features, the performance of both models remained quite similar to the two-feature case. 
Adaline continued to perform slightly better in handling class imbalance by correctly classifying some class 1 samples. 
Perceptron maintained its accuracy but still struggled with class 1 classification. 
This suggests that Adaline shows some degree of flexibility in handling additional features in linearly separable conditions.

1.3 Perceptron vs Adaline with 4 Features
When testing with all four linearly separable features, Perceptron managed to maintain a relatively stable performance, although its recall for class 1 was quite low, indicating that it struggled to classify class 1 correctly. 
In contrast, Adaline performed extremely poorly in this scenario, failing to make any correct classifications. This suggests that as the feature space becomes more complex, Perceptron is able to maintain its performance, while Adaline is less capable of handling more dimensions, leading to a complete breakdown in performance.

2. Non-Linear Classification (Non-Linearly Separable Data)
2.1 Perceptron vs Adaline with 2 Features
When dealing with non-linearly separable data with two features, both models saw a decline in performance. 
Adaline slightly outperformed Perceptron in terms of precision and F1 score. 
Perceptron had a higher recall but performed poorly in terms of precision, indicating that it made more incorrect predictions. 
Adaline demonstrated a more balanced performance across the classes, showing that it handles non-linear data slightly better than Perceptron.

2.2 Perceptron vs Adaline with 3 Features
With three non-linearly separable features, the pattern from the two-feature test persisted. 
Adaline consistently outperformed Perceptron in terms of precision and F1 score. 
Perceptron continued to struggle with precision, while Adaline demonstrated a more balanced handling of the classes. 
This suggests that Adaline can better adapt to non-linearly separable data even as the feature set expands.

2.3 Perceptron vs Adaline with 4 Features
In the case of four non-linearly separable features, Perceptron's performance deteriorated further, with poor precision and F1 scores. 
Adaline maintained its advantage with a more balanced classification, showing higher precision and F1 score, but it still struggled with class 1 predictions. 
Overall, Adaline's better handling of feature complexity and class imbalance in non-linear tasks became more apparent in this case.

General Observations and Conclusions
Linear Classification:

Perceptron showed consistent performance in linearly separable conditions, particularly when using simpler feature combinations (2 and 3 features). However, as the feature complexity increased, its ability to handle class imbalance decreased, leading to poor recall for class 1.
Adaline struggled with higher-dimensional linearly separable tasks, particularly when all four features were used. Its complete failure in the 4-feature case suggests that it is less adaptable to high-dimensional linearly separable problems compared to Perceptron.
Non-Linear Classification:

Adaline consistently outperformed Perceptron in non-linearly separable conditions across all feature combinations. Its superior precision and F1 scores indicate that it handles class imbalance and non-linearity more effectively.
Perceptron, on the other hand, struggled significantly with non-linearly separable data. It consistently produced lower precision and F1 scores, suggesting that it is less suited for non-linear classification tasks.
Impact of Feature Complexity:

In linearly separable conditions, Perceptron's performance remained relatively stable as the number of features increased, whereas Adaline performed worse with more features.
In non-linearly separable conditions, the increasing number of features did not significantly improve the performance of either model, but Adaline maintained an edge over Perceptron due to its better handling of non-linearity and class imbalance.
Class Imbalance Handling:

Both models showed limitations in handling class imbalance, but Adaline demonstrated a better ability to classify imbalanced classes correctly, particularly in non-linear scenarios.
Final Verdict:
Perceptron is more stable in linearly separable tasks but struggles significantly with non-linear data and class imbalance, especially as the feature set grows.
Adaline performs better in non-linearly separable tasks, showing better adaptability to non-linear conditions and class imbalance. However, it struggles with high-dimensional linearly separable data, especially when using all four features.
The choice between the two models ultimately depends on the task. Perceptron is suitable for simpler, linearly separable tasks, while Adaline is more appropriate for tasks that involve non-linear separability or class imbalance, though it may not scale well with feature complexity in linearly separable cases.
'''

"\nexperiment result and analyze:\n1. Linear Classification (Linearly Separable Data)\n1.1 Perceptron vs Adaline with 2 Features\nIn the case of two linearly separable features, both models performed similarly in terms of accuracy. \nHowever, Perceptron had a slight edge in precision and F1 score, while Adaline was better at handling class imbalance. \nPerceptron failed to correctly classify any of the samples from class 1, whereas Adaline managed to classify a portion of class 1 correctly, suggesting that Adaline is slightly more robust in handling imbalanced data.\n\n1.2 Perceptron vs Adaline with 3 Features\nWith three features, the performance of both models remained quite similar to the two-feature case. \nAdaline continued to perform slightly better in handling class imbalance by correctly classifying some class 1 samples. \nPerceptron maintained its accuracy but still struggled with class 1 classification. \nThis suggests that Adaline shows some degree of flexibility in handling