In [1]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

### Przygotowanie danych

In [2]:
iris = load_iris()
x2 = iris.data[:, :2]
x3 = iris.data[:, :3]
x4 = iris.data
y = iris.target

In [3]:
x2_train, x2_test = train_test_split(x2, test_size=0.1, random_state=42)
x3_train, x3_test = train_test_split(x3, test_size=0.1, random_state=42)
x4_train, x4_test, y_train, y_test = train_test_split(x4, y, test_size=0.1, random_state=42)

### KNN

In [4]:
scaler2 = MinMaxScaler()
scaler3 = MinMaxScaler()
scaler4 = MinMaxScaler()
scaler2.fit(x2_train)
scaler3.fit(x3_train)
scaler4.fit(x4_train)
x2_train_norm = scaler2.transform(x2_train)
x3_train_norm = scaler3.transform(x3_train)
x4_train_norm = scaler4.transform(x4_train)
x2_test_norm = scaler2.transform(x2_test)
x3_test_norm = scaler3.transform(x3_test)
x4_test_norm = scaler4.transform(x4_test)

In [5]:
knn_best_score_x2 = 0.0
knn_best_params_x2 = {}
for neighbors in range(1, 101):
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            knn_classifier = KNeighborsClassifier(n_jobs=-1, n_neighbors=neighbors, p=p, weights=w)
            scores = cross_val_score(knn_classifier, x2_train_norm, y_train, cv=5)
            score = scores.mean()
            if score > knn_best_score_x2:
                knn_best_score_x2 = score
                knn_best_params_x2["neighbors"] = neighbors
                knn_best_params_x2["weights"] = w
                knn_best_params_x2["p"] = p
print('knn x2 score:', knn_best_score_x2)
print('knn x2 params', knn_best_params_x2)

knn_best_score_x3 = 0.0
knn_best_params_x3 = {}
for neighbors in range(1, 101):
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            knn_classifier = KNeighborsClassifier(n_jobs=-1, n_neighbors=neighbors, p=p, weights=w)
            scores = cross_val_score(knn_classifier, x3_train_norm, y_train, cv=5)
            score = scores.mean()
            if score > knn_best_score_x3:
                knn_best_score_x3 = score
                knn_best_params_x3["neighbors"] = neighbors
                knn_best_params_x3["weights"] = w
                knn_best_params_x3["p"] = p
print('knn x3 score:', knn_best_score_x3)
print('knn x3 params', knn_best_params_x3)

knn_best_score_x4 = 0.0
knn_best_params_x4 = {}
for neighbors in range(1, 101):
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            knn_classifier = KNeighborsClassifier(n_jobs=-1, n_neighbors=neighbors, p=p, weights=w)
            scores = cross_val_score(knn_classifier, x4_train_norm, y_train, cv=5)
            score = scores.mean()
            if score > knn_best_score_x4:
                knn_best_score_x4 = score
                knn_best_params_x4["neighbors"] = neighbors
                knn_best_params_x4["weights"] = w
                knn_best_params_x4["p"] = p
print('knn x4 score:', knn_best_score_x4)
print('knn x4 params', knn_best_params_x4)

knn x2 score: 0.8224338624338625
knn x2 params {'neighbors': 37, 'weights': 'uniform', 'p': 1}
knn x3 score: 0.9554920634920634
knn x3 params {'neighbors': 7, 'weights': 'uniform', 'p': 1}
knn x4 score: 0.9774497354497355
knn x4 params {'neighbors': 10, 'weights': 'uniform', 'p': 2}


In [6]:
knn_classifier_x2 = KNeighborsClassifier(n_jobs=-1, n_neighbors=knn_best_params_x2["neighbors"],
                                  p=knn_best_params_x2["p"],
                                  weights=knn_best_params_x2["weights"])
knn_classifier_x2.fit(x2_train_norm, y_train)
knn_preds_x2 = knn_classifier_x2.predict(x2_test_norm)
knn_accuracy_x2 = accuracy_score(y_test, knn_preds_x2)

knn_classifier_x3 = KNeighborsClassifier(n_jobs=-1, n_neighbors=knn_best_params_x3["neighbors"],
                                  p=knn_best_params_x3["p"],
                                  weights=knn_best_params_x3["weights"])
knn_classifier_x3.fit(x3_train_norm, y_train)
knn_preds_x3 = knn_classifier_x3.predict(x3_test_norm)
knn_accuracy_x3 = accuracy_score(y_test, knn_preds_x3)

knn_classifier_x4 = KNeighborsClassifier(n_jobs=-1, n_neighbors=knn_best_params_x4["neighbors"],
                                  p=knn_best_params_x4["p"],
                                  weights=knn_best_params_x4["weights"])
knn_classifier_x4.fit(x4_train_norm, y_train)
knn_preds_x4 = knn_classifier_x4.predict(x4_test_norm)
knn_accuracy_x4 = accuracy_score(y_test, knn_preds_x4)

### Random Forest

In [7]:
rf_classifier_x2 = RandomForestClassifier(n_estimators=100, n_jobs=-1)
rf_classifier_x3 = RandomForestClassifier(n_estimators=100, n_jobs=-1)
rf_classifier_x4 = RandomForestClassifier(n_estimators=100, n_jobs=-1)
rf_classifier_x2.fit(x2_train, y_train)
rf_classifier_x3.fit(x3_train, y_train)
rf_classifier_x4.fit(x4_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=None, verbose=0,
                       warm_start=False)

In [8]:
scores_x2 = cross_val_score(rf_classifier_x2, x2_train, y_train, cv=5)
scores_x3 = cross_val_score(rf_classifier_x3, x3_train, y_train, cv=5)
scores_x4 = cross_val_score(rf_classifier_x4, x4_train, y_train, cv=5)
score_x2 = scores_x2.mean()
score_x3 = scores_x3.mean()
score_x4 = scores_x4.mean()
print('x2 score:', score_x2)
print('x3 score:', score_x3)
print('x4 score:', score_x4)

x2 score: 0.6732380952380953
x3 score: 0.9472275132275133
x4 score: 0.9552275132275133


In [10]:
rf_preds_x2 = rf_classifier_x2.predict(x2_test)
rf_accuracy_x2 = accuracy_score(y_test, rf_preds_x2)
rf_preds_x3 = rf_classifier_x3.predict(x3_test)
rf_accuracy_x3 = accuracy_score(y_test, rf_preds_x3)
rf_preds_x4 = rf_classifier_x4.predict(x4_test)
rf_accuracy_x4 = accuracy_score(y_test, rf_preds_x4)

### Podsumowanie

In [11]:
print('KNN accuracy dla 2 cech:', knn_accuracy_x2)
print('KNN accuracy dla 3 cech:', knn_accuracy_x3)
print('KNN accuracy dla 4 cech:', knn_accuracy_x4)

print('RandomForest accuracy dla 2 cech:', rf_accuracy_x2)
print('RandomForest accuracy dla 2 cech:', rf_accuracy_x3)
print('RandomForest accuracy dla 2 cech:', rf_accuracy_x4)

KNN accuracy dla 2 cech: 0.8
KNN accuracy dla 3 cech: 0.8
KNN accuracy dla 4 cech: 1.0
RandomForest accuracy dla 2 cech: 0.8666666666666667
RandomForest accuracy dla 2 cech: 0.9333333333333333
RandomForest accuracy dla 2 cech: 1.0
