# Imports

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
import pandas
import numpy
import data_massage
import itertools
import matplotlib.pyplot as plt

In [2]:
# Load the frames through the project helper module and build its summary.
metadata, full_data, induction_only = data_massage.load_data()
summarized_data = data_massage.summarize_data(metadata, induction_only)

# Keep only the rows whose label is something other than "background".
non_background_rows = induction_only["label"] != "background"
knn_data = induction_only[non_background_rows]

# Candidate input columns, kept in alphabetical order.
features = ["R_mean", "Temp.", "Humidity"]
features.sort()

# Layer widths expressed as powers of the feature count (exponents 3,4,5,4,3).
# Not used by the cells visible here.
hidden_layers = tuple(len(features) ** exponent for exponent in (3, 4, 5, 4, 3))

# Every non-empty subset of `features`, smallest subsets first. Despite the
# name these are combinations (order-insensitive), one tuple per subset.
feature_permutations = [
    subset
    for subset_size in range(1, len(features) + 1)
    for subset in itertools.combinations(features, subset_size)
]

In [3]:
# Evaluation settings shared by every feature subset.
TEST_FRACTION = 0.2
SPLIT_SEED = 7
N_NEIGHBORS = 7

# Maps "feat_a,feat_b" -> {"prediction": predicted test labels,
#                           "report": text classification report}.
results = {}

# train_test_split chooses rows from the row count, the stratify labels and
# the seed only, so splitting inside the loop selected the same rows on every
# iteration. Splitting once up front gives the same train/test rows without
# repeating the work, and makes it explicit that all subsets share one split.
train_rows, test_rows = train_test_split(
    knn_data,
    test_size=TEST_FRACTION,
    stratify=knn_data["label"],
    random_state=SPLIT_SEED,
)
knn_train_y = train_rows["label"]
knn_test_y = test_rows["label"]

# NOTE(review): the columns are passed to KNN unscaled, so the feature with
# the largest numeric range dominates the distance. Consider a scaler in a
# pipeline; that would change the scores recorded below, so it is not done here.
for perm in feature_permutations:
    key = ','.join(perm)
    feature_set = list(perm)

    # Project the shared split onto the columns under test.
    knn_train_x = train_rows[feature_set]
    knn_test_x = test_rows[feature_set]

    knn = KNeighborsClassifier(algorithm="auto", n_neighbors=N_NEIGHBORS, weights="uniform")
    knn.fit(knn_train_x, knn_train_y)
    knn_y_pred = knn.predict(knn_test_x)

    # 0 is the documented value for "score undefined metrics as 0 without a
    # warning"; the previous `False` only worked through bool-to-int coercion.
    report = classification_report(knn_test_y, knn_y_pred, zero_division=0)

    results[key] = {"prediction": knn_y_pred, "report": report}

    print(key)
    print(report)
    print("\n\n\n")

Humidity
              precision    recall  f1-score   support

      banana       0.55      0.52      0.54     38221
        wine       0.61      0.64      0.63     45071

    accuracy                           0.59     83292
   macro avg       0.58      0.58      0.58     83292
weighted avg       0.59      0.59      0.59     83292





R_mean
              precision    recall  f1-score   support

      banana       0.63      0.63      0.63     38221
        wine       0.69      0.69      0.69     45071

    accuracy                           0.66     83292
   macro avg       0.66      0.66      0.66     83292
weighted avg       0.66      0.66      0.66     83292





Temp.
              precision    recall  f1-score   support

      banana       0.54      0.49      0.51     38221
        wine       0.60      0.64      0.62     45071

    accuracy                           0.57     83292
   macro avg       0.57      0.57      0.57     83292
weighted avg       0.57      0.57      0.57 