In [12]:
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from glob import glob
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [13]:
ss = StandardScaler()

def is_healty(typePlant):
  return int(typePlant != "braquiaria")

In [14]:
stock_files = sorted(glob("data/_*.csv"))

all_data = pd.concat((pd.read_csv(file).assign(filename = file)
          for file in stock_files), ignore_index = True)
classesNames = [c.split('_')[-1][:-4] for c in stock_files]

In [15]:
all_data = all_data.drop(columns=['Function', 'Sample num'])

In [16]:
X = np.asarray(all_data[['F1 (410nm)', 
                         'F2 (440nm)',
                         'F3 (470nm)',
                         'F4 (510nm)',
                         'F5 (550nm)',
                         'F6 (583nm)',
                         'F7 (620nm)',
                         'F8 (670nm)',
                         'CLEAR']])

In [17]:
Y = np.asarray([classesNames.index(fn.split('_')[-1][:-4]) for fn in all_data['filename']])

In [18]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, shuffle=True) 
print("quantidade de dados de treino: {}".format(X_train.shape))
print("quantidade de dados de teste: {}".format(X_test.shape))

quantidade de dados de treino: (473, 9)
quantidade de dados de teste: (158, 9)


In [19]:
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

In [20]:
grid_params = {'n_neighbors': [1,3,5,11,19],
               'weights': ['uniform', 'distance'],
               'metric': ['euclidean', 'manhattan']}

gs = GridSearchCV(KNeighborsClassifier(), grid_params, verbose=1,
                  cv=3, n_jobs=1)

gs_results = gs.fit(X_train, Y_train)

Fitting 3 folds for each of 20 candidates, totalling 60 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    0.3s finished


In [21]:
print(gs_results.best_score_)
print(gs_results.best_estimator_)
print(gs_results.best_params_)

0.777970383509366
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='distance')
{'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}


In [22]:
knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [23]:
knn.fit(X_train, Y_train) 

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                     weights='uniform')

In [24]:
cv_scores_train = cross_val_score(knn, X_train, Y_train, cv=10)
cv_scores_test = cross_val_score(knn, X_test, Y_test, cv=10)
print('Cross Treino:{}'.format(np.mean(cv_scores_train)))
print('Cross Teste:{}'.format(np.mean(cv_scores_test)))

Cross Treino:0.7863475177304965
Cross Teste:0.7420833333333334


In [25]:
print("Treino Score: {}".format(knn.score(X_train, Y_train))) #acertos treino
print("Teste Score: {}".format(knn.score(X_test, Y_test))) #acertos teste

Treino Score: 1.0
Teste Score: 0.810126582278481


In [26]:
flowerx = ss.transform(np.array([[260, 378, 496, 636, 782, 757, 994, 1207, 5787]])) #braquiaria saudavel
flower_type = classesNames[int(knn.predict(flowerx))]
print("Saida = %d, flor da classe: %s" % (is_healty(flower_type),flower_type))

Saida = 0, flor da classe: braquiaria


In [27]:
flowerx = ss.transform(np.array([[366, 471, 632, 796, 999,  967,  1326, 1532, 7318]])) #AMARGOSO NÃO SAUDAVEL
flower_type = classesNames[int(knn.predict(flowerx))]
print("Saida = %d, flor da classe: %s" % (is_healty(flower_type),flower_type))

Saida = 1, flor da classe: amargoso


In [28]:
flowerx = ss.transform(np.array([[458, 606, 845, 1002, 1272, 1382, 1747, 2108, 8244]])) #CARURU NÃO SAUDAVEL
flower_type = classesNames[int(knn.predict(flowerx))]
print("Saida = %d, flor da classe: %s" % (is_healty(flower_type),flower_type))

Saida = 1, flor da classe: caruru


In [29]:
flowerx = ss.transform(np.array([[387, 516 ,625, 799,  947,  946,  1145, 1379, 7544]])) #JUAZEIRO NÃO SAUDAVEL
flower_type = classesNames[int(knn.predict(flowerx))]
print("Saida = %d, flor da classe: %s" % (is_healty(flower_type),flower_type))

Saida = 1, flor da classe: juazeiro


In [30]:
flowerx = ss.transform(np.array([[254, 347 ,439, 558,  674,  710,  846,  1007, 4331]])) #LEITEIRO NÃO SAUDAVEL
flower_type = classesNames[int(knn.predict(flowerx))]
print("Saida = %d, flor da classe: %s" % (is_healty(flower_type),flower_type))

Saida = 1, flor da classe: leiteiro
