<a href="https://colab.research.google.com/github/Kawin101/935306-Artificial-intelligence/blob/main/AI102_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# file: kNN.py
import numpy as np

def kNN(Xtrain, Ytrain, Xtest, k=1):
    """Classify every row of ``Xtest`` with a k-nearest-neighbours vote.

    For each test sample the Euclidean distance to every training sample is
    computed, the ``k`` closest training samples are selected and the most
    frequent label among them is returned.

    Parameters
    ----------
    Xtrain : array-like, one row per training sample
    Ytrain : array-like, one label per training sample
    Xtest : array-like, one row per sample to classify
    k : int, optional
        Number of neighbours that take part in the vote (default 1).

    Returns
    -------
    list
        One predicted label per row of ``Xtest``, in the same order.
    """
    # Accept plain Python lists as well as arrays.
    Xtrain = np.asarray(Xtrain)
    Ytrain = np.asarray(Ytrain)

    Ytest = []
    for x in Xtest:
        d = np.sqrt(np.sum((Xtrain - x) ** 2, axis=1))
        # Fixed: the original called ``np.argsord``, which does not exist.
        # A stable sort keeps equally distant samples in training order so
        # the result is deterministic.
        idx = np.argsort(d, kind='stable')
        values, counts = np.unique(Ytrain[idx[:k]], return_counts=True)
        # np.unique returns sorted labels, so a tied vote goes to the
        # smallest label (argmax picks the first maximum).
        Ytest.append(values[np.argmax(counts)])
    return Ytest

In [None]:
# file: iris_dataset.py
# Before running, download the dataset from:
# https://archive.ics.uci.edu/ml/machine-learning-databases/iris/
import pandas as pd
import os
import numpy as np


def load(path='./dataset/iris.csv', split_train_test=None):
    """Load the Iris data set, downloading and caching it when needed.

    If ``path`` exists it is read as a CSV file with a header row; otherwise
    the data is fetched from the UCI repository and written to ``path`` so
    later calls can use the local copy.

    Parameters
    ----------
    path : str, optional
        Location of the cached CSV file.
    split_train_test : float or None, optional
        Fraction of every class that goes into the training set.  When
        falsy (the default) no split is performed.

    Returns
    -------
    tuple
        ``(X, Y)`` when no split is requested, otherwise
        ``(Xtrain, Ytrain, Xtest, Ytest)``.  ``X`` holds the first four
        columns and ``Y`` the last column of the file.
    """
    if os.path.isfile(path):
        iris = pd.read_csv(path)
    else:
        url = ('https://archive.ics.uci.edu/ml/machine-learning-'
               'databases/iris/iris.data')
        iris = pd.read_csv(url, header=None)
        # Create the cache directory first; to_csv does not create it.
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        iris.to_csv(path, index=False)

    X = iris.iloc[:, :4].values
    Y = iris.iloc[:, -1].values
    if not split_train_test:
        return X, Y

    # Fixed: ``np.int`` was removed from NumPy; the builtin ``int`` is the
    # equivalent dtype.
    itrain = np.empty((0,), dtype=int)
    itest = np.empty((0,), dtype=int)
    # Split class by class so both parts keep the same class proportions.
    for label in np.unique(Y):
        idx = np.where(Y == label)[0]
        split = int(len(idx) * split_train_test)
        itrain = np.concatenate((itrain, idx[:split]))
        itest = np.concatenate((itest, idx[split:]))
    return X[itrain], Y[itrain], X[itest], Y[itest]

if __name__ == '__main__':
  # Quick manual check: load the full data set (no train/test split) and
  # print the feature matrix and the labels.
  # Fixed: the original called ``load_iris_dataset()``, which is not defined
  # in this file; the loader is named ``load``.
  irisInputs, irisTargets = load()
  print(irisInputs)
  print(irisTargets)


In [None]:
# file: kNN_iris.py 
from kNN import kNN
import iris_dataset
import matplotlib.pyplot as plt
import numpy as np

def plotdata(Xtrain, Ytrain, Xtest=(), Ytest=(), Ztest=()):
    """Scatter-plot the first two features of the iris samples.

    Training samples are drawn as ``x`` markers, test samples as filled dots
    coloured by their true label and, when predictions are given, as open
    circles coloured by the predicted label.

    Parameters
    ----------
    Xtrain, Ytrain : sequences
        Training samples and their labels.
    Xtest, Ytest : sequences, optional
        Test samples and their true labels.
    Ztest : sequence, optional
        Predicted labels for ``Xtest``.

    Each label sequence is expected to have the same length as the samples
    it belongs to.  Labels must be one of the three keys of ``color`` below.
    """
    # Defaults are immutable tuples rather than shared mutable lists.
    color = {'Iris-setosa': 'b',
             'Iris-versicolor': 'g',
             'Iris-virginica': 'r', }
    for point, label in zip(Xtrain, Ytrain):
        plt.plot(point[0], point[1], 'x', c=color[label], mfc='none')
    for point, label in zip(Xtest, Ytest):
        plt.plot(point[0], point[1], '.', c='none', mfc=color[label])
    # Predictions are drawn at the test sample positions.
    for point, label in zip(Xtest, Ztest):
        plt.plot(point[0], point[1], 'o', c=color[label], mfc='none')

if __name__ == '__main__':
  # Load the data with a 0.5 train/test split.
  Xtrain, Ytrain, Xtest, Ytest = iris_dataset.load(split_train_test=0.5)
  n_test = len(Ytest)

  # Try every k from 1 up to the number of training samples and record the
  # accuracy (in percent) obtained on the test set.
  K = range(1, len(Xtrain)+1)
  rate = []

  plt.figure(1)
  for k in K:
    Ztest = kNN(Xtrain, Ytrain, Xtest, k)
    # Show the classification for this k briefly, then clear the axes.
    plotdata(Xtrain, Ytrain, Xtest, Ytest, Ztest)
    plt.title('k = ' + str(k))
    plt.draw()
    plt.pause(0.001)
    plt.cla()
    n_correct = np.sum(Ztest == Ytest)
    rate.append(n_correct / n_test * 100)

  # Accuracy as a function of k.
  plt.figure(2)
  plt.plot(K, rate)
  plt.axis([0, 80, 30, 100])
  plt.xlabel('k')
  plt.ylabel('Accuracy rate (%)')
  plt.grid(True)
  plt.show()
  print(rate)

  # Re-run and plot the classification for the first k that reached the
  # highest accuracy (rate[0] belongs to k = 1, hence the + 1).
  plt.figure(3)
  best_k = 1 + rate.index(max(rate))
  Ztest = kNN(Xtrain, Ytrain, Xtest, best_k)
  plotdata(Xtrain, Ytrain, Xtest, Ytest, Ztest)
  plt.title('k = ' + str(best_k))
  plt.show()

In [None]:
# kNN GUI by Thidarat mata: https://github.com/Thidaratmatha05/Py_GUI

# Workshop 3
import seaborn as sns
import matplotlib.pyplot as plt

# Workshop 3: load the iris data set and take a first look at it.
df = sns.load_dataset('iris') # Load data from the Internet
df.head() # Show the first rows as an example
df.isnull().sum() # Count the missing values in every column

# Optional exploratory plots (kept disabled):
# sns.scatterplot('sepal_length', 'sepal_width',data=df, hue='species') # point plot multi colors
# sns.scatterplot('petal_length', 'petal_width',data=df, hue='species')
# columns = ['sepal_length','sepal_width','petal_length','species']
# sns.pairplot(df[columns], hue='species', height=2.0)
# plt.show()

# Workshop 4: separate the features (X) from the target (y).
# NOTE: this first assignment is immediately replaced by the explicit
# column selection two lines below.
X = df.drop('species', axis=1)
feature_cols = ['sepal_length','sepal_width','petal_length','petal_width']
X = df[feature_cols]
X.head()

# Target column: the species name of every sample.
y = df.species
y.head()
X.shape
y.shape

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [None]:
# Workshop 5
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier, evaluated with cross-validation.
# X and y are expected to have been defined by an earlier cell.
model = GaussianNB()

cvs = cross_val_score(model, X, y, cv=10) # 10-fold cross-validation (cv=10)
print("cross val scores {}".format(cvs))
print("mean (%) = {:.3f}".format(cvs.mean() *100 ))

cross val scores [0.93333333 0.93333333 1.         0.93333333 0.93333333 0.93333333
 0.86666667 1.         1.         1.        ]
mean (%) = 95.333


In [None]:
# Workshop 6
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

# Hold out 30% of the samples for testing. No random_state is passed, so the
# split is not fixed between runs.
# X and y are expected to have been defined by an earlier cell.
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.3)

# Number of test samples per class, as (label, count) rows.
(label, count) = np.unique(y_test, return_counts=True)
freq = np.asarray((label, count)).T
# print(freq)

model = GaussianNB()

# Train on the training part and predict the held-out part.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# print('\n Accuracy Score: {:.4f}'.format(accuracy_score(y_test, y_pred)))
# print(classification_report(y_test, y_pred))
# cm = confusion_matrix(y_test, y_pred)
# print(cm)

y_test[(y_test == y_pred)].count() # Count the correct predictions
# Table with true label, predicted label and whether they agree.
dy = pd.DataFrame()
dy['y_test'] = y_test
dy['y_pred'] = y_pred
dy['result'] = y_test == y_pred

# print(y_test[dy.result == True].count()) # Count the correct predictions
# dy[dy.result == False]

# Workshop 7
# Two new samples to classify; presumably in the same column order as
# feature_cols - confirm.
X_input = np.array([[4.7,3.6,2.0,0.8],
                    [6.5,2.5,4.8,1.4]])

# model.fit(X, y)
# print(model.predict(X_input))

# print(y.unique())
# print(model.predict_proba(X_input).round(8))


# Workshop 10
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.model_selection import cross_val_score

# k-NN with 3 neighbours, evaluated with 10-fold cross-validation.
model = KNeighborsClassifier(n_neighbors=3)
cvs = cross_val_score(model, X, y, cv=10) # no need to use model.fit
print('\n cross val scores {}'.format(cvs.round(3)))
print('mean (%) = {:.3f}'.format(cvs.mean() *100))

# Fit on the complete data set before predicting the new samples.
model.fit(X, y)
print(model.predict(X_input))


 cross val scores [1.    0.933 1.    0.933 0.867 1.    0.933 1.    1.    1.   ]
mean (%) = 96.667
['setosa' 'versicolor']




In [None]:
import numpy as np

# Project: Sneaker for you
# Europe Sneaker Size
# https://www.zappos.com/c/shoe-size-conversion

# Dataset's Men
# Men's data set (11 values each)
HeightM = [120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220]
WeightM = [40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140]
SizeM = [39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]

# Women's data set (11 heights, but only 10 weights and 10 sizes)
HeightW = [100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200]
WeightW = [40, 50, 60, 70, 80, 90, 100, 110, 120, 130]
SizeW = [35, 36, 37, 38, 39, 40, 41, 42, 43, 44]

data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
num1 = 175

# Distance between the query height and every men's height
# (sqrt of the square, i.e. the absolute difference).
dist = [np.sqrt((num1 - n) ** 2) for n in HeightM]

nearest = np.min(dist)
print(nearest)

# Find the position(s) of the closest data point(s).
idx = np.where(dist == nearest)
print(idx[0])

5.0
[5 6]


In [None]:
# https://youtu.be/DgAcnKeM_LM
# https://youtu.be/6e2enYJUM4M
import numpy as np
def kNN(todo):
  """Return the nearest reference sample for one query (1-nearest neighbour).

  ``todo`` is the query, compared against ten built-in reference rows of
  three numbers each. The result is the tuple
  ``(smallest distance, index of that row, label of that row)``.
  """
  # Reference rows: [Height, Weight, Size]; the gender is kept in `labels`.
  samples = np.array([
      [120, 40, 39], [130, 50, 40], [140, 60, 41], [150, 70, 42],
      [160, 80, 43], [170, 90, 44], [180, 100, 45], [190, 110, 46],
      [200, 120, 47], [210, 130, 48],
  ])
  labels = np.array(['Women'] * 5 + ['Men'] * 5)

  query = np.array(todo)
  # Euclidean distance from the query to every reference row at once.
  distances = np.sqrt(np.sum((query - samples) ** 2, axis=1))

  # Position of the smallest distance (the first one when several tie).
  nearest = np.argmin(distances)
  return distances[nearest], nearest, labels[nearest]

def kNN02(todo2):
  """Predict the gender for one query with scikit-learn's k-NN classifier.

  The model is trained on 70% of ``Dataset_01.csv`` (columns ``Height``,
  ``Weight`` and ``Size`` as features, ``Gender`` as label) and the
  prediction for the query is printed.

  Parameters
  ----------
  todo2 : sequence of four numbers
      ``[Height, Weight, Size, K]``, where ``K`` is the number of neighbours.

  Returns
  -------
  tuple
      ``(Height, Weight, Size, K)`` as taken from ``todo2``.

  Raises
  ------
  ValueError
      If ``todo2`` does not contain exactly four values.
  """
  from sklearn.neighbors import KNeighborsClassifier
  from sklearn.model_selection import train_test_split
  import pandas as pd

  # Fixed: the original ignored ``todo2`` and read Height_input,
  # Weight_input, Size_input and Neighbors_input, which are not defined in
  # this function, so the call depended on module-level state.
  Height_input, Weight_input, Size_input, Neighbors_input = todo2

  fileDataset = 'Dataset_01.csv'
  data = pd.read_csv(fileDataset)

  feature_cols = ['Height', 'Weight', 'Size']
  x = data[feature_cols]
  y = data['Gender']

  # Train on 70% of the rows; the fixed seed keeps the split reproducible.
  x_train, _, y_train, _ = train_test_split(x, y, train_size=0.7, random_state=10)

  kNN_model = KNeighborsClassifier(n_neighbors=Neighbors_input)
  kNN_model = kNN_model.fit(x_train, y_train)

  # Give the query the same column names the model was fitted with.
  query = pd.DataFrame([[Height_input, Weight_input, Size_input]],
                       columns=feature_cols)
  Result_Output = kNN_model.predict(query)
  print(Result_Output)

  return Height_input, Weight_input, Size_input, Neighbors_input

# Example query; presumably [Height, Weight, Size, K] - confirm against kNN02.
todo2 = [150,50,40,5]
# kNN02 prints the prediction itself; the tuple it returns is printed below.
Result_Output = kNN02(todo2)
print(Result_Output)

# Disabled example for the 1-nearest-neighbour function kNN:
# todo = [160,50,39]
# result = kNN(todo)
# print(result)

['Women']
(0, 0, 0, 5)




In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

# Read the data set from a CSV file in the working directory.
fileDataset = 'Dataset_01.csv'
# link = 'https://github.com/Kawin101/kawin101.github.io/blob/main/Dataset_01.csv'
data = pd.read_csv(fileDataset)
# data.info()
# data.describe()
# data.head(10)

# print(data.Height.unique())
# print(data.Weight.unique())
# print(data.Size.unique())

# Encode the label as a number: 'Women' -> 0, 'Men' -> 1.
data['Gender'] = data.Gender.replace(['Women','Men'],[0,1])
data.head(10)

# Features and target.
x = data[['Height', 'Weight', 'Size']]
y = data['Gender']

# Split the data into train (70%) and test parts; random_state fixes the split
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=10)
# print(x_test.count())

# Set K, fit the model on the training part and score it on the test part
kNN_model = KNeighborsClassifier(n_neighbors=4)
kNN_model = kNN_model.fit(x_train, y_train)
kNNScore = kNN_model.score(x_test, y_test)
# print(kNNScore)

# Predict one query row [Height, Weight, Size] and show the encoded result
StartkNN = [ [190,50,45]]
print(kNN_model.predict(StartkNN))

[0]




In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

# Input values for the query
# Variables holding K, Height, Weight and Size
Neighbors_input = 5
Height_input = 180
Weight_input = 80
Size_input = 45

# Read the data set from a CSV file in the working directory.
fileDataset = 'Dataset_01.csv'
data = pd.read_csv(fileDataset)

data.head(10)

# Features and target (the Gender labels are used as they appear in the file).
x = data[['Height', 'Weight', 'Size']]
y = data['Gender']

# Split the data into train (70%) and test parts; random_state fixes the split
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=10)
# print(x_test.count())

# Set K, fit the model on the training part and score it on the test part
kNN_model = KNeighborsClassifier(n_neighbors=Neighbors_input)
kNN_model = kNN_model.fit(x_train, y_train)
kNNScore = kNN_model.score(x_test, y_test)
# print(kNNScore)

# Predict the query row built from the input values and show the result
StartkNN = [ [Height_input,Weight_input,Size_input]]
Result_Output = kNN_model.predict(StartkNN)
print(Result_Output)

['Men']


