In [254]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statistics as stats

In [255]:
def transform_data(df):
    dummies = pd.get_dummies(df['outlook'])
    df      = pd.concat([df, dummies], axis=1)
    df      = df.drop(['outlook'], axis=1)
    cols    = df.columns.tolist()
    cols    = cols[-1 : -4 : -1] + cols[:-3]
    return df[cols]

In [256]:
def read_data(path):
    df = pd.read_csv(path)
    return transform_data(df)

In [257]:
def random_data(df):
    df = df.sample(frac=1)
    return df

In [258]:
def split_data(df, train_size):
    train = df[ : train_size]
    test  = df[train_size : ]
    return train, test

In [259]:
def euclidean_distance(x, y):
    return np.sqrt(np.sum((y[:-1] - x[:-1])**2))

In [260]:
def kNearestNeighbors(x, train_data, k):
    neighbors = []
    for i in range(len(train_data)):
        neighbors.append((euclidean_distance(x, train_data.iloc[i].values), i))
    neighbors.sort()
    return neighbors[:k+1]

In [261]:
def train(k, train_data):
    for i in range(len(train_data)):
        x = train_data.iloc[i].values
        y = kNearestNeighbors(x, train_data, k)
        results = df.iloc[[y[i][1] for i in range(len(y))]]['play'].tolist()[1:]
        prediction = stats.mode(results)
        print(f'Punto: {x} \nVecinos: {y[1:]} \nPredicción: {prediction}\n\n')

In [262]:
df = read_data('golf.csv')
train_data, test_data = split_data(df, 10)

In [264]:
train(3, train_data)

Punto: [1 0 0 85 85 0 'no'] 
Vecinos: [(7.14142842854285, 1), (16.401219466856727, 7), (21.93171219946131, 8)] 
Predicción: no


Punto: [1 0 0 80 90 1 'no'] 
Vecinos: [(7.14142842854285, 0), (9.486832980505138, 7), (22.847319317591726, 8)] 
Predicción: no


Punto: [0 0 1 83 86 0 'yes'] 
Vecinos: [(28.337254630609507, 6), (255.01176443450603, 0), (255.0529356819874, 1)] 
Predicción: no


Punto: [0 1 0 70 96 0 'yes'] 
Vecinos: [(16.1245154965971, 4), (16.76305461424021, 9), (26.49528259898354, 5)] 
Predicción: yes


Punto: [0 1 0 68 80 0 'yes'] 
Vecinos: [(7.0, 9), (10.488088481701515, 5), (16.1245154965971, 3)] 
Predicción: yes


Punto: [0 1 0 65 70 1 'no'] 
Vecinos: [(10.488088481701515, 4), (14.177446878757825, 9), (26.49528259898354, 3)] 
Predicción: yes


Punto: [0 0 1 64 65 1 'yes'] 
Vecinos: [(28.337254630609507, 2), (255.0529356819874, 5), (255.1019404081435, 8)] 
Predicción: yes


Punto: [1 0 0 72 95 0 'no'] 
Vecinos: [(9.486832980505138, 1), (16.401219466856727, 0), (25.1793566

  return np.sqrt(np.sum((y[:-1] - x[:-1])**2))


In [265]:
train(5, train_data)

Punto: [1 0 0 85 85 0 'no'] 
Vecinos: [(7.14142842854285, 1), (16.401219466856727, 7), (21.93171219946131, 8), (255.01176443450603, 2), (255.24693925686944, 9)] 
Predicción: yes


Punto: [1 0 0 80 90 1 'no'] 
Vecinos: [(7.14142842854285, 0), (9.486832980505138, 7), (22.847319317591726, 8), (255.0529356819874, 2), (255.24889813670106, 9)] 
Predicción: yes


Punto: [0 0 1 83 86 0 'yes'] 
Vecinos: [(28.337254630609507, 6), (255.01176443450603, 0), (255.0529356819874, 1), (255.1979623743105, 9), (255.39772904237032, 7)] 
Predicción: no


Punto: [0 1 0 70 96 0 'yes'] 
Vecinos: [(16.1245154965971, 4), (16.76305461424021, 9), (26.49528259898354, 5), (255.01176443450603, 7), (255.2704448227409, 1)] 
Predicción: no


Punto: [0 1 0 68 80 0 'yes'] 
Vecinos: [(7.0, 9), (10.488088481701515, 5), (16.1245154965971, 3), (255.199921630082, 8), (255.4740691342274, 7)] 
Predicción: yes


Punto: [0 1 0 65 70 1 'no'] 
Vecinos: [(10.488088481701515, 4), (14.177446878757825, 9), (26.49528259898354, 3), (255.

  return np.sqrt(np.sum((y[:-1] - x[:-1])**2))


In [266]:
train(7, train_data)

Punto: [1 0 0 85 85 0 'no'] 
Vecinos: [(7.14142842854285, 1), (16.401219466856727, 7), (21.93171219946131, 8), (255.01176443450603, 2), (255.24693925686944, 9), (255.6169008496895, 4), (255.67948685805828, 3)] 
Predicción: yes


Punto: [1 0 0 80 90 1 'no'] 
Vecinos: [(7.14142842854285, 0), (9.486832980505138, 7), (22.847319317591726, 8), (255.0529356819874, 2), (255.24889813670106, 9), (255.2704448227409, 3), (255.48189759746188, 4)] 
Predicción: yes


Punto: [0 0 1 83 86 0 'yes'] 
Vecinos: [(28.337254630609507, 6), (255.01176443450603, 0), (255.0529356819874, 1), (255.1979623743105, 9), (255.39772904237032, 7), (255.5132090519001, 4), (255.52886334032794, 3)] 
Predicción: yes


Punto: [0 1 0 70 96 0 'yes'] 
Vecinos: [(16.1245154965971, 4), (16.76305461424021, 9), (26.49528259898354, 5), (255.01176443450603, 7), (255.2704448227409, 1), (255.52886334032794, 2), (255.67948685805828, 0)] 
Predicción: no


Punto: [0 1 0 68 80 0 'yes'] 
Vecinos: [(7.0, 9), (10.488088481701515, 5), (16.12451

  return np.sqrt(np.sum((y[:-1] - x[:-1])**2))
