In [9]:
'''KNN using random division of data (67% training, 33% testing), 
this program will generate predicted class, actual class of data and total accuracy of model. '''

import csv
import random
import math
import operator

def loadDataset(sampleFile, splitRatio, trainingSet=[] , testSet=[]): #Split the data into training and testing set
    print("sepal_length, sepal_width, petal_length, petal_width")
    with open(sampleFile) as csvfile:
        fileData = csv.reader(csvfile) #fileData is an object
        dataset = list(fileData)
        for x in range(len(dataset)-1):
            for y in range(4):
                dataset[x][y] = float(dataset[x][y]) #converting string data into float
            if random.random() < splitRatio:  #random.random(),returns next random floating point num in the range(0.0,1.0)
                trainingSet.append(dataset[x])
            else:
                testSet.append(dataset[x])

def euclideanDistance(instOne, instTwo, length): #Calculating the euclidean Distance between two instances 
    ecDistance = 0
    for x in range(length): #Loop for all attributes of the set
        ecDistance += pow((instOne[x] - instTwo[x]), 2)
    return math.sqrt(ecDistance)

def getNeighbors(trainingSet, testInstance, k):     #Calculating k nearest neighbour
    distances = []
    length = len(testInstance)-1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length) #Calculate distance test set from each training set
        distances.append((trainingSet[x], dist)) #Stroing distance of each training set [whole list, distance]
    distances.sort(key=operator.itemgetter(1)) 
    #constructs a callable that assumes an iterable object(list,tuple,set) as input, and fetches the nth element out of it.
    neighbors = []
    for x in range(k): #find nearest k 
        neighbors.append(distances[x][0]) #Storing all k nearby lists
    #print(neighbors)
    return neighbors

def getResponse(neighbors):
    classVotes = {}                    #A 2D dictionary which is storing {class label, count}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]    #Storing the class label, which is commonly at the end of the list
        if response in classVotes: 
            classVotes[response] += 1  #Increment the class label as it is in KNN
        else:
            classVotes[response] = 1   #Create the class label with count one
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True) #Sort list based on counts
    return sortedVotes[0][0]

def getAccuracy(testSet, predictions): #Predicting accuracy of test results
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]: #Check if predicted class and actual class is same
            correct += 1
    return (correct/float(len(testSet))) * 100.0

def main():
    # prepare data
    trainingSet=[]
    testSet=[]
    splitRatio = 0.67
    loadDataset(r'C:\Users\shree\Desktop\notbook\DM\iris.data', splitRatio, trainingSet, testSet) #Spliting data into 2 parts 67%,33%
    print('Train set: '+repr(len(trainingSet))) #Length of training data
    print('Test set: '+repr(len(testSet)))      #Length of testing data
    predictions=[] #predicted labels
    k = 3
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)  #finding neighbour
        result = getResponse(neighbors)                       #store the relevent class label 
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
main()

sepal_length, sepal_width, petal_length, petal_width
Train set: 95
Test set: 55
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-setosa', actual='Iris-setosa'
> predicted='Iris-versicolor', actual='Iris-versicolor'


In [30]:
for i in range(1,20):
    print(random.random(), end="   ")

0.48487235580073074   0.6822872916926128   0.5062676582105694   0.9428042469555008   0.34628251283003264   0.47913600830119096   0.5898590580696208   0.5225355903132026   0.11222925483665813   0.15373725424730944   0.19567176779768358   0.8587078321785188   0.29778785074529635   0.39760317005611956   0.5574296307977408   0.954608929134144   0.11980485432292909   0.5701851855540312   0.33544672177781965   

In [6]:
'''USER INPUT KNN: when user enters the Test Data set, based on predefine data set we predict the class label of Test Set 
'''
import csv
import random
import math
import operator

def loadDataset(sampleFile, trainingSet=[] , testSet=[]): #Split the data into training and testing set
    with open(sampleFile) as csvfile:
        fileData = csv.reader(csvfile) #fileData is an object
        dataset = list(fileData)
        #for k in dataset:
            #print(",".join(k))
        for x in range(len(dataset)-1):
            for y in range(4):
                dataset[x][y] = float(dataset[x][y]) #converting string data into float
            trainingSet.append(dataset[x])
            
def euclideanDistance(instOne, instTwo, length): #Calculating the euclidean Distance between two instances 
    ecDistance = 0
    for x in range(length): #Loop for all attributes of the set
        ecDistance += pow((instTwo[x] - instTwo[x]), 2)
    return math.sqrt(ecDistance)

def getNeighbors(trainingSet, testInstance, k):     #Calculating k nearest neighbour
    distances = []
    length = len(testInstance)-1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length) #Calculate distance test set from each training set
        distances.append((trainingSet[x], dist)) #Stroing distance of each training set
    distances.sort(key=operator.itemgetter(1)) 
    #constructs a callable that assumes an iterable object(list,tuple,set) as input, and fetches the nth element out of it.
    neighbors = []
    for x in range(k): #find nearest k 
        neighbors.append(distances[x][0]) #Storing all k nearby lists
    #print(neighbors)
    return neighbors

def getResponse(neighbors):
    classVotes = {}                    #A 2D dictionary which is storing {class label, count}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]    #Storing the class label, which is commonly at the end of the list
        if response in classVotes: 
            classVotes[response] += 1  #Increment the class label as it is in KNN
        else:
            classVotes[response] = 1   #Create the class label with count one
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True) #Sort list based on counts
    return sortedVotes[0][0]

def main():
    # prepare data
    print("sepal_length, sepal_width, petal_length, petal_width")
    size=int(input("Number of test cases you want to enter : "))
    trainingSet=[]
    testSet=[]
    for i in range(size):          #A for loop for row entries 
        a =[] 
        print("Enter data for Test Set[",i,"]")
        for j in range(4):         #A for loop for column entries (as there are 4 columns) 
            a.append(float(input())) 
        testSet.append(a)
    for i in range(len(testSet)):
        print(testSet[i][-1])
    loadDataset(r'C:\Users\shree\Desktop\notbook\DM\iris.data',trainingSet, testSet) 
    print('Train set: '+repr(len(trainingSet))) #Length of training data
    print('Test set: '+repr(len(testSet)))      #Length of testing data
    predictions=[] #predicted labels
    k = 3
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)  #finding neighbour
        result = getResponse(neighbors)                       #store the relevent class label 
        predictions.append(result)
        print('> predicted=' + repr(result))
main()

sepal_length, sepal_width, petal_length, petal_width
Number of test cases you want to enter : 1
Enter data for Test Set[ 0 ]
2
3
5
6
6.0
Train set: 150
Test set: 1
> predicted='Iris-setosa'
