In [6]:
import csv
import random
import math
import operator
from sklearn.metrics import mean_squared_error

# Loading Data Set
def load_Dataset(filename, split, trainingSet=None, testSet=None, numColumns=24):
    """Read a CSV file and randomly partition its rows into two lists.

    The first ``numColumns`` fields of every loaded row are converted to
    ``float`` in place.  Each row is then appended to ``trainingSet`` when
    ``random.random() < split`` and to ``testSet`` otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Probability that a row is placed in the training set.
        trainingSet: Optional list that receives the training rows; a new
            list is created when omitted.
        testSet: Optional list that receives the test rows; a new list is
            created when omitted.
        numColumns: Number of leading fields per row to convert to ``float``.

    Returns:
        The ``(trainingSet, testSet)`` pair.  These are the same list objects
        that were passed in, when they were supplied.

    Raises:
        ValueError: If one of the first ``numColumns`` fields is not numeric.
        IndexError: If a loaded row has fewer than ``numColumns`` fields.
    """
    # Create fresh lists per call; list literals used as default arguments
    # would be shared (and keep growing) across calls.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as csvfile:
        dataset = list(csv.reader(csvfile))

    # NOTE(review): the final row of the file is never loaded.  Presumably
    # the input ends with a blank line that must be skipped -- confirm
    # against the data file before changing this.
    for row in dataset[:-1]:
        for column in range(numColumns):
            row[column] = float(row[column])
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)

    return trainingSet, testSet

# Euclidean distance algorithm for training set + test instance
# length is the number of leading fields to include in the distance calculation

def euclidean_Distance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` fields.

    Args:
        instance1: First indexable sequence of numbers.
        instance2: Second indexable sequence of numbers.
        length: How many leading positions take part in the calculation.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squared_total = 0
    for index in range(length):
        delta = instance1[index] - instance2[index]
        squared_total += delta ** 2
    return math.sqrt(squared_total)

# Finding the neighbors of the test instance in the training set

def get_Neighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is measured with ``euclidean_Distance`` over every field of
    ``testInstance`` except the last one.  Rows at equal distance keep their
    relative order from ``trainingSet`` because the sort is stable.

    Args:
        trainingSet: Sequence of training rows.
        testInstance: Row to find neighbours for.
        k: Number of neighbours to return.

    Returns:
        A list of ``k`` rows from ``trainingSet``, nearest first.

    Raises:
        IndexError: If ``k`` exceeds the number of training rows.
    """
    # The last field is excluded from the distance calculation.
    feature_count = len(testInstance) - 1
    ranked = sorted(
        (
            (candidate, euclidean_Distance(testInstance, candidate, feature_count))
            for candidate in trainingSet
        ),
        key=operator.itemgetter(1),
    )
    return [ranked[position][0] for position in range(k)]
 
# Voting on all the neighbors to classify the test instance 

def get_Response(neighbors):
    """Return the label that occurs most often among ``neighbors``.

    The label of a neighbour is its last field.  When several labels share
    the highest count, the one that appears first in ``neighbors`` wins,
    because the descending sort is stable.

    Args:
        neighbors: Non-empty sequence of rows.

    Returns:
        The winning label.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1
    ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
    winner, _count = ranked[0]
    return winner
 
# Accuracy test

def get_Accuracy(testSet, predictions, tolerance=0.25):
    """Return the percentage of predictions that fall close to the actual value.

    A prediction counts as correct when the actual value (the last field of
    the test row) lies strictly inside the band spanned by
    ``prediction * (1 - tolerance)`` and ``prediction * (1 + tolerance)``.

    Args:
        testSet: Sequence of test rows; the last field is the actual value.
        predictions: Predicted values, index-aligned with ``testSet``.
        tolerance: Relative half-width of the acceptance band (0.25 = 25%).

    Returns:
        The share of correct predictions as a float between 0.0 and 100.0.
        An empty ``testSet`` yields 0.0 instead of dividing by zero.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        return 0.0

    correct = 0
    for index, row in enumerate(testSet):
        predicted = predictions[index]
        # Order the bounds explicitly: for a negative prediction the
        # "(1 + tolerance)" product is the smaller of the two.
        lower, upper = sorted(
            (predicted * (1 - tolerance), predicted * (1 + tolerance))
        )
        if lower < row[-1] < upper:
            correct += 1
    return (correct / float(total)) * 100.0

def main():
    """Run the k-nearest-neighbour experiment on ``finalinputs.csv``.

    Loads the data with a 0.95 training split, predicts a value for every
    test row from its 9 nearest training rows, prints each prediction next
    to the actual value, and finally prints the accuracy percentage.
    """
    # Prepare the training and test sets
    trainingSet = []
    testSet = []
    split = 0.95
    load_Dataset('finalinputs.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)) + '\n')

    # Generate one prediction per test row
    predictions = []
    k = 9
    for testRow in testSet:
        neighbors = get_Neighbors(trainingSet, testRow, k)
        result = get_Response(neighbors)
        predictions.append(result)
        print('> Predicted = $' + repr(result) + ', Actual = $' + repr(testRow[-1]))

    accuracy = get_Accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy))


# Run the experiment only when executed as a script or notebook cell, not
# when the definitions are imported from elsewhere.
if __name__ == '__main__':
    main()

Train set: 11438
Test set: 644

> Predicted = $1455.0, Actual = $1327.0
> Predicted = $1430.0, Actual = $1435.0
> Predicted = $1459.0, Actual = $1196.0
> Predicted = $1351.0, Actual = $995.0
> Predicted = $492.0, Actual = $654.0
> Predicted = $1048.0, Actual = $940.0
> Predicted = $701.0, Actual = $712.0
> Predicted = $2153.0, Actual = $2259.0
> Predicted = $749.0, Actual = $524.0
> Predicted = $1147.0, Actual = $934.0
> Predicted = $578.0, Actual = $590.0
> Predicted = $293.0, Actual = $535.0
> Predicted = $1270.0, Actual = $1228.0
> Predicted = $500.0, Actual = $564.0
> Predicted = $2431.0, Actual = $2092.0
> Predicted = $967.0, Actual = $1042.0
> Predicted = $3059.0, Actual = $4276.0
> Predicted = $952.0, Actual = $978.0
> Predicted = $1008.0, Actual = $1036.0
> Predicted = $722.0, Actual = $764.0
> Predicted = $2710.0, Actual = $2820.0
> Predicted = $674.0, Actual = $766.0
> Predicted = $957.0, Actual = $1258.0
> Predicted = $668.0, Actual = $768.0
> Predicted = $1328.0, Actual = $

> Predicted = $1596.0, Actual = $2003.0
> Predicted = $2428.0, Actual = $2153.0
> Predicted = $1908.0, Actual = $2616.0
> Predicted = $931.0, Actual = $580.0
> Predicted = $643.0, Actual = $1361.0
> Predicted = $2216.0, Actual = $2253.0
> Predicted = $594.0, Actual = $425.0
> Predicted = $1264.0, Actual = $2009.0
> Predicted = $1536.0, Actual = $1353.0
> Predicted = $867.0, Actual = $973.0
> Predicted = $2539.0, Actual = $1743.0
> Predicted = $2402.0, Actual = $2963.0
> Predicted = $271.0, Actual = $470.0
> Predicted = $862.0, Actual = $1073.0
> Predicted = $790.0, Actual = $729.0
> Predicted = $1198.0, Actual = $1674.0
> Predicted = $740.0, Actual = $664.0
> Predicted = $2324.0, Actual = $2155.0
> Predicted = $1112.0, Actual = $2007.0
> Predicted = $833.0, Actual = $860.0
> Predicted = $1355.0, Actual = $1323.0
> Predicted = $2667.0, Actual = $2567.0
> Predicted = $984.0, Actual = $769.0
> Predicted = $1583.0, Actual = $1542.0
> Predicted = $600.0, Actual = $610.0
> Predicted = $994.0

> Predicted = $1488.0, Actual = $1354.0
> Predicted = $298.0, Actual = $236.0
> Predicted = $1042.0, Actual = $1099.0
> Predicted = $472.0, Actual = $417.0
> Predicted = $838.0, Actual = $1462.0
> Predicted = $437.0, Actual = $729.0
> Predicted = $1468.0, Actual = $784.0
> Predicted = $1087.0, Actual = $862.0
> Predicted = $1250.0, Actual = $1201.0
> Predicted = $1104.0, Actual = $1229.0
> Predicted = $3651.0, Actual = $3791.0
> Predicted = $2503.0, Actual = $1816.0
> Predicted = $1847.0, Actual = $1306.0
> Predicted = $1252.0, Actual = $1190.0
> Predicted = $1797.0, Actual = $1395.0
> Predicted = $1842.0, Actual = $1689.0
> Predicted = $1692.0, Actual = $1374.0
> Predicted = $460.0, Actual = $411.0
> Predicted = $260.0, Actual = $259.0
> Predicted = $2130.0, Actual = $2225.0
> Predicted = $2181.0, Actual = $2233.0
> Predicted = $1975.0, Actual = $1826.0
> Predicted = $1358.0, Actual = $1253.0
> Predicted = $1760.0, Actual = $1917.0
> Predicted = $1782.0, Actual = $1586.0
> Predicted =

> Predicted = $1431.0, Actual = $1511.0
> Predicted = $2515.0, Actual = $2358.0
> Predicted = $755.0, Actual = $677.0
> Predicted = $1689.0, Actual = $1473.0
> Predicted = $551.0, Actual = $586.0
> Predicted = $1932.0, Actual = $1417.0
> Predicted = $351.0, Actual = $323.0
> Predicted = $1031.0, Actual = $948.0
> Predicted = $394.0, Actual = $526.0
> Predicted = $2815.0, Actual = $3467.0
> Predicted = $490.0, Actual = $454.0
Accuracy: 67.70186335403726
