In [39]:
import csv
import random
import math
import operator
from sklearn.metrics import mean_squared_error

# Loading Data Set
def load_Dataset(filename, split, trainingSet=None, testSet=None):
    """Read a CSV file and randomly split its rows into training and test lists.

    The first 24 columns of every processed row are converted to ``float``
    in place; any remaining columns keep the string value produced by
    ``csv.reader``. Each processed row goes to ``trainingSet`` when
    ``random.random() < split`` and to ``testSet`` otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; larger values
            send more rows to the training list.
        trainingSet: List that receives the training rows (extended in
            place). A new list is created when omitted.
        testSet: List that receives the test rows (extended in place).
            A new list is created when omitted.

    Returns:
        The ``(trainingSet, testSet)`` pair, i.e. the same list objects that
        were passed in (or the freshly created ones).

    Raises:
        ValueError: If one of the first 24 cells of a row is not numeric.
        IndexError: If a processed row has fewer than 24 columns.
    """
    # Fresh lists per call: a mutable default would be shared between calls
    # and keep accumulating rows from earlier invocations.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module do its own newline handling.
    with open(filename, 'r', newline='') as csvfile:
        dataset = list(csv.reader(csvfile))

    # NOTE(review): the final row of the file is never processed (the
    # original loop stopped at len(dataset) - 1). Presumably this skips a
    # trailing blank line -- confirm against the data file.
    for row in dataset[:-1]:
        for y in range(24):
            row[y] = float(row[y])
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)
    return trainingSet, testSet

# Euclidean distance algorithm for training set + test instance
# length controls how many leading fields are included in the distance calculation

def euclidean_Distance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only the fields at positions ``0 .. length - 1`` take part in the
    calculation; anything after that in either instance is ignored.
    """
    squared_total = 0
    for position in range(length):
        gap = instance1[position] - instance2[position]
        squared_total += gap ** 2
    return math.sqrt(squared_total)

# Finding the neighbors of the test instance in the training set

def get_Neighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is measured with ``euclidean_Distance`` over every field of
    ``testInstance`` except the last one. Rows are returned nearest first.
    An ``IndexError`` is raised when ``k`` exceeds the number of training
    rows.
    """
    # The last field is excluded from the distance calculation.
    feature_count = len(testInstance) - 1
    scored = [
        (candidate, euclidean_Distance(testInstance, candidate, feature_count))
        for candidate in trainingSet
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    return [ranked[rank][0] for rank in range(k)]
 
# Voting on all the neighbors to classify the test instance 

def get_Response(neighbors):
    """Return the most frequent last-field value among ``neighbors``.

    Each neighbor casts one vote for the value stored in its final field.
    When several values share the top count, the one that was seen first
    wins. An ``IndexError`` is raised when ``neighbors`` is empty.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1
    # Stable sort: equal counts stay in first-seen order.
    ranking = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    winner, _count = ranking[0]
    return winner
 
# Accuracy test

def get_Accuracy(testSet, predictions):
    """Return the percentage of predictions that fall within 5% of the actual value.

    The actual value of a test row is its last field. A prediction counts
    as correct when the actual value lies strictly between 95% and 105% of
    the prediction, or equals the prediction exactly.

    Args:
        testSet: Sequence of rows; the last field of each row is the actual
            value.
        predictions: Sequence of predicted values, aligned with ``testSet``
            by position.

    Returns:
        The share of correct predictions as a float in ``[0.0, 100.0]``;
        ``0.0`` when ``testSet`` is empty.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    correct = 0
    for index, row in enumerate(testSet):
        actual = row[-1]
        predicted = predictions[index]
        # Order the bounds so the band is also valid for negative
        # predictions, where predicted * 1.05 is the smaller value.
        low, high = sorted((predicted * 0.95, predicted * 1.05))
        # The equality check covers a prediction of 0, whose band is empty.
        if actual == predicted or low < actual < high:
            correct += 1
    return (correct / float(total)) * 100.0

def main():
    """Load the data set, predict every test row with k-NN and print the results.

    Reads ``finalinputs.csv``, splits it into training and test rows,
    predicts each test row from its 9 nearest training rows, prints every
    prediction next to the actual value and finally prints the accuracy
    reported by ``get_Accuracy``.
    """
    # Containers that load_Dataset fills in place.
    trainingSet = []
    testSet = []
    split = 0.95
    load_Dataset('finalinputs.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)) + '\n')

    # Generate one prediction per test row.
    predictions = []
    k = 9
    for instance in testSet:
        neighbors = get_Neighbors(trainingSet, instance, k)
        result = get_Response(neighbors)
        predictions.append(result)
        print('> Predicted = $' + repr(result) + ', Actual = $' + repr(instance[-1]))

    accuracy = get_Accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy))


# Run only when executed as a script (or notebook cell), not on import.
if __name__ == "__main__":
    main()

Train set: 11502
Test set: 580

> Predicted = $920.0, Actual = $952.0
> Predicted = $656.0, Actual = $342.0
> Predicted = $193.0, Actual = $249.0
> Predicted = $1719.0, Actual = $2467.0
> Predicted = $2677.0, Actual = $2565.0
> Predicted = $1430.0, Actual = $1435.0
> Predicted = $146.0, Actual = $172.0
> Predicted = $1501.0, Actual = $984.0
> Predicted = $553.0, Actual = $750.0
> Predicted = $1310.0, Actual = $1131.0
> Predicted = $3936.0, Actual = $2445.0
> Predicted = $1583.0, Actual = $1689.0
> Predicted = $2082.0, Actual = $1820.0
> Predicted = $1910.0, Actual = $2026.0
> Predicted = $746.0, Actual = $565.0
> Predicted = $668.0, Actual = $675.0
> Predicted = $2617.0, Actual = $2538.0
> Predicted = $443.0, Actual = $719.0
> Predicted = $712.0, Actual = $728.0
> Predicted = $966.0, Actual = $930.0
> Predicted = $967.0, Actual = $1042.0
> Predicted = $979.0, Actual = $551.0
> Predicted = $703.0, Actual = $1011.0
> Predicted = $2209.0, Actual = $3543.0
> Predicted = $1899.0, Actual = $

> Predicted = $2932.0, Actual = $3177.0
> Predicted = $2356.0, Actual = $1857.0
> Predicted = $599.0, Actual = $665.0
> Predicted = $565.0, Actual = $451.0
> Predicted = $2051.0, Actual = $4343.0
> Predicted = $1221.0, Actual = $1422.0
> Predicted = $1371.0, Actual = $1317.0
> Predicted = $1630.0, Actual = $1710.0
> Predicted = $911.0, Actual = $929.0
> Predicted = $941.0, Actual = $796.0
> Predicted = $2765.0, Actual = $3240.0
> Predicted = $854.0, Actual = $865.0
> Predicted = $893.0, Actual = $964.0
> Predicted = $1594.0, Actual = $1121.0
> Predicted = $2338.0, Actual = $2516.0
> Predicted = $887.0, Actual = $797.0
> Predicted = $1613.0, Actual = $3391.0
> Predicted = $5371.0, Actual = $3304.0
> Predicted = $1480.0, Actual = $1297.0
> Predicted = $1675.0, Actual = $1748.0
> Predicted = $519.0, Actual = $361.0
> Predicted = $606.0, Actual = $629.0
> Predicted = $455.0, Actual = $509.0
> Predicted = $1203.0, Actual = $1437.0
> Predicted = $1701.0, Actual = $1743.0
> Predicted = $969.0

> Predicted = $1054.0, Actual = $835.0
> Predicted = $823.0, Actual = $856.0
> Predicted = $1587.0, Actual = $1870.0
> Predicted = $1746.0, Actual = $1293.0
> Predicted = $643.0, Actual = $501.0
> Predicted = $962.0, Actual = $660.0
> Predicted = $914.0, Actual = $908.0
> Predicted = $1155.0, Actual = $1157.0
> Predicted = $3396.0, Actual = $1747.0
> Predicted = $749.0, Actual = $720.0
> Predicted = $2108.0, Actual = $2062.0
> Predicted = $1510.0, Actual = $697.0
> Predicted = $740.0, Actual = $783.0
> Predicted = $2202.0, Actual = $2265.0
> Predicted = $1216.0, Actual = $1225.0
> Predicted = $1340.0, Actual = $1542.0
> Predicted = $292.0, Actual = $528.0
> Predicted = $614.0, Actual = $673.0
> Predicted = $1095.0, Actual = $1244.0
> Predicted = $2086.0, Actual = $2103.0
> Predicted = $1793.0, Actual = $1534.0
> Predicted = $756.0, Actual = $745.0
> Predicted = $1648.0, Actual = $1685.0
> Predicted = $1190.0, Actual = $991.0
> Predicted = $420.0, Actual = $459.0
> Predicted = $1354.0, 