In [2]:
import csv
import random
import math
import operator
from sklearn.metrics import mean_squared_error

# Loading Data Set
def load_Dataset(filename, split, trainingSet=None, testSet=None, numColumns=24):
    """Read a CSV file and randomly split its rows into training and test sets.

    The first ``numColumns`` fields of every non-empty row are converted to
    ``float`` in place. Each row is then appended to ``trainingSet`` when a
    fresh ``random.random()`` draw is below ``split``, and to ``testSet``
    otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; rows whose draw
            is below it go to the training set.
        trainingSet: List that receives the training rows. A new list is
            created when omitted.
        testSet: List that receives the test rows. A new list is created when
            omitted.
        numColumns: Number of leading fields per row to convert to ``float``.

    Returns:
        The ``(trainingSet, testSet)`` pair, i.e. the same list objects that
        were passed in (or the newly created ones).

    Raises:
        ValueError: If one of the converted fields is not a valid float.
        IndexError: If a non-empty row has fewer than ``numColumns`` fields.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between calls and keep accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    # newline='' lets the csv module do its own newline handling.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines come back as empty rows; skip them so every real
            # row, including the last one, is loaded.
            if not row:
                continue
            for y in range(numColumns):
                row[y] = float(row[y])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet

# Euclidean distance algorithm for training set + test instance
# length controls how many leading fields are included in the distance calculation

def euclidean_Distance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only the first ``length`` fields of each instance take part in the
    calculation; any fields after that are ignored.
    """
    squared_total = 0
    for idx in range(length):
        delta = instance1[idx] - instance2[idx]
        squared_total = squared_total + delta ** 2
    return math.sqrt(squared_total)

# Finding the neighbors of the test instance in the training set

def get_Neighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distances come from ``euclidean_Distance`` over every field of
    ``testInstance`` except its last one. The result is ordered nearest
    first; rows at equal distance keep their order from ``trainingSet``.

    Raises:
        IndexError: If ``k`` is larger than the number of training rows.
    """
    feature_count = len(testInstance) - 1
    scored = [
        (row, euclidean_Distance(testInstance, row, feature_count))
        for row in trainingSet
    ]
    ranked = sorted(scored, key=operator.itemgetter(1))
    return [ranked[i][0] for i in range(k)]
 
# Voting on all the neighbors to classify the test instance 

def get_Response(neighbors):
    """Return the most common response among ``neighbors``.

    The response of a neighbor is its last field. When several responses
    share the highest count, the one encountered first wins (the tally dict
    keeps insertion order and the sort is stable).

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1
    ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
    winner, _count = ranked[0]
    return winner
 
# Accuracy test

def get_Accuracy(testSet, predictions):
    """Return the percentage of predictions that fall close to the actual value.

    A prediction counts as correct when the actual value (the last field of
    the matching ``testSet`` row) lies strictly between 85% and 115% of the
    prediction.

    Args:
        testSet: Rows whose last field is the actual value.
        predictions: Predicted values, one per row of ``testSet`` in the same
            order.

    Returns:
        The share of correct predictions as a float between 0.0 and 100.0.
        An empty ``testSet`` yields 0.0.

    Raises:
        IndexError: If ``predictions`` has fewer entries than ``testSet``.
    """
    # Nothing to score: report 0.0 rather than dividing by zero.
    if len(testSet) == 0:
        return 0.0
    correct = 0
    for index, row in enumerate(testSet):
        actual = row[-1]
        predicted = predictions[index]
        # Order the bounds so the band is also valid for negative
        # predictions, where 1.15 * p is smaller than 0.85 * p.
        low, high = sorted((predicted * 0.85, predicted * 1.15))
        if low < actual < high:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    # Load the CSV and split its rows into training and test sets
    train_rows = []
    test_rows = []
    load_Dataset('finalinputs.csv', 0.95, train_rows, test_rows)
    print('Train set: ' + repr(len(train_rows)))
    print('Test set: ' + repr(len(test_rows)) + '\n')
    # Predict every test row from its nearest training rows
    neighbor_count = 9
    predictions = []
    for row in test_rows:
        nearest = get_Neighbors(train_rows, row, neighbor_count)
        predicted = get_Response(nearest)
        predictions.append(predicted)
        print('> Predicted = $' + repr(predicted) + ', Actual = $' + repr(row[-1]))
    # Score the predictions against the actual values
    accuracy = get_Accuracy(test_rows, predictions)
    print('Accuracy: ' + repr(accuracy))
main()

Train set: 10864
Test set: 1218

> Predicted = $664.0, Actual = $475.0
> Predicted = $510.0, Actual = $551.0
> Predicted = $350.0, Actual = $337.0
> Predicted = $665.0, Actual = $922.0
> Predicted = $2512.0, Actual = $2595.0
> Predicted = $1364.0, Actual = $1096.0
> Predicted = $1276.0, Actual = $1663.0
> Predicted = $1719.0, Actual = $2467.0
> Predicted = $1845.0, Actual = $2046.0
> Predicted = $1803.0, Actual = $3077.0
> Predicted = $477.0, Actual = $374.0
> Predicted = $917.0, Actual = $1342.0
> Predicted = $677.0, Actual = $755.0
> Predicted = $599.0, Actual = $954.0
> Predicted = $470.0, Actual = $502.0
> Predicted = $507.0, Actual = $837.0
> Predicted = $2254.0, Actual = $2298.0
> Predicted = $911.0, Actual = $807.0
> Predicted = $229.0, Actual = $392.0
> Predicted = $1338.0, Actual = $1790.0
> Predicted = $724.0, Actual = $758.0
> Predicted = $749.0, Actual = $524.0
> Predicted = $880.0, Actual = $609.0
> Predicted = $823.0, Actual = $625.0
> Predicted = $788.0, Actual = $657.0


> Predicted = $2696.0, Actual = $3117.0
> Predicted = $2105.0, Actual = $2049.0
> Predicted = $622.0, Actual = $613.0
> Predicted = $2134.0, Actual = $1796.0
> Predicted = $2893.0, Actual = $2917.0
> Predicted = $669.0, Actual = $287.0
> Predicted = $1825.0, Actual = $3454.0
> Predicted = $1822.0, Actual = $1895.0
> Predicted = $2256.0, Actual = $1939.0
> Predicted = $193.0, Actual = $233.0
> Predicted = $1274.0, Actual = $958.0
> Predicted = $762.0, Actual = $933.0
> Predicted = $2011.0, Actual = $1788.0
> Predicted = $4224.0, Actual = $3237.0
> Predicted = $832.0, Actual = $483.0
> Predicted = $1352.0, Actual = $872.0
> Predicted = $1412.0, Actual = $1598.0
> Predicted = $2190.0, Actual = $2034.0
> Predicted = $360.0, Actual = $576.0
> Predicted = $700.0, Actual = $559.0
> Predicted = $899.0, Actual = $1082.0
> Predicted = $1548.0, Actual = $1582.0
> Predicted = $993.0, Actual = $1563.0
> Predicted = $979.0, Actual = $1071.0
> Predicted = $100.0, Actual = $58.0
> Predicted = $1473.0,

> Predicted = $1176.0, Actual = $1520.0
> Predicted = $503.0, Actual = $820.0
> Predicted = $1776.0, Actual = $2048.0
> Predicted = $1130.0, Actual = $1894.0
> Predicted = $852.0, Actual = $761.0
> Predicted = $411.0, Actual = $724.0
> Predicted = $845.0, Actual = $854.0
> Predicted = $993.0, Actual = $1241.0
> Predicted = $2599.0, Actual = $1970.0
> Predicted = $2835.0, Actual = $3106.0
> Predicted = $1484.0, Actual = $1266.0
> Predicted = $1589.0, Actual = $1762.0
> Predicted = $1427.0, Actual = $1477.0
> Predicted = $582.0, Actual = $936.0
> Predicted = $434.0, Actual = $668.0
> Predicted = $1229.0, Actual = $774.0
> Predicted = $884.0, Actual = $846.0
> Predicted = $1422.0, Actual = $1324.0
> Predicted = $2363.0, Actual = $2809.0
> Predicted = $1291.0, Actual = $1226.0
> Predicted = $727.0, Actual = $880.0
> Predicted = $982.0, Actual = $764.0
> Predicted = $1611.0, Actual = $1059.0
> Predicted = $735.0, Actual = $968.0
> Predicted = $1128.0, Actual = $727.0
> Predicted = $665.0, A

> Predicted = $1069.0, Actual = $1014.0
> Predicted = $2033.0, Actual = $1588.0
> Predicted = $1415.0, Actual = $1521.0
> Predicted = $475.0, Actual = $929.0
> Predicted = $1266.0, Actual = $1067.0
> Predicted = $1259.0, Actual = $1251.0
> Predicted = $557.0, Actual = $562.0
> Predicted = $204.0, Actual = $303.0
> Predicted = $720.0, Actual = $768.0
> Predicted = $1218.0, Actual = $1235.0
> Predicted = $1357.0, Actual = $1264.0
> Predicted = $1241.0, Actual = $876.0
> Predicted = $2340.0, Actual = $2473.0
> Predicted = $1518.0, Actual = $1372.0
> Predicted = $1826.0, Actual = $2323.0
> Predicted = $2759.0, Actual = $2723.0
> Predicted = $1755.0, Actual = $1964.0
> Predicted = $698.0, Actual = $669.0
> Predicted = $1970.0, Actual = $3764.0
> Predicted = $1750.0, Actual = $1492.0
> Predicted = $896.0, Actual = $1096.0
> Predicted = $2506.0, Actual = $2220.0
> Predicted = $2286.0, Actual = $1756.0
> Predicted = $2465.0, Actual = $1471.0
> Predicted = $609.0, Actual = $843.0
> Predicted = 

> Predicted = $2289.0, Actual = $1365.0
> Predicted = $749.0, Actual = $562.0
> Predicted = $2467.0, Actual = $2274.0
> Predicted = $2935.0, Actual = $2483.0
> Predicted = $472.0, Actual = $417.0
> Predicted = $1025.0, Actual = $1166.0
> Predicted = $387.0, Actual = $285.0
> Predicted = $1456.0, Actual = $1510.0
> Predicted = $1602.0, Actual = $1730.0
> Predicted = $262.0, Actual = $231.0
> Predicted = $1054.0, Actual = $1112.0
> Predicted = $2123.0, Actual = $2513.0
> Predicted = $1405.0, Actual = $1113.0
> Predicted = $857.0, Actual = $958.0
> Predicted = $1087.0, Actual = $862.0
> Predicted = $792.0, Actual = $761.0
> Predicted = $1454.0, Actual = $1270.0
> Predicted = $2354.0, Actual = $2210.0
> Predicted = $1408.0, Actual = $1243.0
> Predicted = $792.0, Actual = $1428.0
> Predicted = $696.0, Actual = $528.0
> Predicted = $4661.0, Actual = $2907.0
> Predicted = $1327.0, Actual = $1596.0
> Predicted = $1046.0, Actual = $1101.0
> Predicted = $1868.0, Actual = $1887.0
> Predicted = $1

> Predicted = $2849.0, Actual = $2330.0
> Predicted = $1170.0, Actual = $921.0
> Predicted = $1470.0, Actual = $1961.0
> Predicted = $457.0, Actual = $704.0
> Predicted = $1797.0, Actual = $2070.0
> Predicted = $865.0, Actual = $1153.0
> Predicted = $460.0, Actual = $624.0
> Predicted = $1441.0, Actual = $1781.0
> Predicted = $1068.0, Actual = $717.0
> Predicted = $978.0, Actual = $1000.0
> Predicted = $1357.0, Actual = $1132.0
> Predicted = $411.0, Actual = $586.0
> Predicted = $772.0, Actual = $583.0
> Predicted = $992.0, Actual = $1125.0
> Predicted = $1758.0, Actual = $2470.0
> Predicted = $779.0, Actual = $767.0
> Predicted = $1079.0, Actual = $1399.0
> Predicted = $1200.0, Actual = $1121.0
> Predicted = $381.0, Actual = $325.0
> Predicted = $585.0, Actual = $635.0
> Predicted = $1507.0, Actual = $1172.0
> Predicted = $1531.0, Actual = $1349.0
> Predicted = $1347.0, Actual = $1861.0
> Predicted = $681.0, Actual = $649.0
> Predicted = $364.0, Actual = $477.0
> Predicted = $757.0, A