In [24]:
import csv
import random
import math
import operator
from sklearn.metrics import mean_squared_error

# Loading Data Set
def load_Dataset(filename, split, trainingSet=None, testSet=None, numColumns=24):
    """Read a CSV file and randomly split its rows into train and test lists.

    The first ``numColumns`` fields of every row are converted to ``float``
    in place. Each row is then appended to ``trainingSet`` when
    ``random.random() < split`` and to ``testSet`` otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Probability that a row is assigned to the training set.
        trainingSet: Optional list that receives the training rows. A new
            list is created when omitted.
        testSet: Optional list that receives the test rows. A new list is
            created when omitted.
        numColumns: Number of leading fields per row to convert to float.

    Returns:
        The ``(trainingSet, testSet)`` pair. When lists were passed in, the
        same objects are returned after being extended.

    Raises:
        ValueError: If one of the converted fields is not a valid float.
        IndexError: If a non-empty row has fewer than ``numColumns`` fields.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between calls and silently accumulate rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' is what the csv module asks for when given a file object.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields [] for blank lines (e.g. a trailing one);
            # skip those instead of blindly dropping the last row.
            if not row:
                continue
            for column in range(numColumns):
                row[column] = float(row[column])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet

# Euclidean distance algorithm for training set + test instance
# length controls how many leading fields are included in the distance calculation

def euclidean_Distance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` fields.

    Only indices ``0 .. length - 1`` of both instances are read, so trailing
    fields are ignored.
    """
    squared_total = sum(
        (instance1[i] - instance2[i]) ** 2 for i in range(length)
    )
    return math.sqrt(squared_total)

# Finding the neighbors of the test instance in the training set

def get_Neighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is computed with ``euclidean_Distance`` over every field of
    ``testInstance`` except the last one. Rows are returned nearest first;
    rows at equal distance keep their ``trainingSet`` order because the sort
    is stable.

    Raises:
        IndexError: If ``k`` exceeds the number of training rows.
    """
    feature_count = len(testInstance) - 1
    scored = [
        (candidate, euclidean_Distance(testInstance, candidate, feature_count))
        for candidate in trainingSet
    ]
    scored.sort(key=operator.itemgetter(1))
    return [scored[rank][0] for rank in range(k)]
 
# Voting on all the neighbors to classify the test instance 

def get_Response(neighbors):
    """Return the most frequent last-field value among ``neighbors``.

    Each neighbor casts one vote for its final field. When several values
    share the highest count, the one encountered first wins (dict insertion
    order plus a stable sort).

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1
    ranking = sorted(
        tally.items(), key=operator.itemgetter(1), reverse=True
    )
    winner, _count = ranking[0]
    return winner
 
# Accuracy test

def get_Accuracy(testSet, predictions):
    """Return the percentage of test rows whose last field equals its prediction.

    Args:
        testSet: Sequence of rows; the last field of each row is the actual
            value.
        predictions: Sequence of predicted values, aligned with ``testSet``
            by position.

    Returns:
        A float in the range 0.0 to 100.0. An empty ``testSet`` yields 0.0
        rather than a division by zero, since a random split can leave the
        test set empty.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        return 0.0
    correct = sum(
        1 for index, row in enumerate(testSet)
        if row[-1] == predictions[index]
    )
    return (correct / float(total)) * 100.0

def main():
    """Run the k-nearest-neighbours experiment on ``finalinputs.csv``.

    Loads the file with a 0.95 train/test split, predicts the last field of
    every test row from its 9 nearest training rows, prints each prediction
    next to the actual value, and finally prints the accuracy percentage.
    """
    # Load the data and split it into training and test rows
    trainingSet = []
    testSet = []
    split = 0.95
    load_Dataset('finalinputs.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)) + '\n')

    # Generate one prediction per test row
    predictions = []
    k = 9
    for testInstance in testSet:
        neighbors = get_Neighbors(trainingSet, testInstance, k)
        result = get_Response(neighbors)
        predictions.append(result)
        print('> Predicted = $' + repr(result)
              + ', Actual = $' + repr(testInstance[-1]))

    accuracy = get_Accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy))


# Run only when executed as a script or notebook cell, not when imported.
if __name__ == '__main__':
    main()

Train set: 11481
Test set: 601

> Predicted = $865.0, Actual = $943.0
> Predicted = $1845.0, Actual = $2046.0
> Predicted = $917.0, Actual = $1342.0
> Predicted = $1241.0, Actual = $1161.0
> Predicted = $858.0, Actual = $1281.0
> Predicted = $293.0, Actual = $535.0
> Predicted = $995.0, Actual = $964.0
> Predicted = $952.0, Actual = $920.0
> Predicted = $966.0, Actual = $863.0
> Predicted = $655.0, Actual = $424.0
> Predicted = $1115.0, Actual = $1666.0
> Predicted = $915.0, Actual = $904.0
> Predicted = $760.0, Actual = $758.0
> Predicted = $2209.0, Actual = $3543.0
> Predicted = $1522.0, Actual = $1422.0
> Predicted = $327.0, Actual = $995.0
> Predicted = $799.0, Actual = $845.0
> Predicted = $2897.0, Actual = $2901.0
> Predicted = $1744.0, Actual = $1215.0
> Predicted = $1416.0, Actual = $1249.0
> Predicted = $2450.0, Actual = $1205.0
> Predicted = $562.0, Actual = $570.0
> Predicted = $600.0, Actual = $590.0
> Predicted = $580.0, Actual = $617.0
> Predicted = $838.0, Actual = $800.

> Predicted = $306.0, Actual = $300.0
> Predicted = $1311.0, Actual = $890.0
> Predicted = $179.0, Actual = $854.0
> Predicted = $2516.0, Actual = $4619.0
> Predicted = $3013.0, Actual = $3745.0
> Predicted = $1408.0, Actual = $1650.0
> Predicted = $511.0, Actual = $567.0
> Predicted = $529.0, Actual = $640.0
> Predicted = $790.0, Actual = $729.0
> Predicted = $624.0, Actual = $1020.0
> Predicted = $2417.0, Actual = $2504.0
> Predicted = $1007.0, Actual = $1148.0
> Predicted = $1244.0, Actual = $1981.0
> Predicted = $2018.0, Actual = $1068.0
> Predicted = $533.0, Actual = $824.0
> Predicted = $685.0, Actual = $722.0
> Predicted = $463.0, Actual = $542.0
> Predicted = $2607.0, Actual = $2804.0
> Predicted = $2099.0, Actual = $2414.0
> Predicted = $1431.0, Actual = $1500.0
> Predicted = $985.0, Actual = $954.0
> Predicted = $4296.0, Actual = $3202.0
> Predicted = $1020.0, Actual = $1003.0
> Predicted = $933.0, Actual = $1137.0
> Predicted = $1668.0, Actual = $1057.0
> Predicted = $1042.0

> Predicted = $429.0, Actual = $654.0
> Predicted = $1163.0, Actual = $1042.0
> Predicted = $425.0, Actual = $321.0
> Predicted = $1176.0, Actual = $1273.0
> Predicted = $823.0, Actual = $856.0
> Predicted = $518.0, Actual = $535.0
> Predicted = $1740.0, Actual = $1970.0
> Predicted = $3077.0, Actual = $2213.0
> Predicted = $1149.0, Actual = $1098.0
> Predicted = $1725.0, Actual = $1534.0
> Predicted = $581.0, Actual = $330.0
> Predicted = $2157.0, Actual = $2216.0
> Predicted = $3247.0, Actual = $2646.0
> Predicted = $1847.0, Actual = $2416.0
> Predicted = $5447.0, Actual = $5108.0
> Predicted = $1184.0, Actual = $930.0
> Predicted = $995.0, Actual = $860.0
> Predicted = $593.0, Actual = $448.0
> Predicted = $1201.0, Actual = $1188.0
> Predicted = $952.0, Actual = $911.0
> Predicted = $1151.0, Actual = $1096.0
> Predicted = $1674.0, Actual = $1084.0
> Predicted = $803.0, Actual = $795.0
> Predicted = $188.0, Actual = $166.0
> Predicted = $2656.0, Actual = $3634.0
> Predicted = $722.0,

ValueError: y_true and y_pred have different number of output (24!=1)