In [1]:
import pandas as pd
from random import seed
from random import randrange
from math import sqrt
from math import exp
from csv import reader
read_file = pd.read_csv (r'car.data')
read_file.to_csv (r'car.csv', index=None)
dataset_demo = pd.read_csv('car.csv')

def load_csv(file_name):
	file_value = open(file_name, "rt")
	lines_value = reader(file_value)
	dataset_details = list(lines_value)
	return dataset_details

# Convert string column to float
def str_column_to_float(dataset_value, column_value):
	for row_value in dataset_value:
		row_value[column_value] = float(row_value[column_value])
        
# Conversions of the string value  column to integer format.
def str_column_to_int(dataset, column_value):
	class_values = [row[column_value] for row in dataset] #processing through each and every value in the  class columns
	unique = set(class_values) # while set does not contains duplicate values
	lookup = dict() 
	for i, value in enumerate(unique):
		lookup[value] = i #conversions class values to integers value
	for row in dataset:
		row[column_value] = lookup[row[column_value]] #assigning integer values to class columns
	return lookup

# Load the value of the dataset in the list form
filename_value = 'car.data'
dataset = load_csv(filename_value)

# convert class column to int
for i in range(len(dataset[0])):
    lookup = str_column_to_int(dataset, i)
# convert the content of the columns which is in string form to float
for i in range(len(dataset[0])):
    str_column_to_float(dataset, i)
    
# Find the min and max values for each column
def dataset_minmax(dataset):
	minmax = list()
	for i in range(len(dataset[0])):
		col_values = [row[i] for row in dataset]
		value_min = min(col_values)
		value_max = max(col_values)
		minmax.append([value_min, value_max])
	return minmax

# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
	for row in dataset:
		for i in range(len(row)):
			row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

# Split a dataset into k folds
def cross_val(dataset, n_folds):
	dataset_split = list()
	dataset_copy = list(dataset)
	fold_size = int(len(dataset) / n_folds)
	for _ in range(n_folds):
		fold = list()
		while len(fold) < fold_size:
			index = randrange(len(dataset_copy))
			fold.append(dataset_copy.pop(index))
		dataset_split.append(fold)
	return dataset_split

# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
	correct = 0
	for i in range(len(actual)):
		if actual[i] == predicted[i]:
			correct += 1
	return correct / float(len(actual)) * 100.0

# Evaluate an algorithm using a cross validation split
def evaluate(data, alg, n_folds, *args):
	folds = cross_val(data, n_folds)
	scores = list()
	for fold in folds:
		train_set = list(folds)
		train_set.remove(fold)
		train_set = sum(train_set, [])
		test_set = list()
		for row in fold:
			row_copy = list(row)
			test_set.append(row_copy)
			row_copy[-1] = None
		predicted = alg(train_set, test_set, *args)
		actual = [row[-1] for row in fold]
		accuracy = accuracy_metric(actual, predicted)
		scores.append(accuracy)
	return scores

# Calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)-1):
		distance += (row1[i] - row2[i])**2
	return sqrt(distance)

# Locate the most similar neighbors using Euclidean Distance
def get_neighbors(train, test_row, num_neighbors):
	distances = list()
	for train_row in train:
		dist = euclidean_distance(test_row, train_row)
		distances.append((train_row, dist))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append(distances[i][0])
	return neighbors

# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
	neighbors = get_neighbors(train, test_row, num_neighbors)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction

# kNN Algorithm
def knn(train, test, num_neighbors):
	predictions = list()
	for row in test:
		output = predict_classification(train, row, num_neighbors)
		predictions.append(output)
	return(predictions)

# Calculate the Hamming distance between two vectors
def hamming_distance(row1, row2):
	dist_counter = 0.0
	for n in range(len(row1)):
		if row1[n] != row2[n]:
			dist_counter += 1
	return dist_counter

# Locate the most similar neighbors using Hamming distance
def get_neighbors_hamming(train, test_row, num_neighbors):
	distances = list()
	for train_row in train:
		dist = hamming_distance(test_row, train_row)
		distances.append((train_row, dist))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append(distances[i][0])
	return neighbors

def predict_classification_hamming(train, test_row, num_neighbors):
	neighbors = get_neighbors_hamming(train, test_row, num_neighbors)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction

def knn_hamming(train, test, num_neighbors):
	predictions = list()
	for row in test:
		output = predict_classification_hamming(train, row, num_neighbors)
		predictions.append(output)
	return(predictions)

def manhattan_distance(row1, row2):
	distance = 0.0
	for i in range(len(row1)-1):
		distance += abs(row1[i] - row2[i])
	return (distance)

# Locate the most similar neighbors using Hamming distance
def get_neighbors_manhattan(train, test_row, num_neighbors):
	distances = list()
	for train_row in train:
		dist = manhattan_distance(test_row, train_row)
		distances.append((train_row, dist))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append(distances[i][0])
	return neighbors

def predict_classification_manhattan(train, test_row, num_neighbors):
	neighbors = get_neighbors_manhattan(train, test_row, num_neighbors)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction

def knn_manhattan(train, test, num_neighbors):
	predictions = list()
	for row in test:
		output = predict_classification_manhattan(train, row, num_neighbors)
		predictions.append(output)
	return(predictions)

def minkowski_distance(row1, row2):
	distance = 0.0
	p = 3.0
	for i in range(len(row1)-1):
		distance += abs(row1[i] - row2[i])**p
	return (distance**(1.0/p))

# Locate the most similar neighbors using Hamming distance
def get_neighbors_minkowski(train, test_row, num_neighbors):
	distances = list()
	for train_row in train:
		dist = minkowski_distance(test_row, train_row)
		distances.append((train_row, dist))
	distances.sort(key=lambda tup: tup[1])
	neighbors = list()
	for i in range(num_neighbors):
		neighbors.append(distances[i][0])
	return neighbors

def predict_classification_minkowski(train, test_row, num_neighbors):
	neighbors = get_neighbors_minkowski(train, test_row, num_neighbors)
	output_values = [row[-1] for row in neighbors]
	prediction = max(set(output_values), key=output_values.count)
	return prediction

def knn_minkowski(train, test, num_neighbors):
	predictions = list()
	for row in test:
		output = predict_classification_minkowski(train, row, num_neighbors)
		predictions.append(output)
	return(predictions)

seed(1)
n = 10 #10-k folds cross validation
neighbors = 5
result = evaluate(dataset, knn, n, neighbors)
result_hamming = evaluate(dataset, knn_hamming, n, neighbors)
result_manhattan = evaluate(dataset, knn_manhattan, n, neighbors)
result_minkowski = evaluate(dataset, knn_minkowski, n, neighbors)
print("*******************THE OUTPUT OF EUCLIDIAN******************")
print('Result of the Car Dataset: %s' % result)
print('Mean Accuracy of the Car program: %.3f%%' % (sum(result)/float(len(result))))
print("*******************THE OUTPUT OF HAMMING******************")
print('Result of the Car Dataset using Hamming Distance: %s' %result_hamming)
print('Mean Accuracy of the Car program using Hamming Distance: %.3f%%' % (sum(result_hamming)/float(len(result_hamming))))
print("*******************THE OUTPUT OF MANHATTAN******************")
print('Result of the Car Dataset using manhattan Distance: %s' % result_manhattan)
print('Mean Accuracy of the Car program using manhattan Distance: %.3f%%' % (sum(result_manhattan)/float(len(result_manhattan))))
print("*******************THE OUTPUT OF MINKOWSKI******************")
print('Result of the Car Dataset using minkowski Distance: %s' % result_minkowski)
print('Mean Accuracy of the Car program using minkowski Distance: %.3f%%' % (sum(result_minkowski)/float(len(result_minkowski))))

*******************THE OUTPUT OF EUCLIDIAN******************
Result of the Car Dataset: [92.44186046511628, 92.44186046511628, 93.6046511627907, 92.44186046511628, 93.6046511627907, 95.34883720930233, 87.79069767441861, 94.18604651162791, 93.6046511627907, 94.76744186046511]
Mean Accuracy of the Car program: 93.023%
*******************THE OUTPUT OF HAMMING******************
Result of the Car Dataset using Hamming Distance: [93.6046511627907, 87.79069767441861, 88.95348837209302, 87.20930232558139, 94.18604651162791, 89.53488372093024, 91.27906976744185, 88.37209302325581, 88.37209302325581, 87.79069767441861]
Mean Accuracy of the Car program using Hamming Distance: 89.709%
*******************THE OUTPUT OF MANHATTAN******************
Result of the Car Dataset using manhattan Distance: [96.51162790697676, 91.86046511627907, 93.6046511627907, 93.02325581395348, 90.11627906976744, 94.76744186046511, 96.51162790697676, 94.18604651162791, 94.18604651162791, 95.34883720930233]
Mean Accuracy o