# In [2]:
from csv import reader
from math import sqrt

# Loading CSV file
def load_csv(filename):
    """Read a CSV file into a list of rows, skipping empty records.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list containing one list of strings per non-empty CSV record,
        in file order.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; without it, line breaks embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(filename, 'r', newline='') as file:
        return [row for row in reader(file) if row]

# Convert string column to float
def str_column_to_float(CropDataset, column):
    """Replace the string in one column of every row with its float value.

    The rows are modified in place; surrounding whitespace is stripped
    before the conversion.

    Args:
        CropDataset: List of rows (lists) whose ``column`` entry is a string.
        column: Index of the column to convert.
    """
    for record in CropDataset:
        raw_text = record[column]
        record[column] = float(raw_text.strip())

# Convert string column to integer
def str_column_to_int(CropDataset, column):
    """Encode the distinct values of one column as consecutive integers.

    Every row is modified in place so that its ``column`` entry becomes the
    integer code of the original value. Each assignment is also printed as
    ``[value] => code``.

    Args:
        CropDataset: List of rows (lists).
        column: Index of the column to encode.

    Returns:
        A dict mapping each original value to its integer code.
    """
    # Codes are assigned in sorted order so the mapping is reproducible.
    # Iterating the raw set would hand string labels different codes from
    # one interpreter run to the next because of hash randomization.
    unique = sorted({row[column] for row in CropDataset})
    lookup = {value: i for i, value in enumerate(unique)}
    for value, i in lookup.items():
        print('[%s] => %d' % (value, i))
    for row in CropDataset:
        row[column] = lookup[row[column]]
    return lookup

# Finding min and max values for each column
def dataset_minmax(CropDataset):
    """Collect the smallest and largest value found in each column.

    The number of columns is taken from the first row.

    Args:
        CropDataset: Non-empty list of rows (lists) of comparable values.

    Returns:
        A list with one ``[minimum, maximum]`` pair per column.
    """
    column_count = len(CropDataset[0])
    columns = (
        [record[idx] for record in CropDataset]
        for idx in range(column_count)
    )
    return [[min(values), max(values)] for values in columns]

# Rescale dataset columns to the range 0-1
def normalize_dataset(CropDataset, minmax):
    """Rescale every value of every row to the range 0-1, in place.

    Args:
        CropDataset: List of rows (lists) of numbers.
        minmax: One ``[minimum, maximum]`` entry per column, as produced by
            ``dataset_minmax``.
    """
    for row in CropDataset:
        for i, value in enumerate(row):
            low = minmax[i][0]
            span = minmax[i][1] - low
            # A constant column has zero span; map it to 0.0 rather than
            # failing with ZeroDivisionError.
            row[i] = (value - low) / span if span else 0.0

# Calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows.

    The last element of ``row1`` is excluded from the calculation; only the
    positions before it are compared with the same positions of ``row2``.

    Args:
        row1: First row; its final element is not used.
        row2: Second row, indexable at every compared position.

    Returns:
        The distance as a float.
    """
    compared = len(row1) - 1
    squared_total = 0.0
    for idx in range(compared):
        delta = row1[idx] - row2[idx]
        squared_total += delta ** 2
    return sqrt(squared_total)

# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the training rows closest to ``test_row``.

    Distances are computed with ``euclidean_distance(test_row, train_row)``.
    Rows at equal distance keep their relative order from ``train``.

    Args:
        train: Iterable of training rows.
        test_row: Row to find neighbours for.
        num_neighbors: Number of rows to return.

    Returns:
        A list of ``num_neighbors`` training rows, nearest first.

    Raises:
        IndexError: If ``num_neighbors`` exceeds the number of training rows.
    """
    # sorted() is stable and evaluates the key once per row.
    ranked = sorted(
        train,
        key=lambda candidate: euclidean_distance(test_row, candidate),
    )
    return [ranked[position] for position in range(num_neighbors)]

# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
    """Predict a class for ``test_row`` by majority vote of its neighbours.

    The class of each neighbour is read from the last element of its row.

    Args:
        train: Training rows whose last element is the class value.
        test_row: Row to classify.
        num_neighbors: Number of nearest neighbours that vote.

    Returns:
        The class value that occurs most often among the neighbours.

    Raises:
        ValueError: If there are no neighbours to vote.
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    output_values = [row[-1] for row in neighbors]
    # Vote over the list itself rather than a set of it: max() returns the
    # first maximal item, and the neighbours arrive nearest first, so a tie
    # goes to the class of the closest neighbour instead of depending on
    # set iteration order.
    return max(output_values, key=output_values.count)


# Load the data set. Every column except the last is converted to float;
# the last column is treated as the class label.
filename = 'Crop_Predict.csv'
CropDataset = load_csv(filename)
label_column = len(CropDataset[0]) - 1
for i in range(label_column):
    str_column_to_float(CropDataset, i)

# Class column to integers
str_column_to_int(CropDataset, label_column)

# Got the best k value = 3
num_neighbors = 3

# define a new record
row = [39,71,84,20.28155898,16.39535215,8.140825437,82.52339655]

# euclidean_distance() skips the last element of its first argument on the
# assumption that it is the class label. When the record carries features
# only (one element fewer than a data row), add a placeholder label so that
# its final feature still takes part in the distance.
if len(row) == label_column:
    query = row + [None]
else:
    query = row

# prediction
label = predict_classification(CropDataset, query, num_neighbors)
print('New record =%s, Predicted Output: %s' % (row, label))

# Output recorded from the notebook run:
# [chickpea] => 0
# [kidneybeans] => 1
# New record =[39, 71, 84, 20.28155898, 16.39535215, 8.140825437, 82.52339655], Predicted Output: 0
