In [4]:
import os
import csv
import math
import operator
import random

# Step 1: Load dataset from file
def load_dataset_from_folder(folder_path):
    file_list = os.listdir(folder_path)
    data = []
    for file in file_list:
        if file.endswith('.csv'):
            with open(os.path.join(folder_path, file), 'r') as csvfile:
                csvreader = csv.reader(csvfile)
                for row in csvreader:
                    # Convert numerical attributes to float or int
                    row = [float(val) if val.replace('.', '').isdigit() else val for val in row]
                    data.append(row)
    if not data:
        raise ValueError("No CSV files found in the folder.")
    return data


# Step 2: Split the dataset into train and test sets
def train_test_split(data, split_ratio):
    train_size = int(len(data) * split_ratio)
    train_set = []
    test_set = list(data)
    while len(train_set) < train_size:
        index = int(math.floor(random.random() * len(test_set)))
        train_set.append(test_set.pop(index))
    return train_set, test_set

# Step 3: Implement K-NN Classifier
def euclidean_distance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)

def get_neighbors(training_set, test_instance, k):
    distances = []
    length = len(test_instance) - 1
    for x in range(len(training_set)):
        dist = euclidean_distance(test_instance, training_set[x], length)
        distances.append((training_set[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

def get_response(neighbors):
    class_votes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in class_votes:
            class_votes[response] += 1
        else:
            class_votes[response] = 1
    sorted_votes = sorted(class_votes.items(), key=operator.itemgetter(1), reverse=True)
    return sorted_votes[0][0]

def get_accuracy(test_set, predictions):
    correct = 0
    for x in range(len(test_set)):
        if test_set[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(test_set))) * 100.0

# Step 4: Evaluate the model
def evaluate_model(train_set, test_set, k):
    predictions = []
    for x in range(len(test_set)):
        neighbors = get_neighbors(train_set, test_set[x], k)
        result = get_response(neighbors)
        predictions.append(result)
    accuracy = get_accuracy(test_set, predictions)
    return accuracy

# Step 5: Plot k vs accuracy (Not implemented as we are not using any library for plotting)

# Main function
def main():
    # Load dataset from file
    file_path = "E:\\SRM\\Machine_Learning\\Lab\\Lab-7\\mtcars.csv"
    dataset = load_dataset_from_file(file_path)

    # Split dataset
    split_ratio = 0.8
    training_set, test_set = train_test_split(dataset, split_ratio)

    # Test model
    k = 3
    accuracy = evaluate_model(training_set, test_set, k)
    print("Accuracy:", accuracy)

if __name__ == "__main__":
    main()


TypeError: unsupported operand type(s) for -: 'str' and 'str'