In [4]:
import csv
import random
import math

# Load the dataset from CSV file
def load_csv(filename):
    """Load an Iris-style CSV file into a list of numeric rows.

    The first row is treated as a header and skipped, and empty rows are
    ignored.  The last column of every remaining row must hold a species
    name; it is matched case-insensitively, ignoring surrounding whitespace
    and an optional ``Iris-`` prefix, so both ``Setosa`` and ``Iris-setosa``
    are accepted.  All other columns are converted with ``float``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows; each row is a list of floats whose last element is
        the numeric class label (0.0, 1.0 or 2.0).  An empty file yields an
        empty list.

    Raises:
        KeyError: If a row's last column is not a recognised species name.
        ValueError: If a feature column cannot be converted to float.
    """
    # Keys are stored lower-case without the "Iris-" prefix; labels read
    # from the file are normalised to that form before the lookup.
    class_mapping = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
    prefix = 'iris-'
    dataset = []
    # newline='' hands newline handling to the csv module, as its
    # documentation requires for file objects passed to csv.reader.
    with open(filename, 'r', newline='') as file:
        csv_reader = csv.reader(file)
        next(csv_reader, None)  # Skip the header row; tolerate an empty file
        for row in csv_reader:
            if not row:
                continue
            label = row[-1].strip().lower()
            if label.startswith(prefix):
                label = label[len(prefix):]
            if label not in class_mapping:
                raise KeyError('Unknown class label: {0!r}'.format(row[-1]))
            numeric_row = [float(x) for x in row[:-1]]
            # The label is stored as a float, like every other column.
            numeric_row.append(float(class_mapping[label]))
            dataset.append(numeric_row)
    return dataset

# Split dataset into train and test sets
def split_dataset(dataset, split_ratio):
    """Randomly partition ``dataset`` into a training and a test list.

    ``int(len(dataset) * split_ratio)`` rows are drawn at random (without
    replacement) for training; the rows that are not drawn form the test
    list and keep their original relative order.  ``dataset`` itself is not
    modified because the draws are made from a shallow copy.

    Args:
        dataset: Sequence of rows.
        split_ratio: Fraction of rows to place in the training list.

    Returns:
        A ``(train_set, test_set)`` tuple of lists.
    """
    wanted = int(len(dataset) * split_ratio)
    remaining = list(dataset)
    chosen = []
    for _ in range(wanted):
        # Each draw removes one random row from the pool of remaining rows.
        position = random.randrange(len(remaining))
        chosen.append(remaining.pop(position))
    return chosen, remaining

# Separate dataset by class
def separate_by_class(dataset):
    """Group the rows of ``dataset`` by their class label.

    The label is taken from the last element of each row and converted with
    ``int`` before being used as the dictionary key.

    Args:
        dataset: List of rows whose last element is the class label.

    Returns:
        A dict mapping each integer label to the list of rows carrying it,
        in the order the rows appear in ``dataset``.
    """
    grouped = {}
    for row in dataset:
        label = int(row[-1])
        grouped.setdefault(label, []).append(row)
    return grouped

# Calculate mean of a list of numbers
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises ZeroDivisionError when ``numbers`` is empty.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count

# Calculate standard deviation of a list of numbers
def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The squared deviations from the mean are summed and divided by
    ``len(numbers) - 1``, so a single-element input raises
    ZeroDivisionError.
    """
    center = mean(numbers)
    squared_deviations = sum((value - center) ** 2 for value in numbers)
    degrees_of_freedom = float(len(numbers) - 1)
    return math.sqrt(squared_deviations / degrees_of_freedom)

# Summarize dataset
def summarize_dataset(dataset):
    """Compute ``(mean, stdev)`` for every feature column of ``dataset``.

    Statistics are computed column by column for all columns; the entry for
    the last column (the class label) is then discarded.

    Args:
        dataset: List of equally long rows whose last element is the label.

    Returns:
        A list with one ``(mean, stdev)`` tuple per feature column.
    """
    column_stats = []
    for column in zip(*dataset):
        column_stats.append((mean(column), stdev(column)))
    del column_stats[-1]  # Drop the statistics of the class-label column
    return column_stats

# Summarize data by class
def summarize_by_class(dataset):
    """Build per-class feature statistics for ``dataset``.

    Rows are grouped with ``separate_by_class`` and each group is summarised
    with ``summarize_dataset``.

    Returns:
        A dict mapping each class label to that class's list of
        ``(mean, stdev)`` tuples.
    """
    return {
        label: summarize_dataset(rows)
        for label, rows in separate_by_class(dataset).items()
    }

# Calculate Gaussian probability density function
def calculate_probability(x, mean, stdev):
    """Evaluate the Gaussian probability density function at ``x``.

    Args:
        x: Value at which to evaluate the density.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution.

    Returns:
        The density ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2*pi) * stdev)``.
        When ``stdev`` is zero the distribution is treated as a point mass
        at ``mean``: 1.0 is returned if ``x == mean`` and 0.0 otherwise.
    """
    if stdev == 0:
        # A feature that is constant within a class has zero spread; the
        # formula below would divide by zero, so fall back to a point mass.
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

# Calculate class probabilities
def calculate_class_probabilities(summaries, input_vector):
    """Score ``input_vector`` against every class in ``summaries``.

    For each class the score starts at 1 and is multiplied by the Gaussian
    density of each feature value under that class's ``(mean, stdev)`` for
    the feature.  Only these densities enter the product.

    Args:
        summaries: Dict mapping class label to a list of ``(mean, stdev)``
            tuples, one per feature.
        input_vector: Feature values, indexed in the same order as the
            per-class statistics.

    Returns:
        A dict mapping each class label to its score.
    """
    scores = {}
    for label, feature_stats in summaries.items():
        likelihood = 1
        for position, (mu, sigma) in enumerate(feature_stats):
            likelihood *= calculate_probability(input_vector[position], mu, sigma)
        scores[label] = likelihood
    return scores

# Predict the class for a given input vector
def predict(summaries, input_vector):
    """Return the class label with the highest score for ``input_vector``.

    Scores come from ``calculate_class_probabilities``.  On a tie the label
    encountered first wins; ``None`` is returned when there are no classes.
    """
    scores = calculate_class_probabilities(summaries, input_vector)
    # max() keeps the first of several equal maxima, matching a scan that
    # only replaces the current best on a strictly greater score.
    return max(scores, key=scores.get, default=None)

# Convert numeric class label to class name
def class_label_to_name(class_label):
    """Translate a numeric class label into its species name.

    Labels 0, 1 and 2 map to the three Iris species names; any other
    hashable value yields ``"Unknown"``.
    """
    species = dict(enumerate(("Iris-setosa", "Iris-versicolor", "Iris-virginica")))
    try:
        return species[class_label]
    except KeyError:
        return "Unknown"

# Test the Naive Bayes Classifier on the Iris dataset
def main(filename='C:/Users/LENOVO/Downloads/iris.csv', split_ratio=0.57):
    """Train and evaluate the Naive Bayes classifier on an Iris CSV file.

    The file is loaded, randomly split into training and test rows, the
    per-class statistics are computed from the training rows, and every
    test row is classified.  Each prediction, the overall accuracy and a
    final repeat of the last prediction (with its numeric label) are
    printed.

    Args:
        filename: Path of the CSV file to load.  Defaults to the path that
            was previously hard-coded.
        split_ratio: Fraction of rows used for training.
    """
    dataset = load_csv(filename)
    train_set, test_set = split_dataset(dataset, split_ratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(train_set), len(test_set)))

    # Prepare model
    summaries = summarize_by_class(train_set)

    # Without test rows there is nothing to score; the accuracy division
    # and the final summary line below would both fail.
    if not test_set:
        print('Accuracy: n/a (test set is empty)')
        return

    # Test model
    correct = 0
    for instance in test_set:
        features = instance[:-1]  # Exclude the last element (class label)
        predicted = predict(summaries, features)
        print('Data={0}, Predicted: {1}'.format(features, class_label_to_name(predicted)))
        if predicted == instance[-1]:
            correct += 1
    print('Accuracy: {0}%'.format((correct / float(len(test_set))) * 100))
    # Repeat the last test row's prediction, this time with its numeric label.
    print('Data={0}, Predicted: {1} ({2})'.format(features, class_label_to_name(predicted), predicted))


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Split 150 rows into train=85 and test=65 rows
Data=[5.1, 3.5, 1.4, 0.2], Predicted: Iris-setosa
Data=[4.9, 3.0, 1.4, 0.2], Predicted: Iris-setosa
Data=[4.7, 3.2, 1.3, 0.2], Predicted: Iris-setosa
Data=[4.6, 3.1, 1.5, 0.2], Predicted: Iris-setosa
Data=[5.4, 3.9, 1.7, 0.4], Predicted: Iris-setosa
Data=[4.6, 3.4, 1.4, 0.3], Predicted: Iris-setosa
Data=[5.0, 3.4, 1.5, 0.2], Predicted: Iris-setosa
Data=[4.4, 2.9, 1.4, 0.2], Predicted: Iris-setosa
Data=[4.8, 3.4, 1.6, 0.2], Predicted: Iris-setosa
Data=[5.7, 4.4, 1.5, 0.4], Predicted: Iris-setosa
Data=[5.1, 3.8, 1.5, 0.3], Predicted: Iris-setosa
Data=[5.1, 3.7, 1.5, 0.4], Predicted: Iris-setosa
Data=[5.1, 3.3, 1.7, 0.5], Predicted: Iris-setosa
Data=[4.8, 3.4, 1.9, 0.2], Predicted: Iris-setosa
Data=[5.0, 3.0, 1.6, 0.2], Predicted: Iris-setosa
Data=[5.0, 3.4, 1.6, 0.4], Predicted: Iris-setosa
Data=[4.7, 3.2, 1.6, 0.2], Predicted: Iris-setosa
Data=[5.4, 3.4, 1.5, 0.4], Predicted: Iris-setosa
Data=[5.2, 4.1, 1.5, 0.1], Predicted: Iris-setosa
Data