In [2]:
import csv
import math
import random

def load_iris_data(filename):
    """Load an Iris-style CSV file into a list of ``features + [label]`` rows.

    The first line of the file is treated as a header and discarded. For
    every remaining non-blank row the first column (the record ID) is
    dropped, the last column is kept verbatim as the class label, and every
    column in between is converted to ``float``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of lists, each holding the float features followed by the
        label string. The list is empty when the file is empty or contains
        only a header.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature cell cannot be parsed as a float.
    """
    data = []
    # newline='' lets the csv module do its own line-ending handling, which
    # is what the csv documentation asks for when passing a file object.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # Ignore empty lines and lines made only of blank cells, which
            # would otherwise produce a record with no features.
            if not any(cell.strip() for cell in row):
                continue
            # Skip ID column (index 0), convert features to float
            features = [float(x) for x in row[1:-1]]
            label = row[-1]
            data.append(features + [label])
    return data

class NaiveBayes:
    """Gaussian Naive Bayes classifier for numeric feature vectors.

    Each feature of each class is modelled as an independent normal
    distribution whose mean and sample standard deviation are estimated
    from the training records.
    """

    def __init__(self):
        # label -> list of (mean, stddev) tuples, one tuple per feature
        self.class_info = {}
        # label -> fraction of the training records that carry this label
        self.class_priors = {}

    def train(self, train_data):
        """Estimate per-class feature statistics and class priors.

        Any statistics from a previous call are discarded, so the model
        only ever reflects the most recent training data.

        Args:
            train_data: Sized sequence of records; each record is a sequence
                of numeric features followed by its class label.
        """
        # Separate data by class
        class_dict = {}
        for record in train_data:
            class_dict.setdefault(record[-1], []).append(record[:-1])

        total = len(train_data)
        class_info = {}
        class_priors = {}

        # Calculate statistics for each class
        for label, samples in class_dict.items():
            count = len(samples)
            feature_stats = []
            for i in range(len(samples[0])):
                feature_vals = [s[i] for s in samples]
                mean = sum(feature_vals) / count
                if count > 1:
                    # Sample variance (Bessel's correction)
                    variance = sum((x - mean) ** 2 for x in feature_vals) / (count - 1)
                else:
                    # A single sample has no spread; dividing by count - 1
                    # would be a division by zero.
                    variance = 0.0
                feature_stats.append((mean, math.sqrt(variance)))

            class_info[label] = feature_stats
            class_priors[label] = count / total

        # Publish only after everything was computed, replacing (not merging
        # with) whatever an earlier train() call stored.
        self.class_info = class_info
        self.class_priors = class_priors

    def _gaussian_pdf(self, x, mean, stddev):
        """Calculate Gaussian probability density of ``x``.

        A (near-)zero ``stddev`` is treated as a point mass: 1.0 when ``x``
        equals ``mean`` exactly, otherwise 0.0.
        """
        if stddev < 1e-9:  # Handle zero variance
            return 1.0 if x == mean else 0.0
        exponent = math.exp(-((x - mean)**2)/(2 * stddev**2))
        return (1/(math.sqrt(2 * math.pi) * stddev)) * exponent

    def _gaussian_log_pdf(self, x, mean, stddev):
        """Return the natural log of ``_gaussian_pdf(x, mean, stddev)``.

        Computed analytically so that densities too small to represent as a
        float still yield a finite, comparable score.
        """
        if stddev < 1e-9:  # Same point-mass convention as _gaussian_pdf
            return 0.0 if x == mean else float('-inf')
        return (-((x - mean) ** 2) / (2 * stddev ** 2)
                - math.log(math.sqrt(2 * math.pi) * stddev))

    def predict(self, test_sample):
        """Predict class for a single sample.

        Classes are scored by log-prior plus the sum of per-feature
        log-densities. This ranks classes the same way as multiplying the
        raw probabilities, but does not underflow to zero when there are
        many features or the sample lies far from every class.

        Args:
            test_sample: Sequence of numeric feature values, in the same
                order as the features used for training.

        Returns:
            The label with the highest score (the first such label on a
            tie), or ``None`` if the model has no classes or no class
            produced a comparable (non-NaN) score.
        """
        best_score = None
        best_label = None

        for label, stats in self.class_info.items():
            prior = self.class_priors[label]
            score = math.log(prior) if prior > 0 else float('-inf')
            for i, feature_value in enumerate(test_sample):
                mean, stddev = stats[i]
                score += self._gaussian_log_pdf(feature_value, mean, stddev)

            # NaN scores (e.g. from NaN features) cannot be ranked
            if math.isnan(score):
                continue
            if best_score is None or score > best_score:
                best_score = score
                best_label = label
        return best_label

# Main execution: train on a random 80% of iris.csv, evaluate on the rest
if __name__ == "__main__":
    # Load and prepare data
    dataset = load_iris_data("iris.csv")

    # Create label mapping. The labels are sorted so the label -> integer
    # assignment is the same on every run; iterating a set of strings
    # directly can change order between interpreter runs.
    labels = sorted({row[-1] for row in dataset})
    label_map = {label: i for i, label in enumerate(labels)}
    reverse_map = {v: k for k, v in label_map.items()}

    # Convert labels to integers
    for row in dataset:
        row[-1] = label_map[row[-1]]

    # Split into features and labels
    X = [row[:-1] for row in dataset]
    y = [row[-1] for row in dataset]

    # Create train-test split (80-20)
    combined = list(zip(X, y))
    random.shuffle(combined)
    split = int(0.8 * len(combined))
    train_set = combined[:split]
    test_set = combined[split:]

    # Prepare training data in the features + [label] layout train() expects
    X_train = [item[0] for item in train_set]
    y_train = [item[1] for item in train_set]
    train_data = [features + [label] for features, label in zip(X_train, y_train)]

    # Train model
    nb = NaiveBayes()
    nb.train(train_data)

    # Test model
    correct = wrong = 0
    print("Test Results:")
    print("ID\tActual\t\tPredicted\tStatus")
    print("-" * 40)

    for idx, (test_features, actual_label) in enumerate(test_set, 1):
        predicted_label = nb.predict(test_features)
        status = "Correct" if predicted_label == actual_label else "Wrong"

        if status == "Correct":
            correct += 1
        else:
            wrong += 1

        # predict() returns None when the model has no classes (e.g. an
        # empty training split); show a placeholder instead of failing.
        predicted_name = reverse_map.get(predicted_label, "N/A")
        print(f"{idx}\t{reverse_map[actual_label]:15}\t{predicted_name:15}\t{status}")

    # Guard against an empty test split, which would divide by zero
    total = correct + wrong
    accuracy = 100 * (correct / total) if total else 0.0

    print("\nFinal Metrics:")
    print(f"Correct Predictions: {correct}")
    print(f"Wrong Predictions: {wrong}")
    print(f"Accuracy: {accuracy:.2f}%")


Test Results:
ID	Actual		Predicted	Status
----------------------------------------
1	Iris-versicolor	Iris-versicolor	Correct
2	Iris-setosa    	Iris-setosa    	Correct
3	Iris-virginica 	Iris-virginica 	Correct
4	Iris-versicolor	Iris-versicolor	Correct
5	Iris-setosa    	Iris-setosa    	Correct
6	Iris-setosa    	Iris-setosa    	Correct
7	Iris-virginica 	Iris-virginica 	Correct
8	Iris-virginica 	Iris-virginica 	Correct
9	Iris-versicolor	Iris-versicolor	Correct
10	Iris-setosa    	Iris-setosa    	Correct
11	Iris-versicolor	Iris-versicolor	Correct
12	Iris-setosa    	Iris-setosa    	Correct
13	Iris-setosa    	Iris-setosa    	Correct
14	Iris-setosa    	Iris-setosa    	Correct
15	Iris-versicolor	Iris-versicolor	Correct
16	Iris-virginica 	Iris-virginica 	Correct
17	Iris-virginica 	Iris-virginica 	Correct
18	Iris-setosa    	Iris-setosa    	Correct
19	Iris-virginica 	Iris-virginica 	Correct
20	Iris-versicolor	Iris-versicolor	Correct
21	Iris-versicolor	Iris-virginica 	Wrong
22	Iris-virginica 	Iris-v