In [None]:
import csv
import random
from collections import defaultdict

def load_dataset(filename):
    """Read a CSV file and return its records as a list of dicts.

    The first line of the file supplies the column names; each following
    record becomes one dict keyed by those names, as produced by
    ``csv.DictReader``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one dict per data record, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands raw line endings to the csv module, as its docs require.
    # Without it, universal-newline translation rewrites a "\r\n" embedded in
    # a quoted field to "\n" before the parser ever sees it.
    with open(filename, 'r', newline='') as file:
        return list(csv.DictReader(file))

# Split dataset into train and test
def train_test_split(dataset, test_ratio=0.3, seed=None):
    """Shuffle a copy of ``dataset`` and split it into train and test lists.

    The input sequence itself is never reordered.

    Args:
        dataset: Sequence of samples (sliceable, with a length).
        test_ratio: Fraction of samples placed in the test set. The test size
            is ``int(len(dataset) * test_ratio)``, i.e. rounded down.
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` (the default) the module-level
            ``random`` generator is used, so a prior ``random.seed(...)``
            call by the caller still controls the shuffle.

    Returns:
        A ``(train_set, test_set)`` tuple of lists.
    """
    shuffled = dataset[:]
    if seed is None:
        random.shuffle(shuffled)
    else:
        # A dedicated generator keeps the split deterministic without
        # disturbing the global random state.
        random.Random(seed).shuffle(shuffled)

    test_size = int(len(shuffled) * test_ratio)
    return shuffled[test_size:], shuffled[:test_size]

# Calculate prior probabilities and likelihoods
def train_naive_bayes(train_data, features=None, label_key='play'):
    """Estimate class priors and per-feature likelihoods from labelled rows.

    Args:
        train_data: Sequence of dict-like rows; each must contain
            ``label_key`` and every name in ``features``.
        features: Names of the categorical feature columns. Defaults to
            ``['outlook', 'temp', 'humidity', 'wind']``.
        label_key: Name of the column holding the class label.

    Returns:
        A ``(class_probs, feature_probs)`` tuple of plain dicts:

        * ``class_probs[label]`` is P(label), the fraction of rows with
          that label.
        * ``feature_probs[feature][label][value]`` is P(feature=value | label),
          estimated from the rows carrying that label. Values never seen with
          a label have no entry.

        Plain dicts are returned on purpose: looking up an unknown label
        raises ``KeyError`` instead of silently inserting a zero-prior class.

    Raises:
        KeyError: If a row lacks ``label_key`` or one of ``features``.
    """
    if features is None:
        features = ['outlook', 'temp', 'humidity', 'wind']

    # One pass over the data: count rows per class, and feature values per
    # (feature, class) pair. Dict insertion order keeps labels and values in
    # first-seen order.
    class_counts = {}
    value_counts = {feature: {} for feature in features}
    for row in train_data:
        label = row[label_key]
        class_counts[label] = class_counts.get(label, 0) + 1
        for feature, by_label in value_counts.items():
            counts = by_label.setdefault(label, {})
            value = row[feature]
            counts[value] = counts.get(value, 0) + 1

    total_samples = len(train_data)

    # Prior: share of all training rows belonging to each class.
    class_probs = {
        label: count / total_samples for label, count in class_counts.items()
    }

    # Likelihood: share of a class's rows that show each feature value.
    feature_probs = {
        feature: {
            label: {
                value: count / class_counts[label]
                for value, count in counts.items()
            }
            for label, counts in by_label.items()
        }
        for feature, by_label in value_counts.items()
    }

    return class_probs, feature_probs

# Prediction using Naive Bayes
def predict(row, class_probs, feature_probs, unseen_prob=1e-6):
    """Return the class label with the highest Naive Bayes score for ``row``.

    For every class the score is the prior multiplied by the likelihood of
    each feature value found in ``row``.

    Args:
        row: Mapping from feature name to observed value; must contain every
            feature present in ``feature_probs``.
        class_probs: Mapping ``label -> P(label)``.
        feature_probs: Nested mapping
            ``feature -> label -> value -> P(value | label)``.
        unseen_prob: Factor used in place of the likelihood when a value was
            never observed with a class during training. This is a fixed
            stand-in rather than Laplace smoothing, so the scores are only
            meaningful for ranking the classes.

    Returns:
        The label whose score is largest; on a tie, the label that comes
        first in ``class_probs``.

    Raises:
        ValueError: If ``class_probs`` is empty.
        KeyError: If ``row`` lacks a feature listed in ``feature_probs``.
    """
    if not class_probs:
        raise ValueError("class_probs is empty; the model has no classes to predict")

    posteriors = {}
    for label, prior in class_probs.items():
        prob = prior
        for feature, by_label in feature_probs.items():
            # Fall back to unseen_prob so one unseen value does not zero out
            # the whole product for this class.
            prob *= by_label[label].get(row[feature], unseen_prob)
        posteriors[label] = prob

    # Return the class with highest posterior probability
    return max(posteriors, key=posteriors.get)

# Compute accuracy
def compute_accuracy(test_data, class_probs, feature_probs, label_key='play'):
    """Return the fraction of ``test_data`` rows that ``predict`` labels correctly.

    Args:
        test_data: Sequence of dict-like rows, each holding the feature values
            and the true label under ``label_key``.
        class_probs: Class priors, passed through to ``predict``.
        feature_probs: Feature likelihoods, passed through to ``predict``.
        label_key: Name of the column holding the true class label.

    Returns:
        Accuracy as a float in ``[0, 1]``. An empty ``test_data`` yields
        ``0.0`` rather than raising ``ZeroDivisionError``; this can happen
        when a small dataset is split with a low test ratio.
    """
    total = len(test_data)
    if total == 0:
        return 0.0

    correct = sum(
        1
        for row in test_data
        if predict(row, class_probs, feature_probs) == row[label_key]
    )
    return correct / total

In [None]:

# Seed the global RNG so the shuffle inside train_test_split, and therefore
# every number printed below, is the same on each Restart & Run All.
SEED = 42
random.seed(SEED)

filename = "play_tennis.csv"
dataset = load_dataset(filename)

train_data, test_data = train_test_split(dataset, test_ratio=0.3)

# Fit on the training rows only, then score on the held-out rows.
class_probs, feature_probs = train_naive_bayes(train_data)
accuracy = compute_accuracy(test_data, class_probs, feature_probs)

print("\nNaïve Bayes Classifier Results:")
print("--------------------------------")
print("Training samples:", len(train_data))
print("Testing samples:", len(test_data))
print(f"Accuracy: {accuracy * 100:.2f}%\n")

# Show the per-row predictions behind the accuracy figure.
print("Sample Predictions:")
for row in test_data:
    print(f"Day: {row['day']}, Predicted: {predict(row, class_probs, feature_probs)}, Actual: {row['play']}")



Naïve Bayes Classifier Results:
--------------------------------
Training samples: 10
Testing samples: 4
Accuracy: 75.00%

Sample Predictions:
Day: D10, Predicted: Yes, Actual: Yes
Day: D13, Predicted: Yes, Actual: Yes
Day: D6, Predicted: Yes, Actual: No
Day: D11, Predicted: Yes, Actual: Yes


In [3]:
# Classify one hand-written observation with the model trained above.
sample = dict(
    outlook='Sunny',
    temp='Cool',
    humidity='Normal',
    wind='Weak',
)

predicted = predict(sample, class_probs, feature_probs)
print(f"\nCustom input prediction → {predicted}")



Custom input prediction → Yes
