In [1]:
import pandas as pd
# Load the PlayTennis dataset from the working directory and print every row.
df = pd.read_csv(r'PlayTennis.csv')
print(df)


     Outlook Temperature Humidity    Wind Play Tennis
0      Sunny         Hot     High    Weak          No
1      Sunny         Hot     High  Strong          No
2   Overcast         Hot     High    Weak         Yes
3       Rain        Mild     High    Weak         Yes
4       Rain        Cool   Normal    Weak         Yes
5       Rain        Cool   Normal  Strong          No
6   Overcast        Cool   Normal  Strong         Yes
7      Sunny        Mild     High    Weak          No
8      Sunny        Cool   Normal    Weak         Yes
9       Rain        Mild   Normal    Weak         Yes
10     Sunny        Mild   Normal  Strong         Yes
11  Overcast        Mild     High  Strong         Yes
12  Overcast         Hot   Normal    Weak         Yes
13      Rain        Mild     High  Strong          No


In [None]:
# Split the frame into feature rows and the target column.
# `columns=` is required here: a bare label passed to DataFrame.drop is looked
# up in the row index (axis=0) and raises KeyError for a column name.
data = df.drop(columns='Play Tennis').values.tolist()  # Features
labels = df['Play Tennis'].values.tolist()  # Labels

def calculate_prior(labels):
    """Compute the prior probability of each class.

    Args:
        labels: Sequence of hashable class labels, one per training sample.

    Returns:
        dict mapping each distinct label to its share of ``labels``
        (occurrences divided by ``len(labels)``), keyed in order of first
        appearance. An empty ``labels`` yields an empty dict.
    """
    sample_count = len(labels)
    # dict.fromkeys keeps one entry per distinct label, in first-seen order.
    distinct_labels = dict.fromkeys(labels)
    return {
        label: labels.count(label) / sample_count
        for label in distinct_labels
    }

def calculate_conditional(data, labels):
    """Estimate P(feature value | class) for every feature and class.

    Args:
        data: List of samples; each sample is a sequence of hashable feature
            values. The number of features is taken from the first sample.
        labels: Class label of each sample, aligned with ``data`` by index.

    Returns:
        dict keyed by ``"feature_{i}_given_{cls}"``. Each value is a dict
        mapping every feature value observed for that feature within that
        class to its relative frequency inside the class. Values never seen
        with a class are simply absent (no smoothing is applied). Empty
        ``data`` yields an empty dict.
    """
    conditional = {}
    if not data:
        # Nothing to estimate; also avoids indexing data[0] on an empty list.
        return conditional

    all_classes = list(set(labels))  # Unique classes
    for i in range(len(data[0])):  # Loop through each feature
        for cls in all_classes:  # Loop through each class
            # Tally the values of feature i among samples of this class in a
            # single pass, rather than re-scanning the subset once per value.
            value_counts = {}
            total_count = 0
            for j, row in enumerate(data):
                if labels[j] == cls:
                    value = row[i]
                    value_counts[value] = value_counts.get(value, 0) + 1
                    total_count += 1
            # total_count is non-zero whenever value_counts has entries, so
            # the division below cannot hit zero.
            conditional[f"feature_{i}_given_{cls}"] = {
                value: count / total_count
                for value, count in value_counts.items()
            }
    return conditional

def predict(sample, prior, conditional):
    """Classify one sample with the Naive Bayes decision rule.

    Args:
        sample: Sequence of feature values, positionally aligned with the
            features used to build ``conditional``.
        prior: dict mapping each class to its prior probability.
        conditional: dict keyed by ``"feature_{i}_given_{cls}"`` whose values
            map feature values to their conditional probability.

    Returns:
        The class with the largest product of prior and per-feature
        conditional probabilities. Ties go to the class that appears first
        in ``prior``.
    """
    scores = {}
    for cls, class_prior in prior.items():
        score = class_prior
        for position, value in enumerate(sample):
            likelihoods = conditional[f"feature_{position}_given_{cls}"]
            # A value never observed with this class contributes a factor of
            # zero, which wipes out the whole score for the class.
            score *= likelihoods.get(value, 0)
        scores[cls] = score
    return max(scores, key=scores.get)

# Fit the model: class priors first, then the per-feature likelihood tables.
prior_probabilities = calculate_prior(labels)
conditional_probabilities = calculate_conditional(data, labels)

# Score the classifier on the same rows it was fitted on, so the figure
# reported below is a training accuracy.
correct_predictions = 0
for idx, sample in enumerate(data):
    prediction = predict(sample, prior_probabilities, conditional_probabilities)
    # A hit adds 1 and a miss adds 0.
    correct_predictions += int(prediction == labels[idx])

print(f'correct predictions are: {correct_predictions} out actual_values: {len(data)}')
# Express the hit rate as a percentage of all rows.
training_accuracy = correct_predictions / len(data) * 100
print(f"Training Accuracy: {training_accuracy:.2f}%")
