In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [4]:
# Step 1: Load the golf dataset
data = pd.read_csv("golf.csv")  # Replace "golf_dataset.csv" with your dataset file

In [5]:
# Step 2: Split the dataset into features (X) and labels (y)
X = data.drop('Play Golf', axis=1)
y = data['Play Golf']

In [6]:
# Step 3: Convert categorical variables into a numerical representation
X = pd.get_dummies(X)

In [7]:
# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Step 5: Create a Naive Bayes classifier
classifier = GaussianNB()

In [9]:
# Step 6: Train the classifier
classifier.fit(X_train, y_train)

In [10]:
# Step 7: Make predictions on the test set
y_pred = classifier.predict(X_test)

In [11]:
# Step 8: Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6666666666666666


In [12]:
# Step 9: Calculate conditional probabilities
classes = classifier.classes_

# Display conditional probabilities for each feature and class
for i, class_ in enumerate(classes):
    print(f"Class: {class_}")
    means = classifier.theta_[i]  # Means for the current class
    for feature, mean in zip(X.columns, means):
        print(f"Feature: {feature}, Mean: {mean}")
    print("\n")

Class: No
Feature: Windy, Mean: 0.75
Feature: Outlook_Overcast, Mean: 0.0
Feature: Outlook_Rainy, Mean: 0.5
Feature: Outlook_Sunny, Mean: 0.5
Feature: Temp_Cool, Mean: 0.25
Feature: Temp_Hot, Mean: 0.25
Feature: Temp_Mild, Mean: 0.5
Feature: Humidity_High, Mean: 0.75
Feature: Humidity_Normal, Mean: 0.25


Class: Yes
Feature: Windy, Mean: 0.2857142857142857
Feature: Outlook_Overcast, Mean: 0.42857142857142855
Feature: Outlook_Rainy, Mean: 0.2857142857142857
Feature: Outlook_Sunny, Mean: 0.2857142857142857
Feature: Temp_Cool, Mean: 0.42857142857142855
Feature: Temp_Hot, Mean: 0.2857142857142857
Feature: Temp_Mild, Mean: 0.2857142857142857
Feature: Humidity_High, Mean: 0.2857142857142857
Feature: Humidity_Normal, Mean: 0.7142857142857143




In [14]:
# Calculate Conditional Probabilities
for i,feature_name in enumerate(X.columns):
    print('Conditional probabilities for feature:', feature_name)
    for j, class_name in enumerate(classifier.classes_):
         mean = classifier.theta_[j, i]     
         print('P({}:{})~N({},)'.format(feature_name, class_name, mean))
print("____")

Conditional probabilities for feature: Windy
P(Windy:No)~N(0.75,)
P(Windy:Yes)~N(0.2857142857142857,)
Conditional probabilities for feature: Outlook_Overcast
P(Outlook_Overcast:No)~N(0.0,)
P(Outlook_Overcast:Yes)~N(0.42857142857142855,)
Conditional probabilities for feature: Outlook_Rainy
P(Outlook_Rainy:No)~N(0.5,)
P(Outlook_Rainy:Yes)~N(0.2857142857142857,)
Conditional probabilities for feature: Outlook_Sunny
P(Outlook_Sunny:No)~N(0.5,)
P(Outlook_Sunny:Yes)~N(0.2857142857142857,)
Conditional probabilities for feature: Temp_Cool
P(Temp_Cool:No)~N(0.25,)
P(Temp_Cool:Yes)~N(0.42857142857142855,)
Conditional probabilities for feature: Temp_Hot
P(Temp_Hot:No)~N(0.25,)
P(Temp_Hot:Yes)~N(0.2857142857142857,)
Conditional probabilities for feature: Temp_Mild
P(Temp_Mild:No)~N(0.5,)
P(Temp_Mild:Yes)~N(0.2857142857142857,)
Conditional probabilities for feature: Humidity_High
P(Humidity_High:No)~N(0.75,)
P(Humidity_High:Yes)~N(0.2857142857142857,)
Conditional probabilities for feature: Humidity_