# Naive Bayes Classifier - Weather Play Prediction

In [1]:
# Import required libraries for Naive Bayes classification
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

# Weather dataset: 14 labelled observations, written one row per example.
# Column order: Outlook, Temperature, Humidity, Windy (features) and Play (target, yes/no).
weather_columns = ['Outlook', 'Temperature', 'Humidity', 'Windy', 'Play']
weather_rows = [
    ('sunny',    'hot',  'high',   'false', 'no'),
    ('sunny',    'hot',  'high',   'true',  'no'),
    ('overcast', 'hot',  'high',   'false', 'yes'),
    ('rainy',    'mild', 'high',   'false', 'yes'),
    ('rainy',    'cool', 'normal', 'false', 'yes'),
    ('rainy',    'cool', 'normal', 'true',  'no'),
    ('overcast', 'cool', 'normal', 'true',  'yes'),
    ('sunny',    'mild', 'high',   'false', 'no'),
    ('sunny',    'cool', 'normal', 'false', 'yes'),
    ('rainy',    'mild', 'normal', 'false', 'yes'),
    ('sunny',    'mild', 'normal', 'true',  'yes'),
    ('overcast', 'mild', 'high',   'true',  'yes'),
    ('overcast', 'hot',  'normal', 'false', 'yes'),
    ('rainy',    'mild', 'high',   'true',  'no'),
]

# Pivot the rows into a column-name -> list-of-values mapping (same layout the
# DataFrame constructor receives below).
data = {
    name: [row[position] for row in weather_rows]
    for position, name in enumerate(weather_columns)
}

# Build the DataFrame and show it together with the class balance of the target.
df = pd.DataFrame(data)
print("Weather Dataset:")
print(df)
yes_count = (df['Play'] == 'yes').sum()
no_count = (df['Play'] == 'no').sum()
print(f"\nTotal: {len(df)} | Yes: {yes_count}, No: {no_count}")

# One LabelEncoder per categorical column. Each encoder is kept under its own
# name so the fitted mapping can be reused later to encode new inputs and to
# decode predictions.
# Label encoding replaces each text category with an integer code
# (e.g., overcast=0, rainy=1, sunny=2).
le_outlook, le_temp, le_humidity, le_windy, le_play = (LabelEncoder() for _ in range(5))

# Fit every encoder on its source column and store the codes in a new
# '<column>_Enc' column; the model below is trained on these numeric columns.
for column, encoder in (
    ('Outlook', le_outlook),
    ('Temperature', le_temp),
    ('Humidity', le_humidity),
    ('Windy', le_windy),
    ('Play', le_play),
):
    df[f'{column}_Enc'] = encoder.fit_transform(df[column])

# Feature matrix (X): the four encoded feature columns. Target vector (y): encoded Play.
X = df[['Outlook_Enc', 'Temperature_Enc', 'Humidity_Enc', 'Windy_Enc']]
y = df['Play_Enc']

# Fit a Gaussian Naive Bayes classifier on the encoded features.
# GaussianNB models each feature with a per-class normal (Gaussian) distribution.
# NOTE(review): the inputs here are integer category codes rather than continuous
# measurements, so the Gaussian assumption is only an approximation — confirm
# whether a categorical variant would suit this data better.
model = GaussianNB().fit(X, y)

# Accuracy on the SAME rows the model was trained on (no held-out data):
# the fraction of predictions that match the true labels.
y_pred = model.predict(X)
accuracy = (y_pred == y).mean()
print(f"\nNaive Bayes Accuracy: {accuracy*100:.2f}%")

def calculate_probability(df, feature, value, play):
    """Estimate the conditional probability P(feature=value | Play=play).

    Computed by counting rows: count(feature == value AND Play == play)
    divided by count(Play == play).
    Example: P(Outlook=sunny | Play=yes) = count(sunny AND yes) / count(yes).

    Parameters:
        df: DataFrame containing the `feature` column and a 'Play' column.
        feature: name of the column to condition on.
        value: category of `feature` whose probability is wanted.
        play: value of the 'Play' column that defines the class.

    Returns:
        The ratio as a float, or 0 when no row has Play == play.
    """
    has_value = df[feature] == value
    in_class = df['Play'] == play

    class_size = int(in_class.sum())
    if class_size == 0:
        # Empty class: avoid dividing by zero.
        return 0

    return int((has_value & in_class).sum()) / class_size

# Class priors P(yes) and P(no): the share of rows with each Play label,
# i.e. the base rates before any feature is taken into account.
p_yes = (df['Play'] == 'yes').mean()
p_no = (df['Play'] == 'no').mean()

print("\nPrior Probabilities:")
print(f"P(yes) = {p_yes:.4f}, P(no) = {p_no:.4f}")

def predict_play(outlook, temperature, humidity, windy):
    """Predict whether to play for a single weather observation.

    Each argument is the text category for the corresponding feature. The values
    are encoded with the encoders fitted during training, passed to the trained
    model, and the predicted class is decoded back to its text label.

    Returns:
        (label, proba): the decoded prediction (yes/no) and the model's class
        probability row, read by callers as [P(no), P(yes)].

    Note: a category the encoders were not fitted on is not handled here; the
    encoder's own error propagates to the caller.
    """
    feature_columns = ['Outlook_Enc', 'Temperature_Enc', 'Humidity_Enc', 'Windy_Enc']
    fitted_encoders = (le_outlook, le_temp, le_humidity, le_windy)
    raw_values = (outlook, temperature, humidity, windy)

    # Encode in the same column order that was used to train the model.
    encoded_row = [
        encoder.transform([raw])[0]
        for encoder, raw in zip(fitted_encoders, raw_values)
    ]

    # Single-row frame with the training column names.
    sample = pd.DataFrame([encoded_row], columns=feature_columns)

    predicted_code = model.predict(sample)[0]
    class_probabilities = model.predict_proba(sample)[0]

    # Map the numeric class back to its original text label.
    return le_play.inverse_transform([predicted_code])[0], class_probabilities

# Run the trained model on three hand-picked weather scenarios and print the
# predicted label together with both class probabilities.
print("\nTest Cases:")
tests = [
    ('sunny', 'hot', 'high', 'false'),
    ('overcast', 'mild', 'normal', 'false'),
    ('rainy', 'cool', 'normal', 'true'),
]
for outlook, temp, hum, wind in tests:
    result, proba = predict_play(outlook, temp, hum, wind)
    scenario_label = ", ".join((outlook, temp, hum, wind))
    print(f"{scenario_label} -> {result} (no={proba[0]:.3f}, yes={proba[1]:.3f})")

print("\nModel ready.")


Weather Dataset:
     Outlook Temperature Humidity  Windy Play
0      sunny         hot     high  false   no
1      sunny         hot     high   true   no
2   overcast         hot     high  false  yes
3      rainy        mild     high  false  yes
4      rainy        cool   normal  false  yes
5      rainy        cool   normal   true   no
6   overcast        cool   normal   true  yes
7      sunny        mild     high  false   no
8      sunny        cool   normal  false  yes
9      rainy        mild   normal  false  yes
10     sunny        mild   normal   true  yes
11  overcast        mild     high   true  yes
12  overcast         hot   normal  false  yes
13     rainy        mild     high   true   no

Total: 14 | Yes: 9, No: 5

Naive Bayes Accuracy: 92.86%

Prior Probabilities:
P(yes) = 0.6429, P(no) = 0.3571

Test Cases:
sunny, hot, high, false -> no (no=0.801, yes=0.199)
overcast, mild, normal, false -> yes (no=0.001, yes=0.999)
rainy, cool, normal, true -> yes (no=0.109, yes=0.891)

Model ready.

In [3]:
# Weather conditions for an ad-hoc prediction; edit these values to try another case.
custom_outlook = 'sunny'
custom_temperature = 'cool'
custom_humidity = 'high'
custom_windy = 'true'

result, proba = predict_play(
    custom_outlook,
    custom_temperature,
    custom_humidity,
    custom_windy,
)

# Echo the inputs, then the predicted label and both class probabilities as percentages.
report_lines = [
    "Custom Prediction:",
    f"Outlook: {custom_outlook}",
    f"Temperature: {custom_temperature}",
    f"Humidity: {custom_humidity}",
    f"Windy: {custom_windy}",
    f"Result: {result}",
    f"Confidence: no={proba[0]*100:.2f}%, yes={proba[1]*100:.2f}%",
]
print("\n".join(report_lines))

Custom Prediction:
Outlook: sunny
Temperature: cool
Humidity: high
Windy: true
Result: no
Confidence: no=89.15%, yes=10.85%
