# Naive Bayes Classifier - Short Version
Complete implementation in a single code cell for quick execution

In [1]:
# Import libraries
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
df = pd.read_csv('weatherHistory.csv')
print(f"Dataset loaded: {df.shape[0]} records")

# Select features and clean data.
# dropna() discards every row that is missing any of the selected columns.
# The explicit .copy() makes df_clean an independent frame, so the 'Play'
# column added to it later is a plain assignment rather than a write to a
# derived selection (which pandas would otherwise flag with
# SettingWithCopyWarning, currently hidden by the global warnings filter).
df_clean = df[['Summary', 'Precip Type', 'Temperature (C)',
               'Humidity', 'Wind Speed (km/h)', 'Visibility (km)']].dropna().copy()

# Create target variable (Play: 1=Yes, 0=No)
def determine_play(row):
    """Return 1 when at least four of five weather conditions are favourable.

    `row` is any mapping-like object (e.g. a DataFrame row) indexable by the
    column names used below. The five conditions are:

    * temperature between 15 and 25 °C inclusive
    * humidity below 0.7
    * wind speed below 20 km/h
    * visibility above 5 km
    * precipitation type is anything other than 'rain'

    Returns 1 (play) if four or more hold, otherwise 0 (do not play).
    """
    temperature = row['Temperature (C)']
    favourable = (
        15 <= temperature <= 25,
        row['Humidity'] < 0.7,
        row['Wind Speed (km/h)'] < 20,
        row['Visibility (km)'] > 5,
        row['Precip Type'] != 'rain',
    )
    # Booleans count as 0/1, so the sum is the number of conditions met.
    if sum(favourable) >= 4:
        return 1
    return 0

# Label every cleaned row with the rule-based target, then keep a
# reproducible random subset (at most 5000 rows) for modelling.
df_clean['Play'] = df_clean.apply(determine_play, axis=1)
df_sample = df_clean.sample(n=min(5000, len(df_clean)), random_state=42)
print(f"Samples: {len(df_sample)}, Play=1: {sum(df_sample['Play'])}, Play=0: {len(df_sample)-sum(df_sample['Play'])}")

# Integer-encode the two text columns. The fitted encoders stay at module
# level because predict_play() reuses them on new inputs.
le_summary = LabelEncoder()
le_precip = LabelEncoder()
df_encoded = df_sample.assign(
    Summary_Encoded=le_summary.fit_transform(df_sample['Summary']),
    Precip_Type_Encoded=le_precip.fit_transform(df_sample['Precip Type']),
)

# Feature matrix and target vector. The column order matters:
# predict_play() builds its input row in this same order.
X = df_encoded[[
    'Summary_Encoded', 'Precip_Type_Encoded', 'Temperature (C)',
    'Humidity', 'Wind Speed (km/h)', 'Visibility (km)',
]]
y = df_encoded['Play']

# Stratified 70/30 split so both parts keep the same Play / Not Play ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
print(f"Training: {len(X_train)}, Testing: {len(X_test)}")

# Fit a Gaussian Naive Bayes classifier on the training portion
model = GaussianNB().fit(X_train, y_train)

# Score the held-out portion
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Not Play', 'Play']))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Prediction function
def _encode_or_zero(encoder, label):
    """Return the integer code `encoder` assigns to `label`, or 0 if it cannot.

    LabelEncoder.transform raises ValueError for a label it did not see while
    fitting; TypeError is also caught to cover labels of an incompatible type
    (NOTE(review): exact exception for mismatched types may vary by
    scikit-learn version). Anything else (KeyboardInterrupt, a NameError from
    a missing encoder, ...) is allowed to propagate instead of being hidden.
    """
    try:
        return encoder.transform([label])[0]
    except (ValueError, TypeError):
        # Best-effort fallback kept from the original behaviour: an unknown
        # label is treated as code 0, i.e. the first entry of encoder.classes_.
        return 0


def predict_play(summary, precip, temp, humidity, wind, visibility):
    """Predict Play / Not Play for a single weather observation.

    Parameters
    ----------
    summary, precip
        Category labels, converted to integer codes with the module-level
        `le_summary` and `le_precip` encoders. Labels the encoders do not
        know fall back to code 0 rather than raising.
    temp, humidity, wind, visibility
        Numeric readings, placed in the feature row after the two encoded
        categories (the same column order the model was trained on).

    Returns
    -------
    (pred, prob)
        `pred` is the class predicted by the module-level `model`; `prob` is
        the matching row of `model.predict_proba` (one probability per class).
    """
    s_enc = _encode_or_zero(le_summary, summary)
    p_enc = _encode_or_zero(le_precip, precip)
    input_data = np.array([[s_enc, p_enc, temp, humidity, wind, visibility]])
    pred = model.predict(input_data)[0]
    prob = model.predict_proba(input_data)[0]
    return pred, prob

# Demo: run the trained model on two hand-picked scenarios
print("\n")
print("TEST PREDICTIONS")

# Scenario 1: mild, dry-ish air, light wind, clear view
pred1, prob1 = predict_play(
    summary='Clear', precip='rain',
    temp=22.0, humidity=0.5, wind=12.0, visibility=15.0,
)
print("\nExample 1 (Good Weather):")
print("Input: Clear, 22°C, 50% humidity, 12 km/h wind, 15 km visibility")
# The predicted label (0 or 1) doubles as the index of its own probability.
print(f"Result: {'PLAY' if pred1==1 else 'NOT PLAY'} (Confidence: {prob1[pred1]*100:.1f}%)")

# Scenario 2: cold, humid, windy, poor visibility
pred2, prob2 = predict_play(
    summary='Mostly Cloudy', precip='rain',
    temp=5.0, humidity=0.9, wind=30.0, visibility=3.0,
)
print("\nExample 2 (Bad Weather):")
print("Input: Cloudy, 5°C, 90% humidity, 30 km/h wind, 3 km visibility")
print(f"Result: {'PLAY' if pred2==1 else 'NOT PLAY'} (Confidence: {prob2[pred2]*100:.1f}%)")

print("\n")
print("Model ready for predictions!")

Dataset loaded: 96453 records
Samples: 5000, Play=1: 733, Play=0: 4267
Training: 3500, Testing: 1500

Model Accuracy: 86.80%

Classification Report:
              precision    recall  f1-score   support

    Not Play       0.96      0.88      0.92      1280
        Play       0.53      0.80      0.64       220

    accuracy                           0.87      1500
   macro avg       0.75      0.84      0.78      1500
weighted avg       0.90      0.87      0.88      1500


Confusion Matrix:
[[1125  155]
 [  43  177]]


TEST PREDICTIONS

Example 1 (Good Weather):
Input: Clear, 22°C, 50% humidity, 12 km/h wind, 15 km visibility
Result: PLAY (Confidence: 72.5%)

Example 2 (Bad Weather):
Input: Cloudy, 5°C, 90% humidity, 30 km/h wind, 3 km visibility
Result: NOT PLAY (Confidence: 100.0%)


Model ready for predictions!
Samples: 5000, Play=1: 733, Play=0: 4267
Training: 3500, Testing: 1500

Model Accuracy: 86.80%

Classification Report:
              precision    recall  f1-score   support

 


# Test with Your Own Data

In [2]:
# INPUT - edit these six values to describe the weather you want to test.
# Humidity is a fraction (0.6 means 60%); the other numbers use the units
# shown in the printout below.
my_summary = 'Partly Cloudy'
my_precip = 'rain'
my_temp = 20.0
my_humidity = 0.6
my_wind = 15.0
my_visibility = 10.0

# Ask the trained model for a label and its per-class probabilities
my_pred, my_prob = predict_play(
    summary=my_summary,
    precip=my_precip,
    temp=my_temp,
    humidity=my_humidity,
    wind=my_wind,
    visibility=my_visibility,
)

# Report the inputs followed by the verdict
print("PREDICTION RESULT")
print(f"Weather: {my_summary}")
print(f"Temperature: {my_temp}°C")
print(f"Humidity: {my_humidity*100:.0f}%")
print(f"Wind Speed: {my_wind} km/h")
print(f"Visibility: {my_visibility} km")
print(f"Prediction: {'PLAY' if my_pred==1 else 'DO NOT PLAY'}")
# my_pred (0 or 1) is also the position of that class in my_prob
print(f"Confidence: {my_prob[my_pred]*100:.2f}%")
print(f"Probabilities: Not Play={my_prob[0]*100:.1f}%, Play={my_prob[1]*100:.1f}%")

PREDICTION RESULT
Weather: Partly Cloudy
Temperature: 20.0°C
Humidity: 60%
Wind Speed: 15.0 km/h
Visibility: 10.0 km
Prediction: PLAY
Confidence: 65.65%
Probabilities: Not Play=34.3%, Play=65.7%
