# Decision Tree Classifier


In [3]:
# Decision Tree Play Prediction - single cell
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1) Load dataset
# Read the raw weather table from the working directory and report its size.
df = pd.read_csv('weatherHistory.csv')
print(f"Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns")

# 2) Select features + clean
# The six columns the rest of this cell works with.
cols = [
    'Summary',
    'Precip Type',
    'Temperature (C)',
    'Humidity',
    'Wind Speed (km/h)',
    'Visibility (km)',
]
# Drop every row that has a missing value in any selected column; .copy() yields
# an independent frame so columns added later do not touch `df`.
df_clean = df.loc[:, cols].dropna().copy()

# 3) Create target: Play (1) / Not Play (0)
def determine_play(r):
    """Label one weather observation as Play (1) or Not Play (0).

    Five conditions are checked and the row is labelled Play when at least
    four of them hold:

    * temperature between 15 and 25 °C, both ends included
    * humidity strictly below 0.7
    * wind speed strictly below 20 km/h
    * visibility strictly above 5 km
    * precipitation type is anything other than 'rain'

    Args:
        r: Mapping-like row (e.g. a DataFrame row) with the keys
            'Temperature (C)', 'Humidity', 'Wind Speed (km/h)',
            'Visibility (km)' and 'Precip Type'.

    Returns:
        int: 1 for Play, 0 for Not Play.
    """
    checks = (
        15 <= r['Temperature (C)'] <= 25,
        r['Humidity'] < 0.7,
        r['Wind Speed (km/h)'] < 20,
        r['Visibility (km)'] > 5,
        r['Precip Type'] != 'rain',
    )
    favourable = sum(checks)
    # One failed condition is tolerated; two or more means Not Play.
    if favourable >= 4:
        return 1
    return 0

# Label every cleaned row, then keep a reproducible random sample of at most 5000 rows.
df_clean['Play'] = df_clean.apply(determine_play, axis=1)
df_sample = (
    df_clean
    .sample(n=min(5000, len(df_clean)), random_state=42)
    .reset_index(drop=True)
)
print(f"Samples used: {len(df_sample)} | Play=1: {int(df_sample.Play.sum())}, Play=0: {len(df_sample)-int(df_sample.Play.sum())}")

# 4) Encode categoricals
# One encoder per text column; both stay in scope because predict_play reuses them.
le_summary = LabelEncoder()
df_sample['Summary_Enc'] = le_summary.fit_transform(df_sample['Summary'])
le_precip = LabelEncoder()
df_sample['Precip_Enc'] = le_precip.fit_transform(df_sample['Precip Type'])

# Feature matrix. Column order matters: predict_play builds its input row in this order.
X = df_sample[[
    'Summary_Enc',
    'Precip_Enc',
    'Temperature (C)',
    'Humidity',
    'Wind Speed (km/h)',
    'Visibility (km)',
]]
y = df_sample['Play']

# 5) Train/test split
# 70/30 split, stratified on the label and seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
print(f"Train: {len(X_train)} | Test: {len(X_test)}")

# 6) Train Decision Tree (tuned lightly to avoid overfit)
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=6,
    min_samples_split=10,
    random_state=42,
)
clf.fit(X_train, y_train)

# 7) Evaluate
# NOTE(review): Play is computed by determine_play from the same weather columns
# that make up X, so the tree can largely re-learn that rule. A very high test
# accuracy is therefore expected and does not by itself show the model generalises.
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("\n")
print(f"Decision Tree Accuracy: {acc*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Not Play','Play']))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# 9) Prediction helper
def _encode_or_default(encoder, value, field):
    """Encode ``value`` with a fitted label encoder, falling back to code 0.

    Args:
        encoder: Fitted encoder exposing ``transform`` (``le_summary`` or
            ``le_precip`` in this notebook).
        value: Raw category to encode, e.g. ``'Clear'`` or ``'rain'``.
        field: Human-readable field name used in the fallback notice.

    Returns:
        The encoder's integer code for ``value``, or ``0`` when the value
        cannot be encoded.
    """
    try:
        return encoder.transform([value])[0]
    except (ValueError, TypeError) as err:
        # Deliberate best-effort fallback, but reported instead of silent:
        # code 0 is itself a real category, so the caller should know a
        # substitution happened. print() is used because this notebook
        # disables warnings globally. Only encoding errors are caught, so
        # KeyboardInterrupt and the like still propagate.
        print(f"Note: could not encode {field} {value!r} ({err}); using code 0 instead.")
        return 0


def predict_play(summary, precip, temp, humidity, wind, visibility):
    """Predict Play / Not Play for a single weather observation.

    Relies on the module-level ``le_summary``, ``le_precip`` and ``clf``
    created by the training code, so that code must have run first.

    Args:
        summary: Weather summary text, e.g. ``'Clear'``.
        precip: Precipitation type text, e.g. ``'rain'``.
        temp: Temperature in °C.
        humidity: Humidity as a fraction (the labelling rule compares it to 0.7).
        wind: Wind speed in km/h.
        visibility: Visibility in km.

    Returns:
        tuple: ``(pred, proba)`` where ``pred`` is the predicted class label
        and ``proba`` is the per-class probability vector for this input.
        A category that cannot be encoded is replaced by code 0 and a notice
        is printed.
    """
    s = _encode_or_default(le_summary, summary, 'summary')
    p = _encode_or_default(le_precip, precip, 'precip type')
    row = [[s, p, temp, humidity, wind, visibility]]

    # The model is fitted on a DataFrame, so it records the training column
    # names. Passing a frame with the same names avoids scikit-learn's
    # feature-name mismatch warning; values and column order are unchanged.
    feature_names = getattr(clf, 'feature_names_in_', None)
    if feature_names is not None and len(feature_names) == len(row[0]):
        inp = pd.DataFrame(row, columns=list(feature_names))
    else:
        inp = np.array(row)

    pred = clf.predict(inp)[0]
    proba = clf.predict_proba(inp)[0]
    return pred, proba

# 10) Quick examples
# Two hand-picked observations as a quick check of predict_play.
print("\n")
print("TEST EXAMPLES")

# Mild temperature, moderate humidity, light wind, long visibility.
p1, pr1 = predict_play('Clear', 'rain', 22.0, 0.5, 12.0, 15.0)
if p1 == 1:
    label1 = 'PLAY'
else:
    label1 = 'NOT PLAY'
# pr1[p1] reads the probability stored at the index equal to the predicted label.
print(f"Good Weather -> {label1} (Conf {pr1[p1]*100:.1f}%)")

# Cold, humid, windy, poor visibility.
p2, pr2 = predict_play('Mostly Cloudy', 'rain', 5.0, 0.9, 30.0, 3.0)
if p2 == 1:
    label2 = 'PLAY'
else:
    label2 = 'NOT PLAY'
print(f"Bad Weather -> {label2} (Conf {pr2[p2]*100:.1f}%)")

print("\nModel ready.")

Dataset loaded: 96453 rows, 12 columns
Samples used: 5000 | Play=1: 733, Play=0: 4267
Train: 3500 | Test: 1500


Decision Tree Accuracy: 99.87%

Classification Report:
              precision    recall  f1-score   support

    Not Play       1.00      1.00      1.00      1280
        Play       1.00      1.00      1.00       220

    accuracy                           1.00      1500
   macro avg       1.00      1.00      1.00      1500
weighted avg       1.00      1.00      1.00      1500

Confusion Matrix:
[[1279    1]
 [   1  219]]


TEST EXAMPLES
Good Weather -> PLAY (Conf 100.0%)
Bad Weather -> NOT PLAY (Conf 100.0%)

Model ready.
Samples used: 5000 | Play=1: 733, Play=0: 4267
Train: 3500 | Test: 1500


Decision Tree Accuracy: 99.87%

Classification Report:
              precision    recall  f1-score   support

    Not Play       1.00      1.00      1.00      1280
        Play       1.00      1.00      1.00       220

    accuracy                           1.00      1500
   macro a

## Prediction (enter your own values)

In [5]:
# Change these values and run just this cell (after running the main model cell once)
custom_summary = 'Partly Cloudy'
custom_precip = 'rain'
custom_temp = 21.0
custom_humidity = 0.55
custom_wind = 10.0
custom_visibility = 12.0

# predict_play comes from the main model cell; it returns the predicted label
# and the per-class probabilities.
pred, prob = predict_play(
    custom_summary,
    custom_precip,
    custom_temp,
    custom_humidity,
    custom_wind,
    custom_visibility,
)
if pred == 1:
    label = 'PLAY'
else:
    label = 'DO NOT PLAY'

# Build the whole report first, then emit it in one go (one line per entry).
report_lines = [
    'CUSTOM DECISION TREE PREDICTION',
    f'Summary:       {custom_summary}',
    f'Precip Type:   {custom_precip}',
    f'Temperature:   {custom_temp}°C',
    f'Humidity:      {custom_humidity*100:.0f}%',
    f'Wind Speed:    {custom_wind} km/h',
    f'Visibility:    {custom_visibility} km',
    f'Result:        {label}',
    f'Confidence:    {prob[pred]*100:.2f}%',
    f'Probabilities: Not Play={prob[0]*100:.1f}%, Play={prob[1]*100:.1f}%',
]
print('\n'.join(report_lines))


CUSTOM DECISION TREE PREDICTION
Summary:       Partly Cloudy
Precip Type:   rain
Temperature:   21.0°C
Humidity:      55%
Wind Speed:    10.0 km/h
Visibility:    12.0 km
Result:        PLAY
Confidence:    100.00%
Probabilities: Not Play=0.0%, Play=100.0%
