In [164]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder #--> To convert text to numbers (machine understands only numbers)

# Step 1: Read play.csv into a DataFrame and show it as the cell output
df = pd.read_csv(filepath_or_buffer="play.csv")
df

Unnamed: 0,Weather,Temperature,Humidity,Windy,Play
0,Sunny,Hot,High,False,No
1,Sunny,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Rain,Mild,High,False,Yes
4,Rain,Cool,Normal,False,Yes
5,Rain,Cool,Normal,True,No
6,Overcast,Cool,Normal,True,Yes
7,Sunny,Mild,High,False,No
8,Sunny,Cool,Normal,False,Yes
9,Rain,Mild,Normal,False,Yes


In [151]:
# Step 2: Fit one LabelEncoder per column and replace each column with its integer codes
# NOTE: df is overwritten in place, so re-run Step 1 before re-running this cell;
# otherwise the encoders would be refit on the integer codes and lose the original labels.
encoders = {}
for column in df.columns:
    values = df[column]
    # read_csv parses True/False text as bool. Cast such columns to str so the
    # encoder's classes are the labels 'False'/'True' and a string input such as
    # 'False' can be transformed later (the resulting codes are unchanged: 0/1).
    if pd.api.types.is_bool_dtype(values):
        values = values.astype(str)
    le = LabelEncoder()  # a separate encoder per column keeps each label mapping independent
    df[column] = le.fit_transform(values)
    encoders[column] = le  # keep the fitted encoder to encode new inputs / decode predictions


In [153]:
# Display the encoder currently bound to `le`, i.e. the one created in the last
# iteration of the Step 2 loop (fitted on the last column of df). The encoders for
# all columns are kept in the `encoders` dict.
le

In [155]:
# Step 3: Split data into features (X) and target (Y)
# Select the features by name instead of dropping only 'Play', so that any extra
# column in the CSV (e.g. a saved index such as 'Unnamed: 0') is not used for
# training and X has exactly the four features that are encoded for prediction.
feature_columns = ['Weather', 'Temperature', 'Humidity', 'Windy']
X = df[feature_columns]
Y = df['Play']
Y

0     0
1     0
2     1
3     1
4     1
5     0
6     1
7     0
8     1
9     1
10    1
11    1
12    1
13    0
Name: Play, dtype: int32

In [157]:
# Step 4: Train model
# Fix the seed: the tree permutes the features at random when searching for splits,
# so equally good splits can otherwise be chosen differently from run to run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, Y)



In [159]:
# Step 5: Encode the test input with the encoder that was fitted for each feature
raw_sample = {
    'Weather': 'Sunny',
    'Temperature': 'Mild',
    'Humidity': 'Normal',
    'Windy': 'False',
}
encoded_sample = {
    feature: encoders[feature].transform([label])[0]
    for feature, label in raw_sample.items()
}
# Use a one-row DataFrame with named columns (same order as above) rather than a
# bare list: the model was fitted on a DataFrame, so this lets scikit-learn check
# the feature names and avoids its "X does not have valid feature names" warning.
sample = pd.DataFrame([encoded_sample])

# Step 6: Predict
prediction = model.predict(sample)
print("Prediction (encoded):", prediction)

# Step 7: Decode the prediction back to the original 'Play' label
decoded = encoders['Play'].inverse_transform(prediction)
print("Prediction (decoded):", decoded[0])


Prediction (encoded): [1]
Prediction (decoded): Yes




In [162]:
# Checking the accuracy of the model on a held-out split
from sklearn.base import clone
from sklearn.model_selection import train_test_split  # splits a dataset into a training part and a testing part
from sklearn.metrics import accuracy_score

# Hold out 40 percent of the rows for testing; the fixed seed makes the score reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=42)

# Fit an unfitted copy of the model (same hyperparameters) on the training part.
# Refitting `model` itself here would replace the model trained on all rows, and
# that reduced model is what the later cells would then save and visualize.
eval_model = clone(model)
eval_model.fit(X_train, Y_train)
predictions1 = eval_model.predict(X_test)
score = accuracy_score(Y_test, predictions1)
score

0.6666666666666666

In [166]:
import joblib

# Persist the current `model` object to weather_data.joblib in the working directory;
# the call returns the list of written file names, which the cell displays.
joblib.dump(value=model, filename="weather_data.joblib")


['weather_data.joblib']

In [170]:
# Visualization of this model: export the fitted tree to a Graphviz .dot file
from sklearn import tree

tree.export_graphviz(
    model,
    out_file="weather_data.dot",
    # Take the names from the training frame so they always match the model's
    # features in number and order (export_graphviz raises on a length mismatch).
    feature_names=list(X.columns),
    class_names=encoders['Play'].classes_,   # original target labels, e.g. ['No', 'Yes']
    label='all',
    rounded=True,
    filled=True
)
