In [1]:
# Import necessary libraries
from sklearn.tree import DecisionTreeClassifier, export_text
import pandas as pd

# Define the dataset as a dictionary
data_dict = {
    'Age': ['Young', 'Young', 'Old', 'Old', 'Young', 'Old'],
    'Income': [3000, 1000, 4000, 1500, 2000, 2500],
    'Buys_Car': ['No', 'No', 'Yes', 'No', 'Yes', 'Yes']
}

# Convert the dictionary into a DataFrame
df = pd.DataFrame(data_dict)

df.head()

Unnamed: 0,Age,Income,Buys_Car
0,Young,3000,No
1,Young,1000,No
2,Old,4000,Yes
3,Old,1500,No
4,Young,2000,Yes


In [2]:
# One-hot encode categorical variables
df = pd.get_dummies(df, columns=['Age'], drop_first=True)  # Drop first to avoid multicollinearity

df.head()

Unnamed: 0,Income,Buys_Car,Age_Young
0,3000,No,True
1,1000,No,True
2,4000,Yes,False
3,1500,No,False
4,2000,Yes,True


In [3]:
# Map target variable to binary manually (No=0, Yes=1)
df['Buys_Car'] #"No  "

0     No
1     No
2    Yes
3     No
4    Yes
5    Yes
Name: Buys_Car, dtype: object

In [4]:
df['Buys_Car'] = df['Buys_Car'].str.strip()


In [5]:
# Strip leading/trailing spaces and ensure consistent case

# Map target variable to binary manually (No=0, Yes=1)
df['Buys_Car'] = df['Buys_Car'].map({'No': 0, 'Yes': 1})

# Check the column
(df['Buys_Car'])


0    0
1    0
2    1
3    0
4    1
5    1
Name: Buys_Car, dtype: int64

In [15]:

# Define features (X) and target (y)
X = df[['Income', 'Age_Young']]  # Use the one-hot encoded column
y = df['Buys_Car'] #Target Column


In [16]:

# Train a decision tree classifier
clf = DecisionTreeClassifier(criterion='entropy')
clf.fit(X, y)


In [17]:

# Display the decision tree rules
decision_tree_rules = export_text(clf, feature_names=['Income', 'Age_Young'])
print(decision_tree_rules)

|--- Income <= 0.25
|   |--- class: 0
|--- Income >  0.25
|   |--- Income <= 0.58
|   |   |--- class: 1
|   |--- Income >  0.58
|   |   |--- Age_Young <= 0.50
|   |   |   |--- class: 1
|   |   |--- Age_Young >  0.50
|   |   |   |--- class: 0



#### Not Covered in Class

In [7]:
# Manual Min-Max Scaling for 'Income'
min_income = df['Income'].min()
max_income = df['Income'].max()
df['Income'] = (df['Income'] - min_income) / (max_income - min_income)

# Define features (X) and target (y)
X = df[['Income', 'Age_Young']]  # Use the one-hot encoded column
y = df['Buys_Car']

# Train a decision tree classifier
clf = DecisionTreeClassifier(criterion='entropy', random_state=0)
clf.fit(X, y)

# Display the decision tree rules
decision_tree_rules = export_text(clf, feature_names=['Income', 'Age_Young'])
print("Decision Tree Rules:")
print(decision_tree_rules)



Decision Tree Rules:
|--- Income <= 0.25
|   |--- class: 0
|--- Income >  0.25
|   |--- Income <= 0.58
|   |   |--- class: 1
|   |--- Income >  0.58
|   |   |--- Income <= 0.83
|   |   |   |--- class: 0
|   |   |--- Income >  0.83
|   |   |   |--- class: 1



In [8]:
# Print the scaled DataFrame for reference
print("\nScaled DataFrame:")
print(df)


Scaled DataFrame:
     Income  Buys_Car  Age_Young
0  0.666667         0       True
1  0.000000         0       True
2  1.000000         1      False
3  0.166667         0      False
4  0.333333         1       True
5  0.500000         1      False


In [9]:
# Import necessary libraries
from sklearn.tree import DecisionTreeClassifier, export_text
import pandas as pd

# Define the dataset as a dictionary
data_ = {
    'Age': ['Young', 'Young', 'Old', 'Old', 'Young', 'Old'],
    'Income': [3000, 1000, 4000, 1500, 2000, 2500],
    'Buys_Car': ['No', 'No', 'Yes', 'No', 'Yes', 'Yes']
}

# Convert the dictionary into a DataFrame
data = pd.DataFrame(data_)

data.head()

Unnamed: 0,Age,Income,Buys_Car
0,Young,3000,No
1,Young,1000,No
2,Old,4000,Yes
3,Old,1500,No
4,Young,2000,Yes


In [10]:
# One-hot encode categorical variables
data = pd.get_dummies(data, columns=['Age'], drop_first=True)  # Drop first to avoid multicollinearity

data.head()

Unnamed: 0,Income,Buys_Car,Age_Young
0,3000,No,True
1,1000,No,True
2,4000,Yes,False
3,1500,No,False
4,2000,Yes,True


In [11]:
# Create interaction term to prioritize 'Age_Young'
data['Age_Income_Interaction'] = data['Age_Young'] * data['Income']

data

Unnamed: 0,Income,Buys_Car,Age_Young,Age_Income_Interaction
0,3000,No,True,3000
1,1000,No,True,1000
2,4000,Yes,False,0
3,1500,No,False,0
4,2000,Yes,True,2000
5,2500,Yes,False,0


In [12]:
# Define features (X) and target (y)
X_ = data[['Age_Young', 'Income', 'Age_Income_Interaction']]
y_ = data['Buys_Car']

# Train the decision tree
clf = DecisionTreeClassifier(criterion='entropy', random_state=0)
clf.fit(X_, y_)

# Display the decision tree rules
decision_tree_rules = export_text(clf, feature_names=['Age_Young', 'Income', 'Age_Income_Interaction'])
print("Decision Tree Rules:")
print(decision_tree_rules)


Decision Tree Rules:
|--- Income <= 1750.00
|   |--- class: No
|--- Income >  1750.00
|   |--- Age_Income_Interaction <= 2500.00
|   |   |--- class: Yes
|   |--- Age_Income_Interaction >  2500.00
|   |   |--- class: No



In [13]:
print(clf.classes_)  # Output: ['No', 'Yes']


['No' 'Yes']
