# Decision Tree Classifier Example

## Data Loading and Preprocessing

In [1]:
# Import required libraries
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import sys

In [2]:
# Load dataset from CSV file
df = pd.read_csv("dataset.csv")
print("Original Dataset:")
print(df)

Original Dataset:
    Age  Experience  Rank Nationality   Go
0    36          10     9          UK   NO
1    42          12     4         USA   NO
2    23           4     6           N   NO
3    52           4     4         USA   NO
4    43          21     8         USA  YES
5    44          14     5          UK   NO
6    66           3     7           N  YES
7    35          14     9          UK  YES
8    52          13     7           N  YES
9    35           5     9           N  YES
10   24           3     5         USA   NO
11   18           3     7          UK  YES
12   45           9     9          UK  YES


In [3]:
# Convert categorical variables to numerical
# Nationality mapping: UK->0, USA->1, N->2
nationality_map = {'UK': 0, 'USA': 1, 'N': 2}
df['Nationality'] = df['Nationality'].map(nationality_map)

In [4]:
# Convert Go/No-Go to binary (1/0)
go_map = {'YES': 1, 'NO': 0}
df['Go'] = df['Go'].map(go_map)

print("\nProcessed Dataset:")
print(df)


Processed Dataset:
    Age  Experience  Rank  Nationality  Go
0    36          10     9            0   0
1    42          12     4            1   0
2    23           4     6            2   0
3    52           4     4            1   0
4    43          21     8            1   1
5    44          14     5            0   0
6    66           3     7            2   1
7    35          14     9            0   1
8    52          13     7            2   1
9    35           5     9            2   1
10   24           3     5            1   0
11   18           3     7            0   1
12   45           9     9            0   1


## Feature Selection

In [5]:
# Select all columns except last one as features
X = df.iloc[:,:-1]  # All rows, all columns except last
print("\nFeature Matrix (X):")
print(X)


Feature Matrix (X):
    Age  Experience  Rank  Nationality
0    36          10     9            0
1    42          12     4            1
2    23           4     6            2
3    52           4     4            1
4    43          21     8            1
5    44          14     5            0
6    66           3     7            2
7    35          14     9            0
8    52          13     7            2
9    35           5     9            2
10   24           3     5            1
11   18           3     7            0
12   45           9     9            0


In [6]:
# Select last column as target variable
y = df.iloc[:, -1]  # All rows, last column only
print("\nTarget Variable (y):")
print(y)


Target Variable (y):
0     0
1     0
2     0
3     0
4     1
5     0
6     1
7     1
8     1
9     1
10    0
11    1
12    1
Name: Go, dtype: int64


## Model Training and Visualization

In [None]:
# Create and train decision tree classifier
# Using default parameters (gini impurity, unlimited depth)
dtree = DecisionTreeClassifier()
dtree = dtree.fit(X, y)

# Visualize the decision tree
feature_names = ['Age', 'Experience', 'Rank', 'Nationality']
tree.plot_tree(dtree, feature_names=feature_names)

# Save visualization to output
plt.savefig(sys.stdout.buffer)
sys.stdout.flush()

AttributeError: 'OutStream' object has no attribute 'buffer'

## Notes
- The decision tree uses Gini impurity as the splitting criterion
- No max_depth is specified, so the tree grows until all leaves are pure
- Feature importance can be checked using dtree.feature_importances_
- For production use, consider adding train/test split and evaluation metrics