## Классификационное дерево  
[Теория sklearn](https://scikit-learn.org/stable/modules/tree.html)

Датасет: препарат, который был назначен пациенту

In [ ]:
# Блок с используемыми библиотеками
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

from sklearn.preprocessing import LabelEncoder

!pip install pydotplus

import numpy as np

In [ ]:
# Load the dataset from the notebook's resources folder and preview its
# first rows.
df = pd.read_csv(filepath_or_buffer='resources/decision_tree.csv')
df.head()

In [ ]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [ ]:
# Number of missing values in each column.
df.isna().sum()

In [ ]:
# Summary statistics, transposed so that each described column is a row.
df.describe().transpose()

In [ ]:
# (column, title) pairs; each pair is drawn as a count plot in the next free
# cell of a 3x3 subplot grid.
count_panels = (
    ('Sex', 'Countplot of Sex'),
    ('BP', 'Countplot of Blood_Pressure'),
    ('Cholesterol', 'Countplot of Cholesterol'),
    ('Drug', 'Countplot of Drug'),
)

plt.figure(figsize=(8, 6))
for slot, (column, title) in enumerate(count_panels, start=1):
    plt.subplot(3, 3, slot)
    sns.countplot(x=column, data=df, palette='viridis')
    plt.title(title)

plt.tight_layout()
plt.show()

In [ ]:
# Replace the text categories with integer codes.
#
# NOTE(review): `transform` is bound to the same object as `df` (no copy is
# made), so the assignments below also rewrite `df` in place. The cells that
# follow read `df` after this point and therefore see the encoded values.
transform = df

# One encoder per column, so that every fitted mapping (`classes_`) stays
# available afterwards, e.g. for `inverse_transform`. `Drug` gets a dedicated
# encoder: re-fitting `le_cholesterol` on it would overwrite the Cholesterol
# mapping.
le_sex = LabelEncoder()
le_BP = LabelEncoder()
le_cholesterol = LabelEncoder()
le_drug = LabelEncoder()

for column, encoder in (
    ('Sex', le_sex),
    ('BP', le_BP),
    ('Cholesterol', le_cholesterol),
    ('Drug', le_drug),
):
    transform[column] = encoder.fit_transform(transform[column])

transform.head()

In [ ]:
# Features are every column except the last one; the last column is the
# target. `.copy()` makes X an independent frame, so the column assignments
# below do not operate on a slice of `df` (which triggers pandas'
# chained-assignment warning).
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

# Fresh encoders for the feature columns; these rebind the names used by the
# earlier encoding cell.
# NOTE(review): if that cell has already run, `df` holds integer codes and
# these fits are identity mappings - confirm whether re-encoding is needed.
le_sex = LabelEncoder()
le_BP = LabelEncoder()
le_cholesterol = LabelEncoder()

# Each column is encoded exactly once.
X['Sex'] = le_sex.fit_transform(X['Sex'])
X['BP'] = le_BP.fit_transform(X['BP'])
X['Cholesterol'] = le_cholesterol.fit_transform(X['Cholesterol'])
X.info()

In [ ]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [ ]:
# Shallow, regularised tree (depth <= 3, at least 10 samples per leaf) that
# the next cell draws.
dt = DecisionTreeClassifier(max_depth=3, min_samples_leaf=10, random_state=1)

# Fit on the training split only: the plotting cell takes its class names
# from y_train and the later model is trained on X_train, so fitting on the
# full X, y here would be inconsistent with both.
dt.fit(X_train, y_train)

In [ ]:
from sklearn.tree import plot_tree

# Class labels exactly as the fitted tree orders them, converted to strings:
# plot_tree concatenates each class name into the node text, so integer
# labels (the target was label-encoded earlier) are not accepted.
unique_classes = [str(label) for label in dt.classes_]

plt.figure(figsize=(20, 10))
plot_tree(
    dt,
    feature_names=list(X.columns),  # show column names instead of x[i]
    class_names=unique_classes,
    filled=True,
)
plt.show()

In [ ]:
# Tree with default growth settings, trained on the training split only
# (fit() returns the estimator itself, so it can be chained).
dt = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

# Predictions for both splits, plus class probabilities for the test split.
y_pred = dt.predict(X_test)
y_prob = dt.predict_proba(X_test)
y_pred_train = dt.predict(X_train)

In [ ]:
# accuracy_score takes (y_true, y_pred); the true labels go first, matching
# the classification_report call in the next cell. A large gap between the
# two numbers would indicate overfitting.
print('Accuracy of Decision Tree-Train: ', accuracy_score(y_train, y_pred_train))
print('Accuracy of Decision Tree-Test: ', accuracy_score(y_test, y_pred))

In [ ]:
# Per-class precision, recall and F1 on the held-out test split.
print(classification_report(y_true=y_test, y_pred=y_pred))