In [None]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report,accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the drug dataset CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/content/sample_data/drug_data_decisiontree_100k.csv',
)


In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# One encoder per categorical column. They are kept as separate objects so the
# integer codes can be mapped back to the original labels later
# (le_drug.classes_ is used by the report and the tree plot).
le_sex = LabelEncoder()
le_bp = LabelEncoder()
le_Cholesterol = LabelEncoder()
le_drug = LabelEncoder()

# Encode into a copy instead of overwriting `df`. Overwriting made this cell
# non-idempotent: running it a second time refit every encoder on the integer
# codes, so `classes_` no longer held the original string labels.
df_encoded = df.copy()
df_encoded["Sex"] = le_sex.fit_transform(df["Sex"])
df_encoded["BP"] = le_bp.fit_transform(df["BP"])
df_encoded["Cholesterol"] = le_Cholesterol.fit_transform(df["Cholesterol"])
df_encoded["Drug"] = le_drug.fit_transform(df["Drug"])

# Feature matrix and target vector built from the encoded copy.
X = df_encoded[["Age", "Sex", "BP", "Cholesterol", "Na_to_K"]]
y = df_encoded["Drug"]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Gini-based tree limited to depth 5, seeded for repeatable fits.
model = DecisionTreeClassifier(
    criterion='gini',
    max_depth=5,
    random_state=42,
)
# Left as the cell's last expression so the fitted estimator is displayed.
model.fit(X_train, y_train)

In [None]:
# Predict encoded drug labels for the held-out rows.
y_pred = model.predict(X_test)

print("Accuracy", accuracy_score(y_test, y_pred))
# Print the label on its own line. The report is a fixed-width table, and
# putting it on the same line as the label shifted the header row out of
# alignment with the rows below it.
print('Classifier Report')
print(classification_report(y_test, y_pred, target_names=le_drug.classes_))

In [None]:
# Draw the fitted tree with readable feature and class labels.
plt.figure(figsize=(20, 10))
plot_tree(
    model,
    # Pass plain lists of str: some scikit-learn releases (1.3.0, as far as
    # I recall) validate these arguments as `list` and reject a pandas Index
    # or a NumPy array.
    feature_names=list(X.columns),
    class_names=[str(label) for label in le_drug.classes_],
    filled=True,
    rounded=True,
)
plt.title("Decision Tree for drug Classifier")
plt.show()

In [None]:
# Pair each importance score from the fitted tree with its feature name.
feature_importances = pd.Series(
    data=model.feature_importances_,
    index=X.columns,
)

# Horizontal bars: importance on the x-axis, feature name on the y-axis.
sns.barplot(
    y=feature_importances.index,
    x=feature_importances.values,
)
plt.title("Feature Importance")
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Read the dataset from disk, rebinding `df` to a freshly loaded frame.
df = pd.read_csv(
    filepath_or_buffer='/content/sample_data/drug_data_decisiontree_100k.csv',
)


# Separate encoders per column; le_drug is reused later for class names.
le_sex, le_bp, le_chol, le_drug = (
    LabelEncoder(),
    LabelEncoder(),
    LabelEncoder(),
    LabelEncoder(),
)

# Swap each categorical column for its integer codes.
df = df.assign(
    Sex=le_sex.fit_transform(df['Sex']),
    BP=le_bp.fit_transform(df['BP']),
    Cholesterol=le_chol.fit_transform(df['Cholesterol']),
    Drug=le_drug.fit_transform(df['Drug']),
)

# Predictors (X) and encoded target (y).
X = df.loc[:, ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']]
y = df.loc[:, 'Drug']


# 80/20 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Candidate settings for the exhaustive search (2 * 5 * 3 * 3 combinations).
param_grid = dict(
    criterion=['gini', 'entropy'],
    max_depth=[3, 5, 10, 15, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# 5-fold cross-validated grid search scored on accuracy.
grid = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)

# Run the search on the training split only.
grid.fit(X_train, y_train)


# Take the estimator selected by the grid search.
best_model = grid.best_estimator_

# Predict on the held-out split.
y_pred = best_model.predict(X_test)

# Report the chosen parameters and the test-set metrics.
print("Best Hyperparameters:", grid.best_params_)
print("Test Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=le_drug.classes_,
    ),
)


# Label each importance score of the tuned tree with its feature name.
feature_importances = pd.Series(
    data=best_model.feature_importances_,
    index=X.columns,
)

# Horizontal bars: importance on the x-axis, feature name on the y-axis.
sns.barplot(
    y=feature_importances.index,
    x=feature_importances.values,
)
plt.title("Feature Importance")
plt.show()


# Draw the tuned tree with readable feature and class labels.
plt.figure(figsize=(20, 10))
plot_tree(
    best_model,
    # Pass plain lists of str: some scikit-learn releases (1.3.0, as far as
    # I recall) validate these arguments as `list` and reject a pandas Index
    # or a NumPy array.
    feature_names=list(X.columns),
    class_names=[str(label) for label in le_drug.classes_],
    filled=True,
    rounded=True,
)
plt.title("Optimized Decision Tree for Drug Classification")
plt.show()
