In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
data = {
    'Attendance': ['high', 'high', 'low', 'low', 'low', 'high'],
    'Studyhours': ['high', 'low', 'high', 'low', 'low', 'high'],
    'Previousgrade': ['good', 'average', 'average', 'poor', 'poor', 'good'],
    'Result': ['pass', 'pass', 'pass', 'fail', 'fail', 'pass']
}

df = pd.DataFrame(data)
print("Original Dataset:\n", df
encoder = LabelEncoder()
for column in df.columns:
    df[column] = encoder.fit_transform(df[column])
print("\nEncoded Dataset:\n", df)
X = df.drop('Result', axis=1)
y = df['Result']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
dt_default = DecisionTreeClassifier(random_state=42)
dt_default.fit(X_train, y_train)

y_pred_default = dt_default.predict(X_test)

print("\nDefault Decision Tree Accuracy:",
      accuracy_score(y_test, y_pred_default))
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 2, 3, 4, 5],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=2,
    scoring='accuracy'
)

grid.fit(X_train, y_train)

print("\nBest Parameters Found:")
print(grid.best_params_)

# Best model
best_model = grid.best_estimator_
y_pred_tuned = best_model.predict(X_test)

print("\nTuned Decision Tree Accuracy:",
      accuracy_score(y_test, y_pred_tuned))

print("\nConfusion Matrix:\n",
      confusion_matrix(y_test, y_pred_tuned))

print("\nClassification Report:\n",
      classification_report(y_test, y_pred_tuned))


Original Dataset:
   Attendance Studyhours Previousgrade Result
0       high       high          good   pass
1       high        low       average   pass
2        low       high       average   pass
3        low        low          poor   fail
4        low        low          poor   fail
5       high       high          good   pass

Encoded Dataset:
    Attendance  Studyhours  Previousgrade  Result
0           0           0              1       1
1           0           1              0       1
2           1           0              0       1
3           1           1              2       0
4           1           1              2       0
5           0           0              1       1

Default Decision Tree Accuracy: 0.5

Best Parameters Found:
{'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}

Tuned Decision Tree Accuracy: 0.5

Confusion Matrix:
 [[0 0]
 [1 1]]

Classification Report:
               precision    recall  f1-score   support

     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
