In [32]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier

def load_data():
    data = load_wine()
    # print(data.target_names)
    X=pd.DataFrame(data.data,columns=data.feature_names)
    Y=pd.Series(data.target,name='target')
    # print(X.describe())
    # print(X.corr())
    # print(X.info())
    return X,Y

def manual_depth_search(X_train, X_test, Y_train, Y_test):
    for depth in range(1, 15):
        model = DecisionTreeClassifier(criterion="gini",random_state=42,max_depth=depth, min_samples_leaf=5)
        model.fit(X_train, Y_train)
    train_acc = accuracy_score(Y_train, model.predict(X_train))
    test_acc = accuracy_score(Y_test, model.predict(X_test))
    print(f"Depth {depth} | Train: {train_acc:.3f} | Test: {test_acc:.3f}")

def grid_search_tuning(X_train, y_train):
    print("\nGridSearchCV Results:\n")
    param_grid = {
        "max_depth": [2, 3, 4, 5, 6, 7],
        "min_samples_leaf": [1, 3, 5, 7]
    }
    grid = GridSearchCV(DecisionTreeClassifier(random_state=42),param_grid,cv=5,scoring="accuracy")
    grid.fit(X_train, y_train)
    print("Best Parameters:", grid.best_params_)
    print("Best CV Score:", grid.best_score_)
    return grid.best_estimator_

def main():
    X,Y=load_data()
    X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)
    
    manual_depth_search(X_train, X_test, Y_train, Y_test)
    
    best_model = grid_search_tuning(X_train, Y_train)
    test_accuracy = accuracy_score(Y_test, best_model.predict(X_test))
    print("\nFinal Test Accuracy (GridSearch Model):", test_accuracy*100)

if __name__ == "__main__":
    main()



Depth 14 | Train: 0.972 | Test: 0.944

GridSearchCV Results:

Best Parameters: {'max_depth': 3, 'min_samples_leaf': 1}
Best CV Score: 0.9224137931034484

Final Test Accuracy (GridSearch Model): 94.44444444444444
