In [None]:
# Import necessary libraries
from sklearn import tree
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import datasets
import pandas as pd

# Load the SMS spam collection and convert it to a pandas DataFrame
spam_dataset = datasets.load_dataset('codesignal/sms-spam-collection', split='train')
spam_dataset = pd.DataFrame(spam_dataset)

# Define X (input features, taken from the "message" column) and
# Y (output labels, taken from the "label" column)
X = spam_dataset["message"]
Y = spam_dataset["label"]

# Hold out 20% of the data for testing. Stratifying on Y keeps the label
# proportions the same in both subsets. This is a single stratified split,
# not cross-validation.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42, stratify=Y)

# Initialize the CountVectorizer (bag-of-words token counts)
count_vectorizer = CountVectorizer()

# Learn the vocabulary from the training data only, then encode it
X_train_count = count_vectorizer.fit_transform(X_train)

# Encode the test data with the vocabulary learned from the training data,
# so no information from the test set leaks into the features
X_test_count = count_vectorizer.transform(X_test)

# Single source of truth for the tree depth: used both to build the model
# and to report it, so the printed message can never disagree with the model.
MAX_DEPTH = 10

# Initialize the DecisionTreeClassifier with a bounded depth. random_state is
# fixed (same seed as the split) so repeated runs yield the same tree.
decision_tree_model_v2 = tree.DecisionTreeClassifier(max_depth=MAX_DEPTH, random_state=42)

# Fit the model on the training data
decision_tree_model_v2.fit(X_train_count, Y_train)

# Make predictions on the test data
y_pred_v2 = decision_tree_model_v2.predict(X_test_count)

# Calculate the accuracy of the model on the held-out test set
accuracy_v2 = metrics.accuracy_score(Y_test, y_pred_v2)

# Print the accuracy together with the depth the model was actually built with
print(f"Accuracy of Decision Tree Classifier with max_depth={MAX_DEPTH}: {accuracy_v2:.2f}")