In [None]:
1. Write a Python program to train a Decision Tree Classifier on the Iris dataset and print the model accuracy

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the tree and fit it on the training portion only
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict the held-out samples and report the fraction classified correctly
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

2. Write a Python program to train a Decision Tree Classifier using Gini Impurity as the criterion and print the
feature importances

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

# Fetch the Iris data: feature matrix, class labels, and the feature column labels
iris = load_iris()
X, y = iris.data, iris.target
feature_names = iris.feature_names

# Hold out 20% of the samples; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a tree whose split quality is measured with Gini impurity
clf = DecisionTreeClassifier(random_state=42, criterion='gini')
clf.fit(X_train, y_train)

# One importance value per feature, in the same order as feature_names
feature_importances = clf.feature_importances_

# Pair each feature with its importance, then rank from most to least important
importance_table = pd.DataFrame(
    {'Feature': feature_names, 'Importance': feature_importances}
)
feature_importances_df = importance_table.sort_values(
    by='Importance', ascending=False
)

# Show the ranked table
print(feature_importances_df)


3. Write a Python program to train a Decision Tree Classifier using Entropy as the splitting criterion and print the
model accuracy

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Pull the feature matrix and class labels out of the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Reserve 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Split quality is measured with entropy for this tree
clf = DecisionTreeClassifier(random_state=42, criterion='entropy')
clf.fit(X_train, y_train)

# Classify the held-out rows
y_pred = clf.predict(X_test)

# Compare predictions with the true labels and report the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


4. Write a Python program to train a Decision Tree Regressor on a housing dataset and evaluate using Mean
Squared Error (MSE)

# Import necessary libraries
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Fetch the California Housing data: feature matrix and regression target
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the regression tree on the training portion
regressor = DecisionTreeRegressor(random_state=42)
regressor.fit(X_train, y_train)

# Predict the target for the held-out samples
y_pred = regressor.predict(X_test)

# MSE is the requested metric; RMSE is its square root, expressed in the target's units
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
for line in (
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
):
    print(line)


5. Write a Python program to train a Decision Tree Classifier and visualize the tree using graphviz

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
import graphviz

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; only the training portion is used to grow the tree
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the tree that will be drawn
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Rendering options: label nodes with feature/class names and use filled, rounded boxes.
# out_file=None makes export_graphviz return the DOT source as a string.
export_options = {
    'out_file': None,
    'feature_names': iris.feature_names,
    'class_names': iris.target_names,
    'filled': True,
    'rounded': True,
    'special_characters': True,
}
dot_data = export_graphviz(clf, **export_options)

# Hand the DOT source to graphviz and render it to disk under the "decision_tree" base name
graph = graphviz.Source(dot_data)
graph.render("decision_tree")

print("Decision Tree visualization saved as decision_tree.pdf")


6. Write a Python program to train a Decision Tree Classifier with a maximum depth of 3 and compare its
accuracy with a fully grown tree

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; both trees share the same split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


def _fit_and_score(tree):
    """Fit *tree* on the training split and return its accuracy on the test split."""
    tree.fit(X_train, y_train)
    return accuracy_score(y_test, tree.predict(X_test))


# Baseline: no depth limit
full_tree = DecisionTreeClassifier(random_state=42)
accuracy_full = _fit_and_score(full_tree)

# Constrained: at most 3 levels of splits
limited_tree = DecisionTreeClassifier(max_depth=3, random_state=42)
accuracy_limited = _fit_and_score(limited_tree)

# Report both accuracies side by side
print(f"Accuracy of fully grown tree: {accuracy_full:.2f}")
print(f"Accuracy of tree with max depth 3: {accuracy_limited:.2f}")

# State whether the depth limit helped, hurt, or made no difference
if accuracy_limited == accuracy_full:
    print("Limiting the depth does not affect the accuracy.")
elif accuracy_limited > accuracy_full:
    print("Limiting the depth improves the accuracy.")
else:
    print("Limiting the depth worsens the accuracy.")

7. Write a Python program to train a Decision Tree Classifier using min_samples_split=5 and compare its
accuracy with a default tree

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; both trees share the same split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline tree with default hyperparameters
default_tree = DecisionTreeClassifier(random_state=42)
default_tree.fit(X_train, y_train)
y_pred_default = default_tree.predict(X_test)
accuracy_default = accuracy_score(y_test, y_pred_default)

# Tree that only splits nodes holding at least 5 samples
min_samples_tree = DecisionTreeClassifier(random_state=42, min_samples_split=5)
min_samples_tree.fit(X_train, y_train)
y_pred_min_samples = min_samples_tree.predict(X_test)
accuracy_min_samples = accuracy_score(y_test, y_pred_min_samples)

# Report both accuracies side by side
print(f"Accuracy of default tree: {accuracy_default:.2f}")
print(f"Accuracy of tree with min_samples_split=5: {accuracy_min_samples:.2f}")

# Pick the verdict first, then print it once
if accuracy_min_samples > accuracy_default:
    verdict = "Using min_samples_split=5 improves the accuracy."
elif accuracy_min_samples < accuracy_default:
    verdict = "Using min_samples_split=5 worsens the accuracy."
else:
    verdict = "Using min_samples_split=5 does not affect the accuracy."
print(verdict)


8. Write a Python program to apply feature scaling before training a Decision Tree Classifier and compare its
accuracy with unscaled data

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; both trees share the same split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: tree trained on the raw feature values
unscaled_tree = DecisionTreeClassifier(random_state=42)
unscaled_tree.fit(X_train, y_train)
y_pred_unscaled = unscaled_tree.predict(X_test)
accuracy_unscaled = accuracy_score(y_test, y_pred_unscaled)

# Learn the scaling statistics from the training split only, then apply
# the same transformation to both splits so no test data leaks into the fit
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same tree configuration, trained on the standardized features
scaled_tree = DecisionTreeClassifier(random_state=42)
scaled_tree.fit(X_train_scaled, y_train)
y_pred_scaled = scaled_tree.predict(X_test_scaled)
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)

# Report both accuracies side by side
print(f"Accuracy of tree on unscaled data: {accuracy_unscaled:.2f}")
print(f"Accuracy of tree on scaled data: {accuracy_scaled:.2f}")

# State whether scaling helped, hurt, or made no difference
if accuracy_scaled == accuracy_unscaled:
    print("Feature scaling does not affect the accuracy.")
elif accuracy_scaled > accuracy_unscaled:
    print("Feature scaling improves the accuracy.")
else:
    print("Feature scaling worsens the accuracy.")

9. Write a Python program to train a Decision Tree Classifier using One-vs-Rest (OvR) strategy for multiclass
classification

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Wrap a decision tree in the One-vs-Rest meta-estimator and fit it
base_tree = DecisionTreeClassifier(random_state=42)
ovr_tree = OneVsRestClassifier(base_tree)
ovr_tree.fit(X_train, y_train)

# Classify the held-out samples
y_pred_ovr = ovr_tree.predict(X_test)

# Overall accuracy of the OvR ensemble
accuracy_ovr = accuracy_score(y_test, y_pred_ovr)
print(f"Accuracy of OvR classifier: {accuracy_ovr:.2f}")

# Per-class breakdown
report = classification_report(y_test, y_pred_ovr)
print("Classification Report:")
print(report)

10. Write a Python program to train a Decision Tree Classifier and display the feature importance scores

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
# Column labels for the features. iris.target_names holds the class labels
# instead, so it must not be used to label feature importances.
feature_names = iris.feature_names

# Split the dataset into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Get feature importance scores (one value per feature, in feature_names order)
feature_importances = clf.feature_importances_

# Create a DataFrame pairing each feature with its score, ranked highest first
feature_importances_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': feature_importances
}).sort_values(by='Importance', ascending=False)

# Display feature importance scores
print(feature_importances_df)

11. Write a Python program to train a Decision Tree Regressor with max_depth=5 and compare its performance
with an unrestricted tree

# Import necessary libraries
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Fetch the California Housing data: feature matrix and regression target
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Hold out 20% of the samples; both trees share the same split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline: regression tree with no depth limit
unrestricted_tree = DecisionTreeRegressor(random_state=42)
unrestricted_tree.fit(X_train, y_train)
y_pred_unrestricted = unrestricted_tree.predict(X_test)
mse_unrestricted = mean_squared_error(y_test, y_pred_unrestricted)
rmse_unrestricted = np.sqrt(mse_unrestricted)

# Constrained: regression tree limited to 5 levels of splits
restricted_tree = DecisionTreeRegressor(random_state=42, max_depth=5)
restricted_tree.fit(X_train, y_train)
y_pred_restricted = restricted_tree.predict(X_test)
mse_restricted = mean_squared_error(y_test, y_pred_restricted)
rmse_restricted = np.sqrt(mse_restricted)

# Report both errors side by side
print(f"RMSE of unrestricted tree: {rmse_unrestricted:.2f}")
print(f"RMSE of tree with max_depth=5: {rmse_restricted:.2f}")

# Lower RMSE is better, so "improves" means the restricted error is smaller
if rmse_restricted == rmse_unrestricted:
    outcome = "Restricting the depth does not affect the performance."
elif rmse_restricted < rmse_unrestricted:
    outcome = "Restricting the depth improves the performance."
else:
    outcome = "Restricting the depth worsens the performance."
print(outcome)


12. Write a Python program to train a Decision Tree Classifier, apply Cost Complexity Pruning (CCP), and
visualize its effect on accuracy

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; every pruned tree is scored on the same test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Ask an (unfitted) tree for the candidate alpha values along its pruning path
clf = DecisionTreeClassifier(random_state=42)
path = clf.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas = path.ccp_alphas

# Refit one tree per alpha and record its test accuracy, in alpha order
accuracies = []
for ccp_alpha in ccp_alphas:
    clf = DecisionTreeClassifier(ccp_alpha=ccp_alpha, random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Plot accuracy against alpha to show how pruning strength affects the score
plt.figure(figsize=(10, 6))
plt.plot(ccp_alphas, accuracies, marker='o')
plt.title('Effect of Cost Complexity Pruning on Accuracy')
plt.xlabel('Alpha')
plt.ylabel('Accuracy')
plt.show()


13. Write a Python program to train a Decision Tree Classifier and evaluate its performance using Precision,
Recall, and F1-Score

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the tree on the training portion
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Classify the held-out samples
y_pred = clf.predict(X_test)

# All three metrics use the same multiclass averaging mode
averaging = {'average': 'weighted'}
precision = precision_score(y_test, y_pred, **averaging)
recall = recall_score(y_test, y_pred, **averaging)
f1 = f1_score(y_test, y_pred, **averaging)

# Summary figures
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

# Per-class breakdown
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


14. Write a Python program to train a Decision Tree Classifier and visualize the confusion matrix using seaborn

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Fetch the Iris data and separate the feature matrix from the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the tree on the training portion
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Classify the held-out samples
y_pred = clf.predict(X_test)

# Tabulate true labels against predicted labels
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap; both axes are labelled with the class names
class_labels = iris.target_names
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


15. Write a Python program to train a Decision Tree Classifier and use GridSearchCV to find the optimal values
for max_depth and min_samples_split.

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameter grid: every max_depth / min_samples_split pair is tried
param_grid = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10]
}

# Initialize the Decision Tree Classifier used as the search template
clf = DecisionTreeClassifier(random_state=42)

# Perform GridSearchCV with 5-fold cross-validation on the training split only
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Print the best parameters and the best cross-validated score
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_:.2f}")

# GridSearchCV refits the winning configuration on the full training split by
# default (refit=True), so reuse that fitted model instead of rebuilding and
# retraining an identical classifier by hand.
best_clf = grid_search.best_estimator_

# Make predictions on the testing set
y_pred = best_clf.predict(X_test)

# Calculate the accuracy on data the search never saw
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on Test Set: {accuracy:.2f}")



