### 6. Write a Python program to:
- Load the Iris Dataset
- Train a Decision Tree Classifier using the Gini criterion
- Print the model’s accuracy and feature importances

In [27]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Unpack the Iris bunch into the arrays and label lists used below.
iris = load_iris()
x, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

# Gini-criterion decision tree, fitted on the training portion only
# (fit() returns the estimator itself, so construction and fitting can be chained).
clf = DecisionTreeClassifier(criterion='gini', random_state=4).fit(x_train, y_train)

# Accuracy is measured on the held-out test samples.
y_pred = clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Report each feature name next to the importance the fitted tree assigned it.
print("\nFeature Importances:")
importance_by_feature = zip(feature_names, clf.feature_importances_)
for feature, importance in importance_by_feature:
    print(f"{feature:20}: {importance:.3f}")

Model Accuracy: 0.97

Feature Importances:
sepal length (cm)   : 0.017
sepal width (cm)    : 0.000
petal length (cm)   : 0.517
petal width (cm)    : 0.467


### 7. Write a Python program to:
- Load the Iris Dataset
- Train a Decision Tree Classifier with max_depth=3 and compare its accuracy to a fully-grown tree.

In [33]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris features and labels.
iris = load_iris()
x, y = iris.data, iris.target

# 80/20 train/test split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)


def _fit_and_score(tree):
    """Fit `tree` on the training split and evaluate it on the test split.

    Returns a tuple of (fitted tree, test-set predictions, test-set accuracy).
    """
    tree.fit(x_train, y_train)
    predictions = tree.predict(x_test)
    return tree, predictions, accuracy_score(y_test, predictions)


# Unconstrained tree: no max_depth is passed.
full_tree, full_pred, full_accuracy = _fit_and_score(
    DecisionTreeClassifier(criterion='gini', random_state=1)
)

# Same configuration, but capped at max_depth=3.
shallow_tree, shallow_pred, shallow_accuracy = _fit_and_score(
    DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=1)
)

# Side-by-side accuracy report.
print(f"Fully-grown Tree Accuracy: {full_accuracy:.2f}")
print(f"Shallow Tree Accuracy (max_depth=3): {shallow_accuracy:.2f}")

Fully-grown Tree Accuracy: 0.97
Shallow Tree Accuracy (max_depth=3): 0.97


### 8. Write a Python program to:
- Load the Boston Housing Dataset
- Train a Decision Tree Regressor
- Print the Mean Squared Error (MSE) and feature importances

In [46]:
# The Boston Housing dataset has been removed from scikit-learn, so this model is built on the California Housing dataset instead.
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Fetch the California Housing bunch and pull out features, target and column names.
housing = fetch_california_housing()
X, y, feature_names = housing.data, housing.target, housing.feature_names

# Reserve 20% of the rows for testing; seed fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a decision tree regressor on the training rows
# (fit() returns the estimator, so it is chained onto the constructor).
regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Mean squared error of the predictions on the held-out rows.
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# Report each column name next to the importance the fitted tree assigned it.
print("\nFeature Importances:")
importance_by_name = zip(feature_names, regressor.feature_importances_)
for name, importance in importance_by_name:
    print(f"{name:20}: {importance:.3f}")

Mean Squared Error: 0.50

Feature Importances:
MedInc              : 0.529
HouseAge            : 0.052
AveRooms            : 0.053
AveBedrms           : 0.029
Population          : 0.031
AveOccup            : 0.131
Latitude            : 0.094
Longitude           : 0.083


### 9. Write a Python program to:
- Load the Iris Dataset
- Tune the Decision Tree’s max_depth and min_samples_split using GridSearchCV
- Print the best parameters and the resulting model accuracy

In [51]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

# Iris features and labels.
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train/test split; the test part is only used for the final accuracy below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Base estimator whose hyperparameters are searched.
dtree = DecisionTreeClassifier(random_state=1)

# Candidate values: depths 2..6 and split thresholds 2, 4, 6, 8.
param_grid = {
    'max_depth': list(range(2, 7)),
    'min_samples_split': list(range(2, 9, 2)),
}

# 5-fold cross-validated grid search scored by accuracy, using all available cores;
# the search is fitted on the training data only.
grid_search = GridSearchCV(
    dtree,
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
).fit(X_train, y_train)

# Winning parameter combination.
print("Best Parameters:", grid_search.best_params_)

# Evaluate the best estimator found by the search on the held-out test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Set Accuracy: {accuracy:.2f}")

Best Parameters: {'max_depth': 4, 'min_samples_split': 2}
Test Set Accuracy: 0.97
