# In [None]:  (Jupyter cell prompt left over from the notebook export)
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report
import seaborn as sns

# Load the Iris dataset and hold out 30% of the samples for evaluation.
# random_state pins the shuffle so the same split is produced on every run.
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a decision tree classifier.
# scikit-learn permutes the candidate features at every split, so ties between
# equally good splits can resolve differently from run to run. Fixing
# random_state keeps the fitted tree (and its plot and predictions) reproducible.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Calculate confusion matrix (rows: true class, columns: predicted class).
# Passing every class label explicitly guarantees one row/column per species
# even if a class were missing from the test split, so the labelled DataFrame
# below always has matching dimensions.
class_labels = np.arange(len(data.target_names))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
cm_df = pd.DataFrame(cm, index=data.target_names, columns=data.target_names)

# Q1 answer text: what a decision tree classifier is and the steps it follows.
# The empty first and last entries reproduce the newline that opens and the
# newline that closes the printed block.
decision_tree_explanation = "\n".join((
    "",
    "**Decision Tree Classifier**:",
    "- **Algorithm**: Decision trees classify data by splitting it into subsets based on feature values. Each split is based on maximizing information gain (or minimizing impurity).",
    "- **How It Works**:",
    "  1. **Start** at the root node (entire dataset).",
    "  2. **Split** the data into subsets based on the feature that results in the highest information gain (or lowest impurity).",
    "  3. **Repeat** the process for each subset, creating branches and nodes until a stopping criterion is met (e.g., max depth, minimum samples per leaf).",
    "  4. **Predict** class labels by traversing the tree from the root to a leaf node, based on feature values of the input data.",
    "",
))

# Q2 answer text: the impurity measures (entropy, information gain, Gini)
# that drive split selection. Empty entries are the blank lines between the
# numbered items plus the opening and closing newlines.
math_intuition_explanation = "\n".join((
    "",
    "**Mathematical Intuition**:",
    "1. **Entropy**: Measures the impurity or randomness in the data. For a node with classes, entropy is calculated as:",
    "   - Entropy = - Σ (p_i * log2(p_i))",
    "   - Where p_i is the probability of class i in the node.",
    "",
    "2. **Information Gain**: Measures the reduction in entropy after splitting the data on a feature. It is calculated as:",
    "   - Information Gain = Entropy(before split) - Σ (weighted entropy(after split))",
    "",
    "3. **Gini Impurity**: An alternative to entropy, calculated as:",
    "   - Gini Impurity = 1 - Σ (p_i^2)",
    "   - Where p_i is the proportion of class i in the node.",
    "",
    "4. **Tree Construction**: The algorithm selects the feature that maximizes information gain (or minimizes Gini impurity) for each split, creating branches until the stopping criteria are met.",
    "",
))

# Q3 answer text: how a decision tree is trained and used for a two-class
# problem. The empty first and last entries give the opening and closing
# newlines of the printed block.
binary_classification_explanation = "\n".join((
    "",
    "**Binary Classification with Decision Trees**:",
    "- **Process**:",
    "  1. **Train** the decision tree on a binary target variable (e.g., 'yes' or 'no').",
    "  2. **Split** the data based on the feature that best separates the two classes (e.g., highest information gain).",
    "  3. **Continue** splitting until the nodes are pure or meet other stopping criteria.",
    "  4. **Predict** the class for new samples by following the splits until reaching a leaf node that provides the class label.",
    "",
))

# Q4 answer text: the axis-aligned, region-based picture of a decision tree.
# The empty first and last entries give the opening and closing newlines of
# the printed block.
geometric_intuition_explanation = "\n".join((
    "",
    "**Geometric Intuition**:",
    "- **Concept**: A decision tree partitions the feature space into rectangular regions. Each region corresponds to a class label.",
    "- **Visualization**: The tree creates decision boundaries that are parallel to the axes (features). For binary classification, the boundaries can be visualized as lines dividing the feature space into regions where each region is classified into one of the two classes.",
    "- **Prediction**: New samples are classified based on the region they fall into.",
    "",
))

# Q5 answer text: definition of a confusion matrix and its four cell types.
# The empty first and last entries give the opening and closing newlines of
# the printed block.
confusion_matrix_explanation = "\n".join((
    "",
    "**Confusion Matrix**:",
    "- **Definition**: A table used to evaluate the performance of a classification model by showing the number of correct and incorrect predictions for each class.",
    "- **Components**:",
    "  - **True Positives (TP)**: Correctly predicted positive cases.",
    "  - **True Negatives (TN)**: Correctly predicted negative cases.",
    "  - **False Positives (FP)**: Incorrectly predicted positive cases.",
    "  - **False Negatives (FN)**: Incorrectly predicted negative cases.",
    "- **Usage**: Helps in calculating performance metrics like accuracy, precision, recall, and F1 score.",
    "",
))

# Q6 answer text: a worked example (TP=30, TN=30, FP=2, FN=8) with the
# resulting precision, recall and F1. Empty entries are the blank separator
# line plus the opening and closing newlines.
confusion_matrix_example = "\n".join((
    "",
    "**Confusion Matrix Example**:",
    "- **True Positives (TP)**: 30 (correctly predicted positive cases)",
    "- **True Negatives (TN)**: 30 (correctly predicted negative cases)",
    "- **False Positives (FP)**: 2 (incorrectly predicted positive cases)",
    "- **False Negatives (FN)**: 8 (incorrectly predicted negative cases)",
    "",
    "**Metrics Calculation**:",
    "- **Precision** = TP / (TP + FP) = 30 / (30 + 2) = 0.94",
    "- **Recall** = TP / (TP + FN) = 30 / (30 + 8) = 0.79",
    "- **F1 Score** = 2 * (Precision * Recall) / (Precision + Recall) = 2 * (0.94 * 0.79) / (0.94 + 0.79) = 0.86",
    "",
))

# Q7 answer text: factors to weigh when picking an evaluation metric.
# The empty first and last entries give the opening and closing newlines of
# the printed block.
evaluation_metric_selection = "\n".join((
    "",
    "**Choosing Evaluation Metrics**:",
    "- **Considerations**:",
    "  - **Class Imbalance**: Metrics like precision, recall, and F1 score are better for imbalanced datasets.",
    "  - **Business Goals**: Choose metrics that align with the goals (e.g., precision for fraud detection).",
    "  - **Application Context**: Evaluate which metrics best reflect the model's performance in the given context.",
    "",
))

# Q8 answer text: a scenario (spam filtering) where precision matters most.
# The empty first and last entries give the opening and closing newlines of
# the printed block.
precision_importance_example = "\n".join((
    "",
    "**Example of Precision Importance**:",
    "- **Scenario**: Email spam detection.",
    "- **Reason**: False positives (non-spam emails classified as spam) can lead to missed important emails. High precision ensures that most emails classified as spam are indeed spam.",
    "",
))

# Q9 answer text: a scenario (rare-disease diagnosis) where recall matters
# most. The empty first and last entries give the opening and closing
# newlines of the printed block.
recall_importance_example = "\n".join((
    "",
    "**Example of Recall Importance**:",
    "- **Scenario**: Medical diagnosis for a rare disease.",
    "- **Reason**: Missing a positive case (false negatives) can have serious health implications. High recall ensures most patients with the disease are correctly identified.",
    "",
))

# Display results: print each answer under its question heading, in order.
# Every heading after the first starts with "\n" so that a blank line
# separates it from the previous section's text.
qa_sections = (
    ("Q1: Decision Tree Classifier", decision_tree_explanation),
    ("\nQ2: Mathematical Intuition", math_intuition_explanation),
    ("\nQ3: Binary Classification with Decision Trees", binary_classification_explanation),
    ("\nQ4: Geometric Intuition", geometric_intuition_explanation),
    ("\nQ5: Confusion Matrix", confusion_matrix_explanation),
    ("\nQ6: Example of Confusion Matrix and Metrics", confusion_matrix_example),
    ("\nQ7: Choosing Evaluation Metrics", evaluation_metric_selection),
    ("\nQ8: Example of Precision Importance", precision_importance_example),
    ("\nQ9: Example of Recall Importance", recall_importance_example),
)
for qa_heading, qa_text in qa_sections:
    print(qa_heading)
    print(qa_text)

# Plot the decision tree: one box per node, coloured by majority class.
plt.figure(figsize=(15, 10))
# The label arguments are passed as plain lists. load_iris exposes
# target_names as a NumPy array, and some scikit-learn releases validate
# feature_names/class_names strictly as lists and reject arrays; list(...)
# works on every release and does not change the rendered labels.
plot_tree(
    model,
    filled=True,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
)
plt.title('Decision Tree Visualization')
plt.show()
