# DTSA 5022: Trees, SVM, and Unsupervised Learning

## Course Overview and Quick Reference Guide

This notebook serves as a comprehensive overview and quick reference guide for the key concepts, techniques, and implementations covered in this course.

### Course Objectives
- Understanding decision trees and ensemble methods
- Implementing Support Vector Machines
- Working with unsupervised learning algorithms
- Applying clustering and dimensionality reduction

In [None]:
# Import common libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Display settings
%matplotlib inline
plt.style.use('seaborn')
pd.set_option('display.max_columns', None)

## Week 1: Decision Trees and Random Forests

### Key Concepts
- 

### Important Terms
- 

### Code Examples

In [None]:
def train_and_visualize_tree(X, y, max_depth=3):
    """Train a decision tree classifier and plot the tree and its feature importances.

    Parameters
    ----------
    X : pandas.DataFrame or array-like of shape (n_samples, n_features)
        Training features. Column names are used as feature labels when ``X``
        has a ``columns`` attribute; otherwise names ``x0, x1, ...`` are generated.
    y : array-like of shape (n_samples,)
        Class labels.
    max_depth : int, default=3
        Maximum depth passed to ``DecisionTreeClassifier``.

    Returns
    -------
    DecisionTreeClassifier
        The fitted tree.
    """
    # Train decision tree
    tree = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    tree.fit(X, y)

    # plot_tree is given plain Python lists of strings: some scikit-learn
    # releases reject a pandas Index / NumPy array for these arguments.
    if hasattr(X, 'columns'):
        feature_names = [str(col) for col in X.columns]
    else:
        feature_names = [f'x{i}' for i in range(len(tree.feature_importances_))]
    # classes_ holds the sorted unique labels seen during fit, which is the
    # order plot_tree expects for class_names.
    class_names = [str(label) for label in tree.classes_]

    # Visualize tree
    plt.figure(figsize=(20, 10))
    plot_tree(tree, feature_names=feature_names, class_names=class_names,
              filled=True, rounded=True)
    plt.title(f'Decision Tree (max_depth={max_depth})')
    plt.show()

    # Feature importance, most important first
    importance = pd.DataFrame({
        'feature': feature_names,
        'importance': tree.feature_importances_
    }).sort_values('importance', ascending=False)

    plt.figure(figsize=(10, 6))
    sns.barplot(data=importance, x='importance', y='feature')
    plt.title('Feature Importance')
    plt.show()

    return tree

## Week 2: Support Vector Machines

### Key Concepts
- 

### Important Components
- 

### Code Examples

In [None]:
def train_and_visualize_svm(X, y, kernel='rbf', grid_step=0.02):
    """Train an SVM classifier on two features and plot its decision regions.

    Parameters
    ----------
    X : pandas.DataFrame of shape (n_samples, 2)
        Training features. Exactly two columns are required because the
        decision regions are drawn in the plane of those two features.
    y : array-like of shape (n_samples,)
        Class labels (numeric or string).
    kernel : str, default='rbf'
        Kernel passed to ``SVC``.
    grid_step : float, default=0.02
        Desired spacing of the evaluation grid. The spacing is coarsened
        automatically so that no axis has more than 1000 grid points.

    Returns
    -------
    SVC
        The fitted classifier.

    Raises
    ------
    ValueError
        If ``X`` does not have exactly two columns.
    """
    if X.shape[1] != 2:
        raise ValueError(
            f'train_and_visualize_svm needs exactly 2 features to draw the '
            f'decision boundary, got {X.shape[1]}'
        )

    # Train SVM
    svm = SVC(kernel=kernel, random_state=42)
    svm.fit(X, y)

    # Create mesh grid for visualization (1 unit of padding around the data)
    first, second = X.iloc[:, 0], X.iloc[:, 1]
    x_min, x_max = first.min() - 1, first.max() + 1
    y_min, y_max = second.min() - 1, second.max() + 1
    # Cap the grid resolution: a fixed 0.02 step on wide-ranging (unscaled)
    # data would otherwise allocate an enormous mesh.
    max_points_per_axis = 1000
    step_x = max(grid_step, (x_max - x_min) / max_points_per_axis)
    step_y = max(grid_step, (y_max - y_min) / max_points_per_axis)
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step_x),
                         np.arange(y_min, y_max, step_y))

    # Predict on a frame with the training column names so scikit-learn does
    # not warn about missing feature names.
    grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
    # Map labels to their index in the sorted classes_ array so that string
    # labels can be coloured and both layers share one colour coding.
    Z = np.searchsorted(svm.classes_, svm.predict(grid)).reshape(xx.shape)
    label_codes = np.searchsorted(svm.classes_, np.asarray(y).ravel())

    # Plot decision boundary
    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(first, second, c=label_codes, alpha=0.8)
    plt.title(f'SVM Decision Boundary (kernel={kernel})')
    plt.show()

    return svm

## Week 3: Clustering Algorithms

### Key Concepts
- 

### Important Methods
- 

### Code Examples

In [None]:
def perform_clustering(X, n_clusters=3, max_k=10):
    """Run K-means, plot the clusters on the first two features, and draw an elbow curve.

    Parameters
    ----------
    X : pandas.DataFrame of shape (n_samples, n_features)
        Data to cluster. The scatter plot uses the first two columns.
    n_clusters : int, default=3
        Number of clusters for the returned model.
    max_k : int, default=10
        Largest k tried in the elbow plot. It is reduced to ``len(X)`` when
        there are fewer samples, since K-means cannot fit more clusters than
        samples.

    Returns
    -------
    KMeans
        The model fitted with ``n_clusters`` clusters.
    """
    # n_init is set explicitly: its default differs between scikit-learn
    # versions, which would change results and emit a FutureWarning.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    clusters = kmeans.fit_predict(X)

    # Visualize clusters
    centers = kmeans.cluster_centers_
    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=clusters, cmap='viridis')
    plt.scatter(centers[:, 0], centers[:, 1],
                marker='x', s=200, linewidths=3, color='r', label='Centroids')
    plt.title(f'K-means Clustering (k={n_clusters})')
    plt.colorbar(scatter)
    plt.legend()
    plt.show()

    # Elbow method: inertia for k = 1 .. min(max_k, n_samples)
    k_range = range(1, min(max_k, len(X)) + 1)
    inertias = [
        KMeans(n_clusters=k, n_init=10, random_state=42).fit(X).inertia_
        for k in k_range
    ]

    plt.figure(figsize=(8, 6))
    plt.plot(k_range, inertias, 'bx-')
    plt.title('Elbow Method')
    plt.xlabel('k')
    plt.ylabel('Inertia')
    plt.show()

    return kmeans

## Week 4: Dimensionality Reduction

### Key Concepts
- 

### Important Techniques
- 

### Code Examples

In [None]:
def perform_pca(X, n_components=2):
    """Fit PCA, plot cumulative explained variance, and show component loadings.

    Parameters
    ----------
    X : pandas.DataFrame or array-like of shape (n_samples, n_features)
        Input data. Column names label the loadings heatmap when ``X`` has a
        ``columns`` attribute; otherwise names ``x0, x1, ...`` are generated.
    n_components : int, float, str or None, default=2
        Passed straight to ``PCA``; any value PCA accepts is supported
        (e.g. ``0.95`` or ``None``), not only integers.

    Returns
    -------
    pca : PCA
        The fitted estimator.
    X_pca : numpy.ndarray
        ``X`` projected onto the principal components.
    """
    # Perform PCA (seeded so solvers that use randomness are reproducible)
    pca = PCA(n_components=n_components, random_state=42)
    X_pca = pca.fit_transform(X)

    # Use the fitted shape rather than the n_components argument, which may
    # be a float, a string, or None.
    n_fitted, n_features = pca.components_.shape

    # Explained variance ratio
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, n_fitted + 1),
             np.cumsum(pca.explained_variance_ratio_), 'bo-')
    plt.title('Cumulative Explained Variance Ratio')
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.show()

    # Component weights (rows: components, columns: original features)
    if hasattr(X, 'columns'):
        feature_names = list(X.columns)
    else:
        feature_names = [f'x{i}' for i in range(n_features)]
    components = pd.DataFrame(
        pca.components_,
        columns=feature_names,
        index=[f'PC{i+1}' for i in range(n_fitted)]
    )

    plt.figure(figsize=(12, 8))
    sns.heatmap(components, annot=True, cmap='coolwarm', center=0)
    plt.title('PCA Components')
    plt.show()

    return pca, X_pca