In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Load the Iris dataset into a pandas DataFrame
iris = load_iris(as_frame=True)
df = iris.frame

# (Optional) Simulate missing data for demonstration
# df.iloc[0, 0] = None  # Uncomment to test handling missing values

# Step 1: Encode labels (target values)
label_encoder = LabelEncoder()
df['target'] = label_encoder.fit_transform(df['target'])

# Step 2: Split features and target
X = df[iris.feature_names]
y = df['target']

# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 4: Handle missing values (if any)
# The imputer is fitted on the training rows only, so the column means used to
# fill gaps never depend on test-set values (fitting it on the full dataset
# before splitting would leak test information into training). The same
# training means are then reused to fill the test rows.
imputer = SimpleImputer(strategy='mean')  # Replace missing values with column mean
X_train = pd.DataFrame(
    imputer.fit_transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test = pd.DataFrame(
    imputer.transform(X_test), columns=X_test.columns, index=X_test.index
)

# Step 5: Train Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Step 6: Make predictions
y_pred = clf.predict(X_test)

# Step 7: Evaluate the model (macro average = unweighted mean over the classes)
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='macro')
rec = recall_score(y_test, y_pred, average='macro')

# Step 8: Print evaluation results
print(f"Accuracy: {acc:.2f}")
print(f"Precision: {prec:.2f}")
print(f"Recall: {rec:.2f}")

Accuracy: 1.00
Precision: 1.00
Recall: 1.00


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
import warnings
warnings.filterwarnings('ignore')

# Enable inline plotting for Jupyter
%matplotlib inline

# Set random seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

print("TensorFlow version:", tf.__version__)
print("Setup complete! ✅")

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
print("Loading MNIST dataset...")
# load_data() returns a (train, test) pair; each element is an (images, labels) tuple
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the shape of each of the four arrays
for description, array in (
    ("Training data", x_train),
    ("Training labels", y_train),
    ("Test data", x_test),
    ("Test labels", y_test),
):
    print(f"{description} shape: {array.shape}")

print(f"Pixel value range: {x_train.min()} to {x_train.max()}")
print(f"Number of classes: {np.unique(y_train).size}")

In [None]:
def visualize_samples(x_data, y_data, num_samples=5, title="Sample Images"):
    """Show the first few images of a dataset side by side with their labels.

    Args:
        x_data: Indexable, sliceable collection of 2-D images.
        y_data: Labels aligned with ``x_data``.
        num_samples: Maximum number of leading images to draw. The value is
            clamped to the amount of data available, so requesting more
            images than exist no longer raises ``IndexError``.
        title: Figure-level title.
    """
    # Clamp to what is actually available; max(0, ...) keeps a negative
    # request from turning into a "drop the last k" slice below.
    shown = max(0, min(num_samples, len(x_data), len(y_data)))

    plt.figure(figsize=(12, 3))
    plt.suptitle(title, fontsize=16)
    pairs = zip(x_data[:shown], y_data[:shown])
    for position, (image, label) in enumerate(pairs, start=1):
        # One row; one column per displayed image
        plt.subplot(1, shown, position)
        plt.imshow(image, cmap='gray')
        plt.title(f'Label: {label}')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Display the first five training digits together with their labels
visualize_samples(x_train, y_train, num_samples=5, title="Training Examples")

In [None]:
print("Preprocessing data...")

num_classes = 10

# Cast to float32 and divide by 255 so pixel values are scaled by 1/255
x_train_norm, x_test_norm = (
    pixels.astype('float32') / 255.0 for pixels in (x_train, x_test)
)

# Append a trailing channel axis of size 1, the layout the CNN input expects
x_train_cnn, x_test_cnn = (
    scaled.reshape(-1, 28, 28, 1) for scaled in (x_train_norm, x_test_norm)
)

# One-hot encode the integer labels
y_train_cat, y_test_cat = (
    keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

print(f"Original shape: {x_train.shape} → CNN shape: {x_train_cnn.shape}")
print(f"Label shape: {y_train.shape} → Categorical shape: {y_train_cat.shape}")
print("Preprocessing complete! ✅")

In [None]:
def create_mnist_cnn(input_shape=(28, 28, 1), num_classes=10):
    """Create an (uncompiled) CNN for image classification.

    The input is declared with an explicit ``keras.Input`` entry instead of
    passing ``input_shape=`` to the first ``Conv2D`` layer; the latter is
    deprecated for Sequential models in Keras 3 and emits a warning there.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to the MNIST layout ``(28, 28, 1)``.
        num_classes: Number of output classes, i.e. units in the final
            softmax layer. Defaults to 10.

    Returns:
        A ``keras.Sequential`` model; the caller is responsible for compiling it.
    """
    return keras.Sequential([
        keras.Input(shape=input_shape),

        # First Conv Block
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        # Second Conv Block
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        # Third Conv Block (no pooling afterwards)
        layers.Conv2D(64, (3, 3), activation='relu'),

        # Classification Head; dropout is applied to the flattened features
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

# Build the network, then attach optimizer, loss and metric.
# categorical_crossentropy expects one-hot encoded labels.
model = create_mnist_cnn()
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

# Print a layer-by-layer summary of the model
print("Model Architecture:")
model.summary()


In [None]:
print("Starting training...")

# Training callbacks for better results
callbacks = [
    # Stop once validation accuracy has not improved for 3 epochs and roll the
    # model back to the best weights seen.
    keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True,
        verbose=1
    ),
    # Multiply the learning rate by 0.2 after 2 epochs without val_loss
    # improvement, never going below 1e-4.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=2,
        min_lr=0.0001,
        verbose=1
    )
]

# Train model
# Validation data is held out from the TRAINING set via validation_split
# (Keras takes the last 10% of the arrays, before shuffling). The test set must
# not be used here: both callbacks make decisions from the validation metrics,
# so validating on the test set would bias the test scores reported later.
history = model.fit(
    x_train_cnn, y_train_cat,
    batch_size=128,
    epochs=15,
    validation_split=0.1,
    callbacks=callbacks,
    verbose=1
)

print("Training complete! ✅")

In [None]:
print("Evaluating model...")

# The predicted digit is the index of the largest softmax output per row
y_pred_proba = model.predict(x_test_cnn, verbose=0)
y_pred = y_pred_proba.argmax(axis=1)

# Metrics are computed against the integer labels (not the one-hot version);
# precision and recall use macro averaging.
macro_avg = {'average': 'macro'}
test_accuracy = accuracy_score(y_test, y_pred)
test_precision = precision_score(y_test, y_pred, **macro_avg)
test_recall = recall_score(y_test, y_pred, **macro_avg)

print("\n📊 MODEL PERFORMANCE:")
print("=" * 40)
print(f"Test Accuracy:  {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall:    {test_recall:.4f}")

# Compare against the 95% accuracy target
goal_reached = test_accuracy > 0.95
if goal_reached:
    print("🎉 SUCCESS: Achieved >95% test accuracy!")
else:
    print(f"⚠️  Current accuracy: {test_accuracy*100:.2f}%")
    print("💡 Try training longer or adjusting the model")

In [None]:
def plot_training_history(history):
    """Plot training vs. validation curves for accuracy and loss.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (one value per epoch), e.g. the return value of ``model.fit``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (history key, axis label, panel title) for the left and right panel
    panels = (
        ('accuracy', 'Accuracy', 'Model Accuracy Over Time'),
        ('loss', 'Loss', 'Model Loss Over Time'),
    )

    for ax, (key, name, heading) in zip(axes, panels):
        # Blue = training curve, red = validation curve
        ax.plot(history.history[key], 'b-', label=f'Training {name}', linewidth=2)
        ax.plot(history.history[f'val_{key}'], 'r-', label=f'Validation {name}', linewidth=2)
        ax.set_title(heading, fontsize=14)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(name)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Draw the accuracy and loss curves recorded during training
plot_training_history(history=history)

In [None]:
def visualize_predictions(model, x_data, y_true, num_samples=5):
    """Show model predictions on randomly chosen images.

    Each image is titled with its true label, the predicted label and the
    confidence of the prediction; the title is green when the prediction is
    correct and red otherwise. A one-line tally of correct predictions is
    printed afterwards.

    Args:
        model: Object with ``predict(batch, verbose=0)`` returning one row of
            per-class scores per input image.
        x_data: Array of images; each image must be reshapeable to (28, 28).
        y_true: Integer labels aligned with ``x_data``.
        num_samples: Number of images to draw. Clamped to ``len(x_data)``,
            because sampling without replacement raises ``ValueError`` when
            more samples are requested than exist.
    """
    y_true = np.asarray(y_true)  # allow plain sequences; needed for fancy indexing
    num_samples = min(num_samples, len(x_data))
    if num_samples == 0:
        # Nothing to sample or predict on
        print("Predictions shown: 0/0 correct")
        return

    # Get random samples (draws from NumPy's global random state)
    indices = np.random.choice(len(x_data), num_samples, replace=False)

    # Make predictions
    predictions = model.predict(x_data[indices], verbose=0)
    predicted_classes = np.argmax(predictions, axis=1)

    plt.figure(figsize=(16, 4))
    plt.suptitle('Model Predictions on Test Images', fontsize=16)

    for i, idx in enumerate(indices):
        plt.subplot(1, num_samples, i + 1)
        plt.imshow(x_data[idx].reshape(28, 28), cmap='gray')

        # Get prediction details; confidence is the top class score in percent
        confidence = np.max(predictions[i]) * 100
        true_label = y_true[idx]
        pred_label = predicted_classes[i]

        # Color code: green if correct, red if wrong
        color = 'green' if true_label == pred_label else 'red'

        plt.title(f'True: {true_label} | Pred: {pred_label}\nConfidence: {confidence:.1f}%',
                  color=color, fontsize=10)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

    # Show prediction statistics
    correct = int(np.count_nonzero(y_true[indices] == predicted_classes))
    print(f"Predictions shown: {correct}/{num_samples} correct")

# Spot-check the trained model on five randomly drawn test digits
print("🔍 Model predictions on random test images:")
visualize_predictions(model, x_test_cnn, y_test, num_samples=5)