# Flower Species Classification - Example Notebook

This notebook demonstrates how to use the flower classification project interactively.

## 1. Setup and Imports

In [None]:
import sys
import os

# Make the project importable from this notebook by adding the parent of the
# current working directory to the module search path.
# os.path.abspath('') resolves to the current working directory, so the parent
# is presumably the project root that holds `config` and `src` — this assumes
# the notebook is run from a direct subdirectory of it.
project_root = os.path.dirname(os.path.abspath(''))
# Guard against duplicates: re-running this cell would otherwise append the
# same directory to sys.path again on every execution.
if project_root not in sys.path:
    sys.path.append(project_root)

import config
from src.data_utils import load_data, explore_data, preprocess_data, split_data, create_sample_data
from src.model_utils import create_classifier, train_model, evaluate_model, cross_validate_model
from src.visualization import plot_feature_distributions, plot_correlation_matrix, plot_pairplot

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

## 2. Create and Load Data

In [None]:
# Generate the sample dataset on first run, when nothing exists at the raw path yet
raw_data_path = config.RAW_DATA_PATH
raw_data_missing = not os.path.exists(raw_data_path)
if raw_data_missing:
    create_sample_data(raw_data_path)

# Read the dataset and preview its first rows as the cell output
df = load_data(raw_data_path)
df.head()

## 3. Explore the Data

In [None]:
# Run the project's exploration helper (imported from src.data_utils) on the
# loaded DataFrame. Its return value is not captured here.
# NOTE(review): presumably prints summary information about df — confirm.
explore_data(df)

## 4. Visualize Data

In [None]:
# Correlation matrix: hand the full DataFrame to the plotting helper imported
# from src.visualization.
# NOTE(review): which columns are correlated is decided inside the helper — confirm.
plot_correlation_matrix(df)

In [None]:
# Feature distributions: hand the full DataFrame to the plotting helper
# imported from src.visualization.
# NOTE(review): presumably draws one distribution per feature column — confirm.
plot_feature_distributions(df)

In [None]:
# Pairplot: hand the full DataFrame to the plotting helper imported from
# src.visualization.
# NOTE(review): presumably a pairwise scatter grid of the features — confirm.
plot_pairplot(df)

## 5. Preprocess and Split Data

In [None]:
# Preprocess: the call's three results are bound to X, y and scaler
# (presumably features, labels and the scaler object); scaling is requested
# explicitly via scale_features=True.
# NOTE(review): preprocessing runs before the train/test split. If
# preprocess_data fits the scaler on all rows, test-set statistics leak into
# training — confirm against src.data_utils.
X, y, scaler = preprocess_data(df, scale_features=True)

# Split into train and test parts. Only X and y are passed, so any split ratio
# or random seed comes from split_data's own defaults.
X_train, X_test, y_train, y_test = split_data(X, y)

## 6. Train a Model

In [None]:
# Create a classifier through the project factory using the 'random_forest'
# key, then train it on the training split. `model` is rebound to whatever
# train_model returns, so later cells use that returned object.
model = create_classifier('random_forest')
model = train_model(model, X_train, y_train)

## 7. Evaluate Model

In [None]:
# Evaluate on test set: the test split is passed with detailed=True and the
# result is kept in `metrics`.
# NOTE(review): presumably detailed=True adds a fuller report to the output — confirm.
metrics = evaluate_model(model, X_test, y_test, detailed=True)

In [None]:
# Cross-validation on the training split only (the test split is not passed).
# NOTE(review): `model` has already been trained at this point; whether
# cross_validate_model refits a fresh copy per fold is not visible here — confirm.
cv_results = cross_validate_model(model, X_train, y_train)

## 8. Make Predictions

In [None]:
# Run the trained model over the held-out features
predictions = model.predict(X_test)

# Pair each true label with its prediction, column by column
result_columns = {
    'True Species': y_test.values,
    'Predicted Species': predictions
}
results = pd.DataFrame(result_columns)

# Preview the first ten rows as the cell output
results.head(10)

## 9. Feature Importance

In [None]:
# Imported in this cell rather than in the setup cell, so the cell carries its
# own dependency.
from src.visualization import plot_feature_importance

# Plot importances for whichever estimator is currently bound to `model`,
# passing config.FEATURE_COLUMNS alongside it (presumably as the feature names).
# NOTE(review): presumably requires an estimator that exposes feature
# importances, such as the Random Forest trained earlier — confirm.
plot_feature_importance(model, config.FEATURE_COLUMNS)

## 10. Try Different Models

In [None]:
# Display name -> classifier key passed to create_classifier.
models = {
    'Random Forest': 'random_forest',
    'Logistic Regression': 'logistic_regression',
    'SVM': 'svm',
    'KNN': 'knn',
}

# Result of evaluate_model for each classifier, keyed by display name.
comparison_results = {}

for name, model_type in models.items():
    print(f"\nTraining {name}...")
    # Use loop-local names rather than `model` / `metrics`: rebinding those
    # notebook-wide variables here would replace the Random Forest and its
    # detailed metrics from the earlier cells, so re-running the prediction or
    # feature-importance cells afterwards would silently use the last
    # classifier trained in this loop.
    candidate = create_classifier(model_type)
    candidate = train_model(candidate, X_train, y_train)
    candidate_metrics = evaluate_model(candidate, X_test, y_test, detailed=False)
    comparison_results[name] = candidate_metrics

# Display comparison: transposed so each classifier becomes a row
# (assumes evaluate_model returns a mapping of metric name -> value).
comparison_df = pd.DataFrame(comparison_results).T
comparison_df

In [None]:
# Visualize comparison
# Imported in this cell rather than in the setup cell, so the cell carries its
# own dependency.
from src.visualization import plot_model_comparison

# Pass the per-classifier results dict (display name -> evaluate_model result)
# built by the comparison loop to the plotting helper.
plot_model_comparison(comparison_results)