# Blood Discriminator - Exploratory Analysis

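This notebook demonstrates the usage of the Blood Discriminator system end to end: splitting data, training the CNN classifier, evaluating it, and comparing it with a traditional ML baseline. It runs on randomly generated synthetic images and labels, so the reported metrics are expected to be near chance level and only illustrate the workflow.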

In [None]:
import sys
sys.path.insert(0, '../src')

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from preprocessing.data_loader import BloodDataPreprocessor, split_data
from models.classifier import CNNBloodClassifier, TraditionalMLClassifier
from evaluation.metrics import ModelEvaluator

%matplotlib inline

## 1. Data Loading and Preprocessing

In [None]:
# Initialize preprocessor (not used on the synthetic arrays in this cell)
preprocessor = BloodDataPreprocessor(img_size=(224, 224), normalize=True)

# For demonstration, create synthetic data.
# A seeded Generator makes the images and labels identical on every
# Restart & Run All. Drawing float32 directly avoids first materialising a
# float64 array twice the size and then casting it down.
n_samples = 1000
rng = np.random.default_rng(42)
X = rng.random((n_samples, 224, 224, 3), dtype=np.float32)  # values in [0, 1)
y = rng.integers(0, 4, size=n_samples)  # integer labels 0..3

# Split data into train / validation / test partitions (70% / 15% / 15%)
X_train, X_val, X_test, y_train, y_val, y_test = split_data(
    X, y, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, random_state=42
)

print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")

## 2. Class Distribution Analysis

In [None]:
# Display names for the integer labels; a label value is used as an index here
class_names = ['A', 'B', 'AB', 'O']

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# One panel per split, left to right
splits = [('Train', y_train), ('Validation', y_val), ('Test', y_test)]
for ax, (title, split_labels) in zip(axes, splits):
    # bincount with minlength reports an explicit zero for any class that is
    # absent from a split, so every panel shows all classes in the same order
    # (np.unique would silently drop the missing class from the chart).
    counts = np.bincount(split_labels, minlength=len(class_names))
    ax.bar(class_names, counts)
    ax.set_title(f'{title} Set Distribution')
    ax.set_xlabel('Blood Type')
    ax.set_ylabel('Count')

plt.tight_layout()
plt.show()

## 3. Train CNN Model

In [None]:
# Build the CNN classifier for 224x224 three-channel inputs and four output classes
cnn_model = CNNBloodClassifier(input_shape=(224, 224, 3), num_classes=4, learning_rate=0.001)

# Fit on the training split, passing the validation split alongside it.
# epochs is kept small so the demo finishes quickly.
# NOTE(review): `history` is later read via `history.history[...]`, so it
# presumably is a Keras-style History object; confirm against the classifier.
history = cnn_model.train(X_train, y_train, X_val, y_val, epochs=10, batch_size=32)

## 4. Training History Visualization

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

# One panel per tracked quantity:
# (training key, validation key, panel title, y-axis label)
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]

for ax, (train_key, val_key, panel_title, y_label) in zip(axes, panels):
    # Training and validation curves share the panel so they can be compared per epoch
    ax.plot(history.history[train_key], label='Train')
    ax.plot(history.history[val_key], label='Validation')
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

## 5. Model Evaluation

In [None]:
# Predictions on the test split: one call through the classifier wrapper and
# one directly on its underlying `model` attribute.
# NOTE(review): presumably the first yields class labels and the second
# per-class probabilities; this runs inference over X_test twice, so confirm
# whether y_pred could be derived from y_pred_proba instead.
y_pred = cnn_model.predict(X_test)
y_pred_proba = cnn_model.model.predict(X_test)

# Run the full evaluation with both prediction arrays
evaluator = ModelEvaluator(class_names=class_names)
results = evaluator.comprehensive_evaluation(y_test, y_pred, y_pred_proba)

# Show the evaluator's text report
report = evaluator.generate_report()
print(report)

## 6. Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Pin the label set so the matrix is always len(class_names) x len(class_names)
# and its rows/columns line up with the tick labels. Without `labels`, a class
# that appears in neither y_test nor y_pred is dropped and the axis labels
# below would be attached to the wrong classes.
cm = confusion_matrix(y_test, y_pred, labels=list(range(len(class_names))))

# Explicit figure/axes so the heatmap and its labels target the same Axes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Confusion Matrix - CNN Model')
ax.set_ylabel('True Label')  # rows are the true classes
ax.set_xlabel('Predicted Label')  # columns are the predicted classes
plt.show()

## 7. Compare with Traditional ML Models

In [None]:
# Baseline: a random forest fitted on the train and validation splits stacked
# together along the sample axis.
# NOTE(review): despite the *_flat names, nothing in this cell reshapes the
# image arrays; presumably TraditionalMLClassifier flattens or extracts
# features internally. Confirm against its implementation.
rf_model = TraditionalMLClassifier(model_type='random_forest', n_estimators=50)
X_train_flat = np.concatenate((X_train, X_val), axis=0)
y_train_flat = np.concatenate((y_train, y_val), axis=0)
rf_model.train(X_train_flat, y_train_flat)

# Predict on the held-out test split
y_pred_rf = rf_model.predict(X_test)

# Evaluate with a separate evaluator instance (no probability array is passed here)
evaluator_rf = ModelEvaluator(class_names=class_names)
results_rf = evaluator_rf.comprehensive_evaluation(y_test, y_pred_rf)

# Report both accuracies; `results` comes from the CNN evaluation cell
print(f"CNN Accuracy: {results['quality']['accuracy']:.4f}")
print(f"Random Forest Accuracy: {results_rf['quality']['accuracy']:.4f}")

## 8. Per-Class Performance Analysis

In [None]:
# Gather per-class scores from the CNN results, keyed '<metric>_<class name>'.
# A metric/class pair missing from the results falls back to 0.
metrics_to_plot = ['precision', 'recall', 'f1']
cnn_scores = {
    metric: [results['quality'].get(f'{metric}_{cls}', 0) for cls in class_names]
    for metric in metrics_to_plot
}

# Grouped bar chart: one group per class, one bar per metric
x = np.arange(len(class_names))
width = 0.25

fig, ax = plt.subplots(figsize=(12, 6))

for i, metric in enumerate(metrics_to_plot):
    # Shift each metric's bars right by one bar width so they sit side by side
    ax.bar(x + i * width, cnn_scores[metric], width=width, label=metric.capitalize())

ax.set(
    xlabel='Blood Type',
    ylabel='Score',
    title='Per-Class Performance Metrics - CNN Model',
)
# Put each tick under the middle bar of its three-bar group
ax.set_xticks(x + width)
ax.set_xticklabels(class_names)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()