In [None]:
!pip install kaggle

### Setup Kaggle Credentials:

1. Go to https://www.kaggle.com/
2. Sign in or create an account
3. Go to Account Settings (click your profile picture → Account)
4. Scroll to "API" section
5. Click "Create New API Token"
6. This downloads a `kaggle.json` file
7. Upload it to this notebook or your environment

In [None]:
# Make sure the ~/.kaggle directory exists (-p: no error if it is already there).
!mkdir -p ~/.kaggle
# Copy kaggle.json from the current working directory into ~/.kaggle/.
!cp kaggle.json ~/.kaggle/
# Mode 600: only the file owner may read or write the token.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

!unzip -q chest-xray-pneumonia.zip -d ./dataset
print("‚úÖ Dataset downloaded and extracted!")

### Method 2: Manual Download

If the Kaggle API doesn't work:

1. Visit: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
2. Click "Download" button
3. Extract the zip file
4. Upload to your notebook environment

Dataset structure:
```
chest_xray/
├── train/
│   ├── NORMAL/
│   └── PNEUMONIA/
├── test/
│   ├── NORMAL/
│   └── PNEUMONIA/
└── val/
    ├── NORMAL/
    └── PNEUMONIA/
```

In [None]:
!pip install numpy pandas opencv-python scikit-image PyWavelets Pillow matplotlib seaborn scikit-learn xgboost ipywidgets
print("‚úÖ All packages installed!")

---
## 📦 Step 2: Install Required Packages

In [None]:
import numpy as np
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')

# Image processing
import cv2
from PIL import Image
from skimage.feature import graycomatrix, graycoprops
import pywt

# Visualization
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
sns.set_style("whitegrid")

# Machine Learning
from sklearn.model_selection import GroupShuffleSplit, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from xgboost import XGBClassifier

# Interactive widgets
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display, clear_output, HTML

import time
import pickle

print("‚úÖ All libraries imported successfully!")

## 🔧 Step 4: Define Feature Extraction Functions

In [None]:
def get_rotation_invariant_glcm_features(image_array, distance=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=32):
    """Compute four GLCM texture statistics averaged over all distances and angles.

    The input is cast to uint8 and, if its brightest pixel does not fit into
    ``levels`` gray levels, rescaled so that the maximum maps to ``levels - 1``.
    A symmetric, normalised gray-level co-occurrence matrix is then built and
    each property is averaged over every (distance, angle) combination.

    Args:
        image_array: 2-D array of pixel intensities.
        distance: Pixel offsets passed to ``graycomatrix`` as ``distances``.
        angles: Directions (radians) passed to ``graycomatrix``.
        levels: Number of gray levels of the co-occurrence matrix.

    Returns:
        ``[dissimilarity, correlation, homogeneity, energy]``.
    """
    pixels = image_array.astype(np.uint8)
    peak = pixels.max()

    # Rescale only when the intensities would overflow the requested number of levels.
    quantized = pixels if peak < levels else np.floor(pixels * ((levels - 1) / peak)).astype(np.uint8)

    glcm = graycomatrix(
        quantized,
        distances=distance,
        angles=angles,
        levels=levels,
        symmetric=True,
        normed=True
    )

    # The mean collapses the per-distance / per-angle values into one number per property.
    property_names = ('dissimilarity', 'correlation', 'homogeneity', 'energy')
    return [np.mean(graycoprops(glcm, prop)) for prop in property_names]

In [None]:
def get_wavelet_features(image_array, wavelet='haar', level=1):
    """Return the energy (sum of squared coefficients) of three wavelet detail bands.

    Args:
        image_array: 2-D array; converted to float before the transform.
        wavelet: Wavelet name forwarded to ``pywt.wavedec2``.
        level: Decomposition depth forwarded to ``pywt.wavedec2``.

    Returns:
        ``[lh_energy, hl_energy, hh_energy]`` computed from the second entry of
        the decomposition result.
    """
    decomposition = pywt.wavedec2(image_array.astype(float), wavelet, level=level)

    # Entry 1 is unpacked into three detail bands; entry 0 is not used here.
    LH, HL, HH = decomposition[1]

    return [np.sum(band**2) for band in (LH, HL, HH)]

In [None]:
def get_multi_feature_embedding(image_path, resize_to=(128, 128), n_blocks=4, bit_depth=16):
    """Encode an image as a flat list of bits built from per-block texture features.

    The image is converted to grayscale, resized to ``resize_to``, contrast-enhanced
    with CLAHE and cut into an ``n_blocks`` x ``n_blocks`` grid. Each block yields
    nine features (four GLCM statistics, three wavelet energies, mean and standard
    deviation of intensity). Every feature is scaled to [0, 1], quantised to
    ``bit_depth`` bits and appended most-significant bit first.

    Args:
        image_path: Path of the image file to read.
        resize_to: ``(width, height)`` the image is resized to; both values must be
            divisible by ``n_blocks``.
        n_blocks: Number of blocks along each axis.
        bit_depth: Number of bits used per feature.

    Returns:
        A list of 0/1 integers of length ``n_blocks**2 * 9 * bit_depth``, or ``None``
        if the image could not be processed (the error is printed).

    Raises:
        ValueError: If ``resize_to`` is not divisible by ``n_blocks``.
    """
    if resize_to[0] % n_blocks != 0 or resize_to[1] % n_blocks != 0:
        raise ValueError("resize_to dimensions must be divisible by n_blocks.")

    # resize_to is (width, height); the pixel array is indexed [row, column].
    block_w = resize_to[0] // n_blocks
    block_h = resize_to[1] // n_blocks

    # Empirical upper bounds used to bring each raw feature into [0, 1].
    MAX_DISSIMILARITY = 16.0
    MAX_INTENSITY = 255.0
    MAX_WAVELET_ENERGY = 500000.0
    max_code = 2**bit_depth - 1

    try:
        # The context manager closes the underlying file as soon as the pixels are read.
        with Image.open(image_path) as source_img:
            img = source_img.convert('L').resize(resize_to)
        img_array = np.array(img).astype(np.uint8)

        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        clahe_img = clahe.apply(img_array)

        embedding = []

        for i in range(n_blocks):
            for j in range(n_blocks):
                block = clahe_img[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w]

                glcm_features = get_rotation_invariant_glcm_features(block)
                wavelet_features = get_wavelet_features(block)
                mean_intensity = np.mean(block)
                std_intensity = np.std(block)

                features = [
                    min(glcm_features[0] / MAX_DISSIMILARITY, 1.0),
                    (glcm_features[1] + 1.0) / 2.0,  # correlation: [-1, 1] -> [0, 1]
                    glcm_features[2],
                    glcm_features[3],
                    min(wavelet_features[0] / MAX_WAVELET_ENERGY, 1.0),
                    min(wavelet_features[1] / MAX_WAVELET_ENERGY, 1.0),
                    min(wavelet_features[2] / MAX_WAVELET_ENERGY, 1.0),
                    mean_intensity / MAX_INTENSITY,
                    std_intensity / MAX_INTENSITY
                ]

                for feat in features:
                    # Clamp so every feature occupies exactly bit_depth bits; a value
                    # outside [0, 1] would otherwise change the embedding length.
                    clamped = min(max(float(feat), 0.0), 1.0)
                    scaled_value = int(clamped * max_code)
                    embedding.extend(int(bit) for bit in format(scaled_value, f'0{bit_depth}b'))

        return embedding
    except Exception as e:
        # Best effort by design: report the failure and let the caller decide what to do.
        print(f"Error processing {image_path}: {e}")
        return None

In [None]:
def embedding_to_analog(embedding, bit_depth=16):
    """Decode a flat bit list into integers, ``bit_depth`` bits per value.

    Each consecutive group of ``bit_depth`` bits is read most-significant bit
    first. A trailing group shorter than ``bit_depth`` is decoded as-is.

    Args:
        embedding: Sequence of 0/1 values; ``None`` or an empty sequence is allowed.
        bit_depth: Number of bits per decoded value.

    Returns:
        List of decoded integers; empty list for an empty or missing embedding.
    """
    if not embedding:
        return []

    chunk_starts = range(0, len(embedding), bit_depth)
    return [
        int(''.join(map(str, embedding[start:start + bit_depth])), 2)
        for start in chunk_starts
    ]

In [None]:
def generate_em_wave(analog_values, feature_weights=None, carrier_freq=5,
                     sampling_rate=50, duration_per_value=0.1, max_value=2**16 - 1):
    """Build an amplitude-modulated sine wave whose envelope follows ``analog_values``.

    Every value occupies ``duration_per_value`` seconds of the signal. Within that
    slot the wave is ``(value / max_value) * weight * sin(2*pi*carrier_freq*t)``.

    Args:
        analog_values: Sequence (list or NumPy array) of feature values.
        feature_weights: Optional per-value multipliers; defaults to all ones.
        carrier_freq: Carrier frequency in Hz.
        sampling_rate: Samples per second.
        duration_per_value: Seconds of signal per value.
        max_value: Value that maps to amplitude 1.0 before weighting.

    Returns:
        ``(t, wave)`` arrays of equal length; ``(array([0]), array([0]))`` when
        ``analog_values`` is ``None`` or empty.
    """
    # len() instead of truthiness: `not array` raises for multi-element NumPy arrays.
    if analog_values is None or len(analog_values) == 0:
        return np.array([0]), np.array([0])

    total_duration = len(analog_values) * duration_per_value
    t = np.linspace(0, total_duration, int(total_duration * sampling_rate), endpoint=False)
    wave = np.zeros_like(t)
    samples_per_value = int(duration_per_value * sampling_rate)

    if feature_weights is None:
        feature_weights = np.ones(len(analog_values))

    for i, value in enumerate(analog_values):
        amp = value / max_value
        weighted_amp = amp * feature_weights[i]

        start = i * samples_per_value
        # Integer truncation can make the slots slightly longer than t; clip to its length.
        end = min(start + samples_per_value, len(t))

        if start < len(t):
            wave[start:end] = weighted_amp * np.sin(2 * np.pi * carrier_freq * t[start:end])

    return t, wave

print("✅ Feature extraction functions defined!")

---
## 📂 Step 5: Load Dataset

In [None]:
root_path = './dataset/chest_xray/train'
# Alternative paths you might need:
# root_path = './chest_xray/train'  # If extracted differently
# root_path = '/kaggle/input/chest-xray-pneumonia/chest_xray/train'  # For Kaggle notebooks

# Fail early with an actionable message instead of a bare error from os.listdir.
if not os.path.isdir(root_path):
    raise FileNotFoundError(
        f"Dataset folder not found: {root_path!r}. "
        "Point root_path at the extracted 'train' directory."
    )

image_paths = []
labels = []

print("Loading dataset...")
# os.listdir returns entries in arbitrary order; sorting fixes the row order so
# that every later seeded sample/split selects the same images on every machine.
for label in sorted(os.listdir(root_path)):
    label_path = os.path.join(root_path, label)
    if os.path.isdir(label_path):
        # Each sub-directory name is used as the class label of the images inside it.
        for img_file in sorted(os.listdir(label_path)):
            if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_paths.append(os.path.join(label_path, img_file))
                labels.append(label)

df = pd.DataFrame({'image_path': image_paths, 'label': labels})

print(f"✅ Loaded {len(df)} images")
print(f"\nDataset shape: {df.shape}")
print(f"\nClasses: {df['label'].unique()}")
print(f"\nClass distribution:\n{df['label'].value_counts()}")

## 📊 Step 6: Explore Data

In [None]:
print("First 5 samples:")
display(df.head())

print("\nDataset Info:")
# DataFrame.info() writes its report itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
df.info()

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# One shared class order for both panels, so a class sits at the same palette
# position in the bar chart and in the pie chart.
label_counts = df["label"].value_counts()
class_order = label_counts.index.tolist()
colors = sns.color_palette("viridis", len(label_counts))

sns.countplot(data=df, x="label", order=class_order, palette="viridis", ax=ax1)
ax1.set_title("Distribution of Classes", fontsize=14, fontweight='bold')
ax1.set_xlabel("Class", fontsize=12)
ax1.set_ylabel("Count", fontsize=12)

# Write the count above every bar.
for p in ax1.patches:
    ax1.annotate(f'{int(p.get_height())}',
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='bottom', fontsize=11, color='black',
                xytext=(0, 5), textcoords='offset points')

ax2.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%',
       startangle=140, colors=colors, textprops={'fontsize': 10, 'weight': 'bold'},
       wedgeprops={'edgecolor': 'black', 'linewidth': 1})
ax2.set_title("Class Distribution - Pie Chart", fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

---
## 🖼️ Step 7: Display Sample Images

In [None]:
num_images = 5
unique_labels = df['label'].unique()

# squeeze=False keeps `axes` two-dimensional even with a single class or column.
fig, axes = plt.subplots(len(unique_labels), num_images,
                         figsize=(15, len(unique_labels) * 3), squeeze=False)

for row_idx, label in enumerate(unique_labels):
    label_images = df[df['label'] == label].head(num_images)['image_path'].tolist()
    # The class name goes above the middle image of the row (or the last available
    # one when the class has fewer images than the middle column index).
    title_col = min(num_images // 2, len(label_images) - 1)
    for col_idx in range(num_images):
        ax = axes[row_idx, col_idx]
        ax.axis('off')
        if col_idx >= len(label_images):
            continue  # class has fewer than num_images images; leave the cell blank
        # Close the file right after matplotlib has copied the pixel data.
        with Image.open(label_images[col_idx]) as img:
            ax.imshow(img, cmap='gray')
        if col_idx == title_col:
            ax.set_title(label, fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

---
## ⚖️ Step 8: Balance Dataset (Optional but Recommended)

In [None]:
print("Original distribution:")
print(df['label'].value_counts())

max_samples = df['label'].value_counts().max()

# Bring every class up to the size of the largest one.
# - Only classes smaller than max_samples are drawn with replacement; the largest
#   class keeps all of its unique images instead of being resampled.
# - Concatenating per-group samples (rather than groupby.apply) keeps the 'label'
#   column regardless of how the installed pandas treats grouping columns in apply.
balanced_df = pd.concat(
    [
        group.sample(n=max_samples, replace=len(group) < max_samples, random_state=42)
        for _, group in df.groupby('label')
    ],
    ignore_index=True,
)

# NOTE: oversampled rows are exact copies of existing images. Any later train/test
# split must keep all copies of an image on the same side to avoid leakage.
df = balanced_df[['image_path', 'label']]

print("\n✅ Balanced distribution:")
print(df['label'].value_counts())

---
## 🎯 Step 9: Extract Features from Images

**⚠️ Warning:** This step is computationally intensive; the run time grows linearly with the number of images.
- Expect ~1-2 seconds per image
- For 5,000 images, that is roughly 1.5-3 hours

In [None]:
USE_SUBSET = False
SUBSET_SIZE = 100

if USE_SUBSET:
    # sample() raises when n exceeds the number of rows, so cap the request.
    subset_n = min(SUBSET_SIZE, len(df))
    df_sample = df.sample(n=subset_n, random_state=42).reset_index(drop=True)
    print(f"⚠️ Using subset of {subset_n} images for testing")
else:
    df_sample = df.copy()
    print(f"Processing all {len(df_sample)} images")

print("\nExtracting features... This may take a while ☕")
print("Progress:")
embeddings = []
# The same image path can occur many times (oversampling duplicates rows);
# the cache makes sure every file is read and processed only once.
embedding_cache = {}
total = len(df_sample)
report_every = max(1, total // 10)

start_time = time.time()

for idx, path in enumerate(df_sample['image_path']):
    if path not in embedding_cache:
        embedding_cache[path] = get_multi_feature_embedding(path)
    embeddings.append(embedding_cache[path])
    if (idx + 1) % report_every == 0:
        elapsed = time.time() - start_time
        progress = (idx + 1) / total * 100
        eta = (elapsed / (idx + 1)) * (total - idx - 1)
        print(f"  {progress:.1f}% ({idx+1}/{total}) - Elapsed: {elapsed:.1f}s - ETA: {eta:.1f}s")

df_sample['multi_feature_embedding'] = embeddings

# get_multi_feature_embedding returns None for unreadable images. Such rows would
# make the feature matrix ragged later on, so they are removed here.
succeeded = [emb is not None for emb in embeddings]
n_failed = len(succeeded) - sum(succeeded)
if n_failed:
    print(f"Skipping {n_failed} image(s) that could not be processed")
    df_sample = df_sample.loc[succeeded].reset_index(drop=True)

df_sample['multi_analog_values'] = df_sample['multi_feature_embedding'].apply(embedding_to_analog)

total_time = time.time() - start_time
per_image = total_time / max(len(df_sample), 1)
print(f"\n✅ Feature extraction complete! Total time: {total_time:.1f}s ({per_image:.2f}s per image)")

---
## 📈 Step 10: Visualize EM Waves

In [None]:
# Plot the simulated wave for the first few images, one panel per image.
N_PLOTS = min(5, len(df_sample))
FEATURES_TO_PLOT = 50

fig, axes = plt.subplots(N_PLOTS, 1, figsize=(14, 2 * N_PLOTS))

# With a single panel plt.subplots returns a bare Axes; wrap it so indexing works.
if N_PLOTS == 1:
    axes = [axes]

plt.suptitle('Simulated EM Wave from Multi-Feature Embedding', fontsize=14, y=1.0)

for i, ax in enumerate(axes):
    label = df_sample['label'].iloc[i]
    analog_values = df_sample['multi_analog_values'].iloc[i]
    # Only the leading features are drawn so the individual slots stay readable.
    analog_subsample = analog_values[:FEATURES_TO_PLOT]
    t, wave = generate_em_wave(analog_subsample, carrier_freq=10, sampling_rate=100)

    ax.plot(t, wave, color='purple', linewidth=1.5)
    ax.set_title(f"Image {i+1} - Label: {label}", fontsize=10, loc='left')
    ax.set_ylabel('Amplitude')
    ax.set_ylim(-1.1, 1.1)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Time ticks are shown on the bottom panel only.
    is_last_panel = (i == N_PLOTS - 1)
    if not is_last_panel:
        ax.set_xticks([])

axes[-1].set_xlabel(f'Time (s) - Showing first {FEATURES_TO_PLOT} features')
plt.tight_layout()
plt.show()

## 🗺️ Step 11: Visualize Feature Maps

In [None]:
# Names of the nine per-block features, in the order they are written into the embedding.
FEATURE_NAMES = [
    'GLCM: Dissimilarity',
    'GLCM: Correlation',
    'GLCM: Homogeneity',
    'GLCM: Energy',
    'Wavelet: LH Energy',
    'Wavelet: HL Energy',
    'Wavelet: HH Energy',
    'Intensity: Mean',
    'Intensity: Std Dev'
]
N_FEATURES_PER_BLOCK = 9
N_BLOCKS = 4

# Feature maps are drawn for the first image of df_sample.
image_label = df_sample['label'].iloc[0]
analog_values = df_sample['multi_analog_values'].iloc[0]

# One row per block, one column per feature.
analog_matrix = np.array(analog_values).reshape(N_BLOCKS * N_BLOCKS, N_FEATURES_PER_BLOCK)

fig, axes = plt.subplots(3, 3, figsize=(12, 10))
fig.suptitle(f'Feature Maps (Label: {image_label})', fontsize=16, y=0.98)

# axes.flat walks the 3x3 grid row by row; analog_matrix.T yields one feature column at a time.
for ax, feature_name, feature_magnitudes in zip(axes.flat, FEATURE_NAMES, analog_matrix.T):
    feature_map = feature_magnitudes.reshape(N_BLOCKS, N_BLOCKS)

    im = ax.imshow(feature_map, cmap='magma', interpolation='nearest')
    ax.set_title(feature_name, fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
    cbar = fig.colorbar(im, ax=ax, fraction=0.045, pad=0.04)
    cbar.ax.tick_params(labelsize=8)

plt.tight_layout()
plt.show()

## 🤖 Step 12: Prepare Data for Machine Learning

In [None]:
# Feature matrix: one row of embedding bits per image.
embedding_rows = df_sample['multi_feature_embedding'].tolist()
X = np.array(embedding_rows)

# Integer class ids; le.classes_ maps each id back to its label.
le = LabelEncoder()
y = le.fit_transform(df_sample['label'])
target_names = le.classes_

print(f"Feature matrix shape: {X.shape}")
print(f"Labels shape: {y.shape}")
print(f"Classes: {target_names}")
print("\nClass encoding:")
for idx, name in enumerate(target_names):
    print(f"  {name}: {idx}")
    

In [None]:
# Rows that share an image_path are copies of one image (the balancing step
# oversamples with replacement). A plain random split would put copies of the
# same image into both sets and inflate the test scores, so the split is done
# per image: all rows of one path end up on the same side.
groups = df_sample['image_path'].to_numpy()
assert len(groups) == len(X), "df_sample and X must be row-aligned"

splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(splitter.split(X, y, groups=groups))

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# No image may appear on both sides of the split.
assert not set(groups[train_idx]) & set(groups[test_idx])

print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
for split_name, split_labels in (("Training", y_train), ("Test", y_test)):
    print(f"\n{split_name} set distribution:")
    unique, counts = np.unique(split_labels, return_counts=True)
    for u, c in zip(unique, counts):
        print(f"  {target_names[u]}: {c}")

## 🎓 Step 13: Train Machine Learning Models

In [None]:
# Candidate classifiers, all seeded for reproducible results.
# XGBoost: `use_label_encoder` is deprecated and ignored by current releases
# (labels are already integer-encoded here), so it is no longer passed.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, solver='liblinear', random_state=42),
    "Decision Tree": DecisionTreeClassifier(max_depth=10, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1),
    "XGBoost": XGBClassifier(n_estimators=100, eval_metric='logloss', random_state=42, n_jobs=-1)
}

# Filled by the training cell: model name -> fitted model, predictions, report, timing.
results = {}
print("Training models...\n")
print("="*80)

In [None]:
for name, model in models.items():
    print(f"\nTraining {name}...")

    # Time the fit only; predicting on the test set is not part of training.
    fit_start = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - fit_start

    y_pred = model.predict(X_test)

    results[name] = {
        'model': model,
        'predictions': y_pred,
        'report': classification_report(y_test, y_pred, target_names=target_names, output_dict=True),
        'time': train_time
    }
    accuracy = results[name]['report']['accuracy']
    print(f"  ✓ Trained in {train_time:.2f}s")
    print(f"  Accuracy: {accuracy:.4f}")
    print("-"*80)
print("\n✅ All models trained!")

## 📊 Step 14: Model Comparison - Confusion Matrices

In [None]:
# Size the grid from the number of trained models so none is silently left out.
n_models = len(results)
n_cols = 2
n_rows = max(1, -(-n_models // n_cols))  # ceiling division

fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 6 * n_rows), squeeze=False)
axes = axes.flatten()
plt.suptitle('Model Comparison: Confusion Matrices', fontsize=16, y=0.995)

# Pin the matrix rows/columns to the encoded class ids so they always line up
# with the tick labels, even if a class is missing from y_test.
class_ids = np.arange(len(target_names))

for ax, (name, res) in zip(axes, results.items()):
    cm = confusion_matrix(y_test, res['predictions'], labels=class_ids)
    acc = res['report']['accuracy']

    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=target_names, yticklabels=target_names,
                ax=ax, cbar_kws={'shrink': 0.8})
    ax.set_title(f"{name}\nAccuracy: {acc:.4f}", fontsize=12, fontweight='bold')
    ax.set_xlabel('Predicted Label', fontsize=10)
    ax.set_ylabel('True Label', fontsize=10)

# Hide grid cells that have no model to show.
for ax in axes[n_models:]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

## 📋 Step 15: Detailed Performance Reports

In [None]:
# Text report per model: timing followed by the per-class metrics.
rule = "=" * 80

print(rule)
print("DETAILED CLASSIFICATION REPORTS")
print(rule)

for name, res in results.items():
    # Blank line, then the model name framed by two rules.
    print(f"\n{rule}\n{name}\n{rule}")
    print(f"Training Time: {res['time']:.2f}s\n")
    print(classification_report(y_test, res['predictions'], target_names=target_names))
    print(rule)
    

In [None]:
summary_data = []

for name, res in results.items():
    report = res['report']
    row = {
        'Model': name,
        'Accuracy': f"{report['accuracy']:.4f}",
    }
    # Per-class columns are generated from target_names instead of hard-coded
    # class names, so the table also works for differently named classes.
    for class_name in target_names:
        class_report = report[class_name]
        row[f'Precision ({class_name})'] = f"{class_report['precision']:.4f}"
        row[f'Recall ({class_name})'] = f"{class_report['recall']:.4f}"
        row[f'F1-Score ({class_name})'] = f"{class_report['f1-score']:.4f}"
    row['Training Time (s)'] = f"{res['time']:.2f}"
    summary_data.append(row)

summary_df = pd.DataFrame(summary_data)

print("\n" + "="*100)
print("SUMMARY PERFORMANCE TABLE")
print("="*100)
display(summary_df)


## 🎯 Step 16: Predict Single Image (Interactive)

In [None]:
def predict_image(image_path, model_name='Random Forest'):
    """Classify one chest X-ray with a trained model and show image and verdict.

    Uses the notebook-level objects ``results`` (trained models), ``le`` (label
    encoder) and ``target_names``, and the feature extractor
    ``get_multi_feature_embedding``.

    Args:
        image_path: Path of the image to classify.
        model_name: Key of the model in ``results``.

    Returns:
        ``(label, proba)`` where ``label`` is the predicted class name and
        ``proba`` the per-class probabilities (``None`` if the model has no
        ``predict_proba``). ``(None, None)`` is returned when the model name is
        unknown or the image cannot be processed, so callers can always unpack
        the result.
    """
    # Validate before any figure is created, so a failure leaves no empty plot behind.
    if model_name not in results:
        print(f"❌ Model '{model_name}' not found")
        return None, None

    print("Extracting features...")
    embedding = get_multi_feature_embedding(image_path)
    if embedding is None:
        print("❌ Error: Could not process image")
        return None, None

    model = results[model_name]['model']
    X_input = np.array([embedding])
    prediction = model.predict(X_input)[0]
    label = le.inverse_transform([prediction])[0]
    proba = model.predict_proba(X_input)[0] if hasattr(model, 'predict_proba') else None

    result_text = f"PREDICTION: {label}\n\n"
    result_text += f"Model: {model_name}\n\n"

    if proba is not None:
        result_text += "Confidence Scores:\n"
        for idx, class_name in enumerate(target_names):
            result_text += f"  {class_name}: {proba[idx]*100:.2f}%\n"

    if label == "PNEUMONIA":
        color = 'red'
        result_text += "\n⚠️ WARNING: Pneumonia detected!\n"
        result_text += "Please consult a medical professional."
    else:
        color = 'green'
        result_text += "\n✓ Normal chest X-ray"

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: the input image (file is closed once the pixels are copied).
    with Image.open(image_path) as img:
        ax1.imshow(img, cmap='gray')
    ax1.set_title('Input Chest X-Ray', fontsize=14, fontweight='bold')
    ax1.axis('off')

    # Right panel: textual verdict in a colour-coded box.
    ax2.text(0.5, 0.5, result_text, ha='center', va='center',
             fontsize=12, bbox=dict(boxstyle='round', facecolor=color, alpha=0.2),
             transform=ax2.transAxes)
    ax2.set_xlim(0, 1)
    ax2.set_ylim(0, 1)
    ax2.axis('off')
    ax2.set_title('Diagnosis Result', fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.show()

    return label, proba

print("✅ Prediction function ready!")

# Seeded generator: the same demo image is picked on every run.
# Note: df_sample also contains the images the models were trained on, so this
# is a demonstration of the function, not an evaluation of the model.
demo_rng = np.random.default_rng(42)
test_image_idx = int(demo_rng.integers(0, len(df_sample)))
test_image_path = df_sample['image_path'].iloc[test_image_idx]
true_label = df_sample['label'].iloc[test_image_idx]

print(f"Testing image {test_image_idx}")
print(f"True label: {true_label}\n")

# predict_image may report a failure instead of a (label, proba) pair;
# guard the unpacking so the cell does not crash in that case.
prediction_result = predict_image(test_image_path, model_name='Random Forest')
predicted_label, confidence = prediction_result if prediction_result is not None else (None, None)

## 💾 Step 17: Save Trained Models

In [None]:
# Collect the fitted estimators, keyed by model name.
trained_models = {}
for name, res in results.items():
    trained_models[name] = res['model']

# Everything needed to predict later: models, label encoder and class names.
save_data = {
    'models': trained_models,
    'label_encoder': le,
    'target_names': target_names
}

with open('pneumonia_models.pkl', 'wb') as f:
    pickle.dump(save_data, f)

print("‚úÖ Models saved to 'pneumonia_models.pkl'")
size_mb = os.path.getsize('pneumonia_models.pkl') / (1024*1024)
print(f"   File size: {size_mb:.2f} MB")

## 📥 Step 18: Load Pre-trained Models (Optional)

In [None]:
def load_models(filepath='pneumonia_models.pkl'):
    """Load the pickled bundle of models, label encoder and class names.

    Caution: unpickling can execute arbitrary code. Only load files that you
    created yourself or that come from a source you trust.

    Args:
        filepath: Path of the pickle file.

    Returns:
        Tuple ``(models, label_encoder, target_names)`` read from the keys
        ``'models'``, ``'label_encoder'`` and ``'target_names'`` of the bundle.
    """
    with open(filepath, 'rb') as handle:
        bundle = pickle.load(handle)

    models_by_name = bundle['models']
    encoder = bundle['label_encoder']
    class_names = bundle['target_names']
    return models_by_name, encoder, class_names

# Uncomment to load:
# loaded_models, le, target_names = load_models('pneumonia_models.pkl')
# print("‚úÖ Models loaded successfully!")
# print(f"   Available models: {list(loaded_models.keys())}")

## 🎮 Interactive Widget for Predictions

In [None]:
# A Dropdown rejects an initial value that is not among its options, so fall
# back to the first trained model when 'Random Forest' is not available.
model_names = list(results.keys())
default_model = 'Random Forest' if 'Random Forest' in model_names else model_names[0]

model_dropdown = widgets.Dropdown(
    options=model_names,
    value=default_model,
    description='Model:',
    style={'description_width': 'initial'}
)

image_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=max(0, len(df_sample) - 1),  # never below min, even for an empty frame
    step=1,
    description='Image Index:',
    style={'description_width': 'initial'}
)

predict_button = widgets.Button(
    description='🔍 Predict',
    button_style='success',
    tooltip='Click to predict',
    icon='check'
)

output = widgets.Output()

def on_predict_clicked(b):
    """Run predict_image for the selected image and model inside the output area.

    Args:
        b: The button instance passed by ipywidgets; not used.
    """
    with output:
        # wait=True replaces the old result only once the new one is ready.
        clear_output(wait=True)
        img_idx = image_slider.value
        img_path = df_sample['image_path'].iloc[img_idx]
        true_label = df_sample['label'].iloc[img_idx]
        model_name = model_dropdown.value

        print(f"Image Index: {img_idx}")
        print(f"True Label: {true_label}\n")

        predict_image(img_path, model_name)

predict_button.on_click(on_predict_clicked)

print("\n" + "="*80)
print("INTERACTIVE PREDICTION WIDGET")
print("="*80)
display(widgets.VBox([model_dropdown, image_slider, predict_button, output]))

## 📊 Final Summary

In [None]:
# Recap of the run: data sizes, trained models, best accuracy and usage caveats.
print("\n" + "="*80)
print("🎉 PNEUMONIA DETECTION SYSTEM - COMPLETE SUMMARY")
print("="*80)

print("\n📊 Dataset Information:")
print(f"   Total images processed: {len(df_sample)}")
print(f"   Training samples: {len(X_train)}")
print(f"   Test samples: {len(X_test)}")
print(f"   Feature vector size: {X.shape[1]}")

print(f"\n🤖 Models Trained: {len(results)}")
for name in results.keys():
    print(f"   ✓ {name}")

print("\n🏆 Best Model Performance:")
# Best = highest accuracy on the held-out test set.
best_model = max(results.items(), key=lambda x: x[1]['report']['accuracy'])
best_name = best_model[0]
best_acc = best_model[1]['report']['accuracy']
print(f"   Model: {best_name}")
print(f"   Accuracy: {best_acc:.4f}")

print("\n⚠️ Important Reminders:")
print("   • This is for educational purposes only")
print("   • Not approved for clinical diagnosis")
print("   • Always consult medical professionals")

print("\n" + "="*80)
print("✅ System ready for predictions!")
print("="*80)