<a href="https://colab.research.google.com/github/Sibahle01/Brain-Tumor-Classification/blob/main/Tumor_Name_Identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧠 Brain Tumor Classification using MRI Scans
This notebook demonstrates how to classify brain MRI scans into four categories:
- Glioma
- Meningioma
- Pituitary
- No Tumor

We’ll use **Transfer Learning (ResNet50)** and explain predictions with **Grad-CAM**.


# 🔹 Step 0: Setup & Download Dataset

#🔹 1. Setup & Imports

In [None]:
!pip install tensorflow keras matplotlib scikit-learn



In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

#🔹2. Dataset Loading

In [2]:
# Mount Google Drive if dataset is in Drive
from google.colab import drive
drive.mount('/content/drive')

# Example path - adjust based on your dataset
train_dir = "/content/drive/MyDrive/archive (2)/Training"
test_dir  = "/content/drive/MyDrive/archive (2)/Testing"

img_size = (224,224)
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_directory(
    train_dir, target_size=img_size, batch_size=batch_size, class_mode='categorical'
)

test_gen = test_datagen.flow_from_directory(
    test_dir, target_size=img_size, batch_size=batch_size, class_mode='categorical', shuffle=False
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 5722 images belonging to 4 classes.
Found 1321 images belonging to 4 classes.


#🔹 3. Build Model (Transfer Learning: ResNet50)

In [None]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224,224,3))
base_model.trainable = False  # freeze pretrained layers

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(train_gen.num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


#🔹 4. Training

In [None]:
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model.fit(
    train_gen,
    validation_data=test_gen,
    epochs=10,
    callbacks=[early_stop]
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m 96/179[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m12:02[0m 9s/step - accuracy: 0.3250 - loss: 1.6071

In [None]:
# Check if GPU is available
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [None]:
# Improved tumor detection model with multiple enhancements
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import gc

# Enable mixed precision for faster training
from tensorflow.keras import mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Check GPU availability
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Mixed precision enabled:", policy.name)

# Clear memory
tf.keras.backend.clear_session()
gc.collect()

# Data augmentation with medical imaging specific enhancements
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.7, 1.3],
    fill_mode='constant',
    cval=0,  # Fill with black for medical images
    validation_split=0.2
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Assuming you have these directories set up
train_dir = '/content/drive/MyDrive/archive (2)/Training'  # Update with your path
test_dir = '/content/drive/MyDrive/archive (2)/Testing'    # Update with your path

# Create data generators
train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,  # Increased batch size for better performance
    class_mode='categorical',
    shuffle=True,
    subset='training'
)

val_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
    subset='validation'
)

test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

# Calculate class weights for imbalanced data
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_gen.classes),
    y=train_gen.classes
)
class_weights = dict(enumerate(class_weights))
print("Class weights:", class_weights)

# Create optimized EfficientNet model
def create_optimized_model(num_classes):
    base_model = EfficientNetB0(
        weights='imagenet',
        include_top=False,
        input_shape=(224, 224, 3),
        pooling='avg'  # Changed from None to 'avg' for better performance
    )

    # Freeze the base model initially
    base_model.trainable = False

    model = Sequential([
        base_model,
        BatchNormalization(),
        Dropout(0.6),
        Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.4),
        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(num_classes, activation='softmax', dtype='float32')  # Ensure output is float32
    ])

    return model

# Create model
model = create_optimized_model(train_gen.num_classes)
model.summary()

# Learning rate scheduler
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True
)

# Compile model
model.compile(
    optimizer=Adam(learning_rate=lr_schedule),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

# Enhanced callbacks
callbacks = [
    EarlyStopping(
        monitor='val_accuracy',
        patience=12,
        restore_best_weights=True,
        mode='max',
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        mode='max',
        verbose=1
    ),
    ModelCheckpoint(
        'best_tumor_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )
]

# Convert generators to tf.data.Dataset for better performance
def create_tf_dataset(generator):
    return tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, 224, 224, 3], [None, train_gen.num_classes])
    ).prefetch(tf.data.AUTOTUNE)

train_dataset = create_tf_dataset(train_gen)
val_dataset = create_tf_dataset(val_gen)

# Stage 1: Train the top layers
print("Stage 1: Training top layers...")
history1 = model.fit(
    train_dataset,
    epochs=20,
    validation_data=val_dataset,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1
)

# Stage 2: Fine-tuning
print("\nStage 2: Fine-tuning with unfrozen layers...")

# Unfreeze the base model gradually
base_model = model.layers[0]
base_model.trainable = True

# Freeze first 150 layers, unfreeze the rest
for layer in base_model.layers[:150]:
    layer.trainable = False
for layer in base_model.layers[150:]:
    layer.trainable = True

# Recompile with lower learning rate
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

# Continue training
history2 = model.fit(
    train_dataset,
    epochs=15,
    validation_data=val_dataset,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1
)

# Final evaluation
print("\nFinal Evaluation on Test Set:")
test_dataset = create_tf_dataset(test_gen)
test_results = model.evaluate(test_dataset, verbose=1)
print(f"Test Accuracy: {test_results[1]:.4f}")
print(f"Test Precision: {test_results[2]:.4f}")
print(f"Test Recall: {test_results[3]:.4f}")

# Save the final model
model.save('final_tumor_model.h5')
print("Model saved as 'final_tumor_model.h5'")

# Plot training history
import matplotlib.pyplot as plt

def plot_training_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Plot accuracy
    ax1.plot(history.history['accuracy'], label='Training Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()

    # Plot loss
    ax2.plot(history.history['loss'], label='Training Loss')
    ax2.plot(history.history['val_loss'], label='Validation Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()

    plt.tight_layout()
    plt.show()

# Plot both training stages
plot_training_history(history1)
plot_training_history(history2)

print("Training completed!")
print(f"Best validation accuracy: {max(history1.history['val_accuracy'] + history2.history['val_accuracy']):.4f}")

# Additional performance tips
print("\n" + "="*50)
print("PERFORMANCE TIPS:")
print("1. Use Colab Pro for better GPU (A100)")
print("2. Reduce image size to 192x192 if accuracy allows")
print("3. Use larger batch size if GPU memory permits")
print("4. Consider using TPU for even faster training")
print("="*50)

Num GPUs Available:  1
Mixed precision enabled: mixed_float16
Found 4579 images belonging to 4 classes.
Found 1143 images belonging to 4 classes.
Found 1321 images belonging to 4 classes.
Class weights: {0: np.float64(1.0830179754020814), 1: np.float64(1.0678638059701493), 2: np.float64(0.8915498442367601), 3: np.float64(0.9817753001715266)}


Stage 1: Training top layers...
Epoch 1/20
    872/Unknown [1m2190s[0m 2s/step - accuracy: 0.2729 - loss: 2.7585 - precision: 0.2815 - recall: 0.1164

KeyboardInterrupt: 

In [1]:
# Ultra-fast tumor detection model
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2  # Much faster than EfficientNet
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import gc

# Check GPU availability and enable XLA for faster computation
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
tf.config.optimizer.set_jit(True)  # Enable XLA compilation

# Clear memory
tf.keras.backend.clear_session()
gc.collect()

# Minimal data augmentation for speed
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Use much smaller image size
IMG_SIZE = 128  # Reduced to 128x128 for 4x faster processing

train_dir = '/content/drive/MyDrive/archive (2)/Training'
test_dir = '/content/drive/MyDrive/archive (2)/Testing'

# Create data generators with optimal batch size
BATCH_SIZE = 32  # Increased for better GPU utilization

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    subset='training'
)

val_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
    subset='validation'
)

test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

print(f"Training samples: {train_gen.samples}")
print(f"Validation samples: {val_gen.samples}")
print(f"Test samples: {test_gen.samples}")

# Create ultra-fast model with MobileNetV2
def create_ultra_fast_model(num_classes):
    # MobileNetV2 is much faster than EfficientNet
    base_model = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        pooling='avg',
        alpha=0.35  # Smaller version for speed
    )

    # Freeze the base model
    base_model.trainable = False

    # Very simple classifier
    model = Sequential([
        base_model,
        Dropout(0.4),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])

    return model

# Create model
model = create_ultra_fast_model(train_gen.num_classes)
model.summary()

# Compile with higher learning rate for faster convergence
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Simple callbacks
callbacks = [
    EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        mode='max'
    ),
    ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.5,
        patience=2,
        min_lr=1e-6,
        mode='max'
    )
]

# Use direct generator for maximum speed (no tf.data conversion)
print("Stage 1: Training top layers...")
history1 = model.fit(
    train_gen,
    epochs=8,  # Very few epochs
    validation_data=val_gen,
    callbacks=callbacks,
    verbose=1
)

# Stage 2: Quick fine-tuning
print("\nStage 2: Quick fine-tuning...")
base_model = model.layers[0]
base_model.trainable = True

# Only unfreeze last few layers
for layer in base_model.layers[:-20]:
    layer.trainable = False

model.compile(
    optimizer=Adam(learning_rate=1e-4),  # Lower LR for fine-tuning
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history2 = model.fit(
    train_gen,
    epochs=4,  # Very short fine-tuning
    validation_data=val_gen,
    callbacks=callbacks,
    verbose=1
)

# Final evaluation
print("\nFinal Evaluation on Test Set:")
test_results = model.evaluate(test_gen, verbose=1)
print(f"Test Accuracy: {test_results[1]:.4f}")

# Save the model
model.save('fast_tumor_model.h5')
print("Model saved!")

# Quick performance test
import time
start_time = time.time()
sample_pred = model.predict(test_gen, steps=1, verbose=0)
end_time = time.time()
print(f"Prediction speed: {end_time - start_time:.3f} seconds per batch")

print("Training completed!")
best_val_acc = max(history1.history['val_accuracy'] + history2.history['val_accuracy'])
print(f"Best validation accuracy: {best_val_acc:.4f}")

# If accuracy is low, we can try a bit longer training
if best_val_acc < 0.85:
    print("\nAccuracy below 85%, extending training...")
    model.compile(
        optimizer=Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    history3 = model.fit(
        train_gen,
        epochs=3,
        validation_data=val_gen,
        verbose=1
    )

    final_test = model.evaluate(test_gen, verbose=0)
    print(f"Final Test Accuracy: {final_test[1]:.4f}")

Num GPUs Available:  1
Found 4579 images belonging to 4 classes.
Found 1143 images belonging to 4 classes.
Found 1321 images belonging to 4 classes.
Training samples: 4579
Validation samples: 1143
Test samples: 1321
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_0.35_128_no_top.h5
[1m2019640/2019640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step


Stage 1: Training top layers...


  self._warn_if_super_not_called()


Epoch 1/8
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 532ms/step - accuracy: 0.6580 - loss: 0.8837 - val_accuracy: 0.7104 - val_loss: 0.8262 - learning_rate: 0.0010
Epoch 2/8
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 339ms/step - accuracy: 0.8150 - loss: 0.4796 - val_accuracy: 0.7253 - val_loss: 0.7503 - learning_rate: 0.0010
Epoch 3/8
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 332ms/step - accuracy: 0.8470 - loss: 0.4082 - val_accuracy: 0.7437 - val_loss: 0.6858 - learning_rate: 0.0010
Epoch 4/8
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 338ms/step - accuracy: 0.8495 - loss: 0.4015 - val_accuracy: 0.7778 - val_loss: 0.5810 - learning_rate: 0.0010
Epoch 5/8
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 342ms/step - accuracy: 0.8661 - loss: 0.3714 - val_accuracy: 0.7515 - val_loss: 0.6813 - learning_rate: 0.0010
Epoch 6/8
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



Test Accuracy: 0.8357
Model saved!
Prediction speed: 5.731 seconds per batch
Training completed!
Best validation accuracy: 0.7944

Accuracy below 85%, extending training...
Epoch 1/3
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 467ms/step - accuracy: 0.8776 - loss: 0.3212 - val_accuracy: 0.7375 - val_loss: 0.8223
Epoch 2/3
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 338ms/step - accuracy: 0.8842 - loss: 0.3158 - val_accuracy: 0.7454 - val_loss: 0.8029
Epoch 3/3
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 334ms/step - accuracy: 0.8994 - loss: 0.2755 - val_accuracy: 0.7472 - val_loss: 0.7912
Final Test Accuracy: 0.8418


In [None]:
print("GPU Available: ", tf.config.list_physical_devices('GPU'))
print("Is GPU being used: ", tf.test.is_gpu_available())

In [None]:
# Check your current batch size
print("Batch size:", train_gen.batch_size)
print("Total batches:", len(train_gen))

# If batch size is too small (like 16), increase it:
train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=64,  # Increase from 32 to 64 or higher
    class_mode='categorical'
)

In [None]:
from transformers import pipeline

# Load zero-shot classifier
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

categories = [
    "A personal reflection or lesson learned",
    "A goal or aspiration (including financial goals)",
    "An emotion or feeling being expressed",
    "A plan or task for the future"
]
# Example entries
entries = [
    "Today I realized I waste too much time scrolling on my phone.",
    "I want to save R500 this month to build my emergency fund.",
    "I feel really anxious about my exams tomorrow.",
    "Tomorrow I’ll buy stock and finish one chapter of business intelligence."
]

# Run classification
for entry in entries:
    result = classifier(entry, candidate_labels=categories)
    print(f"\nEntry: {entry}")
    print("Prediction:", result["labels"][0], "| Confidence:", round(result["scores"][0], 2))


Device set to use cuda:0



Entry: Today I realized I waste too much time scrolling on my phone.
Prediction: A personal reflection or lesson learned | Confidence: 0.91

Entry: I want to save R500 this month to build my emergency fund.
Prediction: A goal or aspiration (including financial goals) | Confidence: 0.54

Entry: I feel really anxious about my exams tomorrow.
Prediction: An emotion or feeling being expressed | Confidence: 0.91

Entry: Tomorrow I’ll buy stock and finish one chapter of business intelligence.
Prediction: A plan or task for the future | Confidence: 0.58


In [None]:
from transformers import pipeline

# Load zero-shot classifier
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Descriptive labels for better accuracy
categories = {
    "Reflection": "A personal reflection or lesson learned",
    "Goals": "A goal or aspiration, including financial goals",
    "Emotions": "An emotion or feeling being expressed",
    "Plans": "A plan, task, or intention for the future"
}

# Example entries
entries = [
    "Today I realized I waste too much time scrolling on my phone.",
    "I want to save R500 this month to build my emergency fund.",
    "I feel really anxious about my exams tomorrow.",
    "Tomorrow I’ll buy stock and finish one chapter of business intelligence."
]

# Run classification
for entry in entries:
    result = classifier(entry, candidate_labels=list(categories.values()))

    # Find which short label matches the predicted descriptive label
    predicted_desc = result["labels"][0]
    predicted_label = [k for k, v in categories.items() if v == predicted_desc][0]

    print(f"\nEntry: {entry}")
    print("Prediction:", predicted_label, "| Confidence:", round(result["scores"][0], 2))


Device set to use cuda:0



Entry: Today I realized I waste too much time scrolling on my phone.
Prediction: Reflection | Confidence: 0.9

Entry: I want to save R500 this month to build my emergency fund.
Prediction: Goals | Confidence: 0.7

Entry: I feel really anxious about my exams tomorrow.
Prediction: Emotions | Confidence: 0.9

Entry: Tomorrow I’ll buy stock and finish one chapter of business intelligence.
Prediction: Plans | Confidence: 0.47


In [None]:
from transformers import pipeline

# Load zero-shot classifier
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Descriptive labels for better accuracy
categories = {
    "Reflection": "A personal reflection or lesson learned",
    "Goals": "A goal or aspiration, including financial goals",
    "Emotions": "An emotion or feeling being expressed",
    "Plans": "A plan, task, or intention for the future"
}

# Example entries
entries = [
    "Today I realized I waste too much time scrolling on my phone.",
    "I want to save R500 this month to build my emergency fund.",
    "I feel really anxious about my exams tomorrow.",
    "Tomorrow I’ll buy stock and finish one chapter of business intelligence."
]

# Run classification
for entry in entries:
    result = classifier(entry, candidate_labels=list(categories.values()))

    # Get top 2 results
    top2_labels = result["labels"][:2]
    top2_scores = result["scores"][:2]

    # Map back to short labels
    mapped_results = [
        ([k for k, v in categories.items() if v == desc][0], round(score, 2))
        for desc, score in zip(top2_labels, top2_scores)
    ]

    print(f"\nEntry: {entry}")
    for label, score in mapped_results:
        print(f"Prediction: {label} | Confidence: {score}")


Device set to use cuda:0



Entry: Today I realized I waste too much time scrolling on my phone.
Prediction: Reflection | Confidence: 0.9
Prediction: Emotions | Confidence: 0.07

Entry: I want to save R500 this month to build my emergency fund.
Prediction: Goals | Confidence: 0.7
Prediction: Plans | Confidence: 0.13

Entry: I feel really anxious about my exams tomorrow.
Prediction: Emotions | Confidence: 0.9
Prediction: Reflection | Confidence: 0.05

Entry: Tomorrow I’ll buy stock and finish one chapter of business intelligence.
Prediction: Plans | Confidence: 0.47
Prediction: Goals | Confidence: 0.32


In [None]:
from transformers import pipeline

# Load classifier
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Descriptive labels
categories = {
    "Reflection": "A personal reflection or lesson learned",
    "Goals": "A goal or aspiration, including financial goals",
    "Emotions": "An emotion or feeling being expressed",
    "Plans": "A plan, task, or intention for the future"
}

# Example entry
entry = "The silence in this house is so loud it has a texture. I can feel it pressing against my eardrums."

# Run classification
result = classifier(entry, candidate_labels=list(categories.values()))

# Get top 2 results
top2_labels = result["labels"][:2]
top2_scores = result["scores"][:2]

# Map back to short labels
mapped_results = [
    ([k for k, v in categories.items() if v == desc][0], round(score, 2))
    for desc, score in zip(top2_labels, top2_scores)
]

# Store results
classified_entry = {
    "entry": entry,
    "main_category": mapped_results[0][0],
    "secondary_category": mapped_results[1][0],
    "confidence_scores": {label: score for label, score in mapped_results}
}

print(classified_entry)


Device set to use cuda:0


{'entry': 'The silence in this house is so loud it has a texture. I can feel it pressing against my eardrums.', 'main_category': 'Emotions', 'secondary_category': 'Reflection', 'confidence_scores': {'Emotions': 0.65, 'Reflection': 0.14}}


In [None]:
import numpy as np
import pandas as pd