In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(root, fname)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# 📦 1. Imports
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, WeightedRandomSampler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# 📍 2. Device setup and reproducibility
# Seed the random number generators before any sampling, augmentation or layer
# initialisation happens, so that "Restart & Run All" draws the same numbers.
# NOTE: some CUDA kernels are non-deterministic, so GPU runs may still differ slightly.
SEED = 42
torch.manual_seed(SEED)  # seeds the default generator on all devices
np.random.seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


In [None]:
# 🧹 3. Data preparation and augmentation
data_dir = "/kaggle/input/tuberculosisdatas/TB_Data"

# Preprocessing constants shared by the training and evaluation pipelines, so
# the two can never drift apart.
IMAGE_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # per-channel mean used for normalisation
IMAGENET_STD = [0.229, 0.224, 0.225]   # per-channel std used for normalisation

# Training pipeline: resize, light augmentation (random flip / rotation up to
# 10 degrees), then tensor conversion and normalisation.
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
val_test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Each split lives in its own sub-folder; ImageFolder derives labels from the
# class sub-directory names inside it.
train_dataset = datasets.ImageFolder(os.path.join(data_dir, "Train"), transform=train_transforms)
val_dataset   = datasets.ImageFolder(os.path.join(data_dir, "Validation"), transform=val_test_transforms)
test_dataset  = datasets.ImageFolder(os.path.join(data_dir, "Test"), transform=val_test_transforms)

In [None]:
# ⚖️ 4. Handle class imbalance with WeightedRandomSampler
# Read the labels from `train_dataset.targets` rather than iterating the
# dataset itself: indexing the dataset decodes and augments every image, which
# would be done twice here just to look at the label.
train_targets = train_dataset.targets

# Number of training samples per class, indexed by class label.
class_counts = [0] * len(train_dataset.classes)
for label in train_targets:
    class_counts[label] += 1

# One weight per *sample* (despite the name): the inverse frequency of that
# sample's class, so every class is drawn with equal total probability.
class_weights = [1.0 / class_counts[label] for label in train_targets]
sampler = WeightedRandomSampler(class_weights, num_samples=len(class_weights), replacement=True)


In [None]:
# 📦 5. DataLoaders
batch_size = 32

# Validation and test batches are read in a fixed order.
eval_loader_options = {"batch_size": batch_size, "shuffle": False}

# Training batches are drawn through the weighted sampler instead of shuffling.
train_loader = DataLoader(train_dataset, sampler=sampler, batch_size=batch_size)
val_loader = DataLoader(val_dataset, **eval_loader_options)
test_loader = DataLoader(test_dataset, **eval_loader_options)


In [None]:
# 🧠 6. Load ConvNeXt-Tiny and modify classifier
# Load pretrained weights through the weights enum; the boolean `pretrained`
# flag is deprecated in torchvision.
model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.DEFAULT)

# Freeze backbone: only the classifier head stays trainable.
model.features.requires_grad_(False)

# Replace the final linear layer so the output width matches the number of
# classes found in the training folder (instead of a hard-coded 2).
num_classes = len(train_dataset.classes)
head_in_features = model.classifier[2].in_features
model.classifier[2] = nn.Linear(head_in_features, num_classes)
model = model.to(device)


In [None]:
# 🧮 7. Loss, optimizer, scheduler
# Weighted loss for imbalance: one inverse-frequency weight per class, in label
# order (works for any number of classes, not only two).
# NOTE(review): the training loader already rebalances classes with a
# WeightedRandomSampler, so weighting the loss as well corrects for the
# imbalance twice — confirm this is intended.
weights = torch.tensor(
    [1.0 / count for count in class_counts],
    dtype=torch.float32,
    device=device,
)
criterion = nn.CrossEntropyLoss(weight=weights)

# Hand the optimizer only the parameters that are actually trainable; frozen
# parameters never receive gradients, so passing them is pointless.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-4)

# Multiply the learning rate by 0.1 every 5 scheduler steps.
# NOTE(review): stepped once per epoch over 50 epochs, this shrinks the learning
# rate to about 1e-13 by the end, so late epochs barely update the model —
# confirm the schedule is intended.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


In [None]:
# 🔁 8. Training loop with validation
num_epochs = 50
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []


def _run_epoch(loader, training):
    """Run one full pass over `loader` and return (mean batch loss, accuracy).

    When `training` is True the model is put in train mode and the optimizer
    is stepped after every batch; otherwise the model is put in eval mode and
    gradients are disabled. Accuracy is the fraction of correctly classified
    samples seen during the pass.
    """
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

    # The loss is averaged over batches, the accuracy over samples.
    return loss_sum / len(loader), n_correct / n_seen


for epoch in range(num_epochs):
    # Optimisation pass over the training data.
    train_loss, train_acc = _run_epoch(train_loader, training=True)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    # Validation
    val_loss, val_acc = _run_epoch(val_loader, training=False)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"Epoch [{epoch+1}/{num_epochs}] => Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")


In [None]:
# 📈 9. Plotting loss and accuracy curves
# Two side-by-side panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

panels = [
    (loss_ax, "Loss", train_losses, val_losses),
    (acc_ax, "Accuracy", train_accuracies, val_accuracies),
]
for ax, metric, train_curve, val_curve in panels:
    ax.plot(train_curve, label=f"Train {metric}")
    ax.plot(val_curve, label=f"Validation {metric}")
    ax.set_title(f"{metric} Curve")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(metric)
    ax.legend()

fig.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import (
    confusion_matrix, classification_report, roc_curve,
    roc_auc_score, accuracy_score, precision_score,
    recall_score, f1_score
)
import seaborn as sns


In [None]:
# Run the model over the test set and collect, per sample, the true label, the
# predicted label and the softmax probability of class index 1.
model.eval()
all_labels, all_preds, all_probs = [], [], []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Probabilities for class "Tuberculosis"
        # (assumes that class has index 1 in the dataset — TODO confirm)
        probs = torch.softmax(outputs, dim=1)[:, 1]
        predicted = outputs.argmax(dim=1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())


In [None]:
# Per-class precision / recall / F1 table, labelled with the dataset's class names.
print("📋 Classification Report:\n")
print(classification_report(all_labels, all_preds, target_names=train_dataset.classes))

# Headline scalar metrics; AUC is computed from the collected class-1
# probabilities rather than from the hard predictions.
acc = accuracy_score(all_labels, all_preds)
prec = precision_score(all_labels, all_preds)
rec = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
auc = roc_auc_score(all_labels, all_probs)

summary_lines = [
    f"✅ Accuracy:  {acc:.4f}",
    f"✅ Precision: {prec:.4f}",
    f"✅ Recall:    {rec:.4f}",
    f"✅ F1 Score:  {f1:.4f}",
    f"✅ AUC Score: {auc:.4f}",
]
print("\n".join(summary_lines))


In [None]:
# Confusion matrix of the test predictions, drawn as an annotated heatmap with
# the dataset's class names on both axes.
cm = confusion_matrix(all_labels, all_preds)
class_labels = train_dataset.classes

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
# ROC curve built from the collected class-1 probabilities; the dashed grey
# diagonal is the chance-level reference line.
fpr, tpr, _ = roc_curve(all_labels, all_probs)

fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f"AUC = {auc:.4f}")
ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
def get_accuracy(model, dataloader, name="Dataset"):
    """Compute and print the classification accuracy of `model` on `dataloader`.

    Args:
        model: A module whose forward pass returns one row of class scores per
            input sample. It is switched to eval mode and left in eval mode.
        dataloader: An iterable yielding `(images, labels)` batches.
        name: Label used in the printed message.

    Returns:
        Accuracy as a percentage in [0, 100], or `nan` when the dataloader
        yields no samples (the metric is undefined in that case).
    """
    # Evaluate on whatever device the model lives on instead of relying on a
    # notebook-level global; a parameter-free model is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Guard against an empty loader rather than raising ZeroDivisionError.
    if total == 0:
        print(f"⚠️ {name}: no samples to evaluate")
        return float("nan")

    acc = 100 * correct / total
    print(f"✅ {name} Accuracy: {acc:.2f}%")
    return acc


In [None]:
# Report accuracy on each split, in train / validation / test order.
# NOTE: `train_loader` draws through the weighted sampler and applies the
# training augmentations, so the "Train" figure is measured on resampled,
# augmented images rather than on the raw training set.
train_acc_percent, val_acc_percent, test_acc_percent = [
    get_accuracy(model, split_loader, split_name)
    for split_name, split_loader in (
        ("Train", train_loader),
        ("Validation", val_loader),
        ("Test", test_loader),
    )
]


In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torch

# Path to the uploaded image
image_path = '/kaggle/input/tuberculosisdatas/TB_Data/Test/Normal/Normal-1047.png'  # 👈 Change if needed

# Load the image as 3-channel RGB; the context manager closes the underlying
# file once the converted copy has been made.
with Image.open(image_path) as raw_image:
    image = raw_image.convert('RGB')

# Reuse the evaluation preprocessing defined for the val/test datasets so that
# single-image inference cannot drift from the pipeline the model was evaluated with.
transform = val_test_transforms

input_tensor = transform(image).unsqueeze(0).to(device)  # Add batch dimension

# Make prediction
model.eval()  # Use ConvNeXt model here
with torch.no_grad():
    output = model(input_tensor)
    predicted_class = torch.argmax(output, 1).item()
    confidence = torch.softmax(output, dim=1)[0][predicted_class].item() * 100

# Class names come from the training ImageFolder, so the index -> name mapping
# is guaranteed to match the labels the model was trained on.
class_names = train_dataset.classes

# Output results
print(f"✅ Predicted Class: {class_names[predicted_class]}")
print(f"🔢 Confidence: {confidence:.2f}%")

# Show the image with prediction
plt.imshow(image)
plt.axis('off')
plt.title(f"Prediction: {class_names[predicted_class]} ({confidence:.2f}%)")
plt.show()


In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torch

# Path to the uploaded image
image_path = '/kaggle/input/tuberculosisdatas/TB_Data/Test/Tuberculosis/Tuberculosis-121.png'  # 👈 Change if needed

# Load the image as 3-channel RGB; the context manager closes the underlying
# file once the converted copy has been made.
with Image.open(image_path) as raw_image:
    image = raw_image.convert('RGB')

# Reuse the evaluation preprocessing defined for the val/test datasets so that
# single-image inference cannot drift from the pipeline the model was evaluated with.
transform = val_test_transforms

input_tensor = transform(image).unsqueeze(0).to(device)  # Add batch dimension

# Make prediction
model.eval()  # Use ConvNeXt model here
with torch.no_grad():
    output = model(input_tensor)
    predicted_class = torch.argmax(output, 1).item()
    confidence = torch.softmax(output, dim=1)[0][predicted_class].item() * 100

# Class names come from the training ImageFolder, so the index -> name mapping
# is guaranteed to match the labels the model was trained on.
class_names = train_dataset.classes

# Output results
print(f"✅ Predicted Class: {class_names[predicted_class]}")
print(f"🔢 Confidence: {confidence:.2f}%")

# Show the image with prediction
plt.imshow(image)
plt.axis('off')
plt.title(f"Prediction: {class_names[predicted_class]} ({confidence:.2f}%)")
plt.show()


In [None]:
# Save only the model's learned parameters (its state_dict), not the whole
# model object: to reload, rebuild the same architecture first and then call
# `load_state_dict` with the contents of this file.
torch.save(model.state_dict(), 'convnext_tiny_tb.pth')
print("✅ Model saved as 'convnext_tiny_tb.pth'")
