In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_path in (os.path.join(folder, name) for name in file_names):
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")


In [None]:
data_dir = "/kaggle/input/tuberculosisxray/TB_Data"

# Values shared by every split's preprocessing pipeline.
image_size = (224, 224)
norm_mean = [0.485, 0.456, 0.406]  # per-channel mean passed to Normalize
norm_std = [0.229, 0.224, 0.225]   # per-channel std passed to Normalize
batch_size = 32

# Training pipeline: resize, random flip/rotation augmentation, then tensor + normalize.
train_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Validation/test pipeline: the same preprocessing without the random augmentation.
val_test_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# One ImageFolder per split sub-directory of data_dir.
train_dataset = datasets.ImageFolder(
    os.path.join(data_dir, "Train"), transform=train_transforms
)
val_dataset = datasets.ImageFolder(
    os.path.join(data_dir, "Validation"), transform=val_test_transforms
)
test_dataset = datasets.ImageFolder(
    os.path.join(data_dir, "Test"), transform=val_test_transforms
)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Class names as discovered by ImageFolder on the training split.
class_names = train_dataset.classes
print(f"Classes: {class_names}")


In [None]:
# Build torchvision's ResNeXt-50 (32x4d), requesting its pretrained weights.
# NOTE(review): `pretrained=` is reported as deprecated in newer torchvision
# releases in favour of `weights=` — confirm against the installed version.
model = models.resnext50_32x4d(pretrained=True)

# Turn off gradients for every parameter the model has at this point, so the
# existing layers are not updated during training.
model.requires_grad_(False)

# Swap in a new head on `model.fc`. Its layers are created after the freeze
# above, so they keep the default requires_grad=True. The last Linear emits 2 logits.
head_in_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(head_in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 2),
)

model = model.to(device)


In [None]:
# Loss applied to the model's output logits and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Adam is given only the parameters of the replacement head (`model.fc`).
head_parameters = model.fc.parameters()
optimizer = optim.Adam(head_parameters, lr=1e-4)

# StepLR scales the learning rate by `gamma` every `step_size` calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


In [None]:
# Train the model for up to `num_epochs` epochs, recording per-epoch loss and
# accuracy for both the training and validation splits.
#
# After every epoch this cell also:
#   * advances the learning-rate scheduler,
#   * writes the weights to "best_resnext_model.pth" whenever the validation
#     loss improves (later cells load that file), and
#   * stops early once the validation loss has failed to improve for
#     `patience` consecutive epochs.
num_epochs = 15
train_loss_history, val_loss_history = [], []
train_acc_history, val_acc_history = [], []

best_val_loss = float('inf')
patience, trigger_times = 3, 0  # early stopping: allowed / observed epochs without improvement

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    # --- Training ---
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # get a correct per-sample average even when the last batch is smaller.
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    train_loss_history.append(epoch_loss)
    train_acc_history.append(epoch_acc)

    # --- Validation ---
    model.eval()
    val_running_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_loss = val_running_loss / val_total
    val_acc = val_correct / val_total
    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    print(f"Train Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

    # Advance the LR schedule once per epoch, after this epoch's optimizer steps.
    # Without this call the StepLR scheduler never changes the learning rate.
    scheduler.step()

    # --- Checkpointing / early stopping ---
    if val_loss < best_val_loss:
        # New best validation loss: keep these weights and reset the counter.
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_resnext_model.pth")
        trigger_times = 0
    else:
        trigger_times += 1
        if trigger_times >= patience:
            print(f"Early stopping: validation loss did not improve for {patience} consecutive epochs.")
            break

   


In [None]:
# Plot the recorded training/validation histories side by side:
# loss on the left panel, accuracy on the right panel.
epochs_range = range(len(train_loss_history))

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs_range, train_loss_history, label='Train Loss')
loss_ax.plot(epochs_range, val_loss_history, label='Val Loss')
loss_ax.legend()
loss_ax.set_title("Loss Curve")

acc_ax.plot(epochs_range, train_acc_history, label='Train Accuracy')
acc_ax.plot(epochs_range, val_acc_history, label='Val Accuracy')
acc_ax.legend()
acc_ax.set_title("Accuracy Curve")

plt.show()


In [None]:
# Write the current weights to the "best" checkpoint file when the most recent
# validation loss is lower than the best value recorded so far.
# NOTE(review): this cell sits outside the training loop, so `val_loss` here is
# whatever the last executed epoch left behind — it compares one epoch only.
if best_val_loss > val_loss:
    best_val_loss = val_loss
    torch.save(model.state_dict(), "best_resnext_model.pth")
    trigger_times = 0  # reset the early-stopping counter


In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Restore the weights stored in "best_resnext_model.pth" and switch to eval mode.
# map_location=device remaps the stored tensors onto the current device, so a
# checkpoint written in a GPU session can still be loaded in a CPU-only one.
best_state_dict = torch.load("best_resnext_model.pth", map_location=device)
model.load_state_dict(best_state_dict)
model.eval()

# Ground-truth and predicted class indices for the whole test split.
y_true = []
y_pred = []

# Disable gradient calculations during inference
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Index of the largest logit along dim 1 is the predicted class.
        _, preds = torch.max(outputs, 1)

        # Move back to the CPU before converting to NumPy.
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())


In [None]:
# Overall accuracy on the collected test predictions.
acc = accuracy_score(y_true, y_pred)
print(f"Test Accuracy: {acc:.4f}")

# Per-class metrics, labelled with the class names.
report_text = classification_report(y_true, y_pred, target_names=class_names)
print("\nClassification Report:", report_text, sep="\n")

# Raw confusion-matrix counts as returned by scikit-learn.
matrix_counts = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:", matrix_counts, sep="\n")


In [None]:
import matplotlib.pyplot as plt

def imshow(inp, title=None):
    """Undo the input normalization on an image tensor and draw it with matplotlib.

    Args:
        inp: Image tensor whose first axis is moved last via ``permute(1, 2, 0)``.
            Assumes a 3-channel, channel-first layout so it broadcasts against
            the 3-element mean/std — TODO confirm. The tensor may live on any
            device; it is copied to the CPU before conversion to NumPy.
        title: Optional title. It is set whenever it is not ``None``, so an
            empty string still sets an empty title.

    Returns:
        None. The image is drawn on the current matplotlib axes with the axis
        decorations turned off.
    """
    # .cpu() returns the tensor itself when it is already on the CPU, so
    # existing callers that pass CPU tensors are unaffected.
    inp = inp.cpu().permute(1, 2, 0).numpy()
    mean = np.array([0.485, 0.456, 0.406])
    std  = np.array([0.229, 0.224, 0.225])
    # Invert Normalize(mean, std): x_normalized * std + mean.
    inp  = std * inp + mean
    # Keep values within the [0, 1] range before handing them to plt.imshow.
    inp  = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.axis("off")

# Show a few test images with predicted and true labels.
# Only the first batch of the test loader is used.
dataiter = iter(test_loader)
images, labels = next(dataiter)

model.eval()
with torch.no_grad():
    outputs = model(images.to(device))
    _, preds = torch.max(outputs, 1)

# Plot up to 5 images; clamp to the batch size so a batch with fewer than
# 5 samples does not raise an IndexError.
num_to_show = min(5, images.size(0))
plt.figure(figsize=(12, 6))
for idx in range(num_to_show):
    plt.subplot(1, 5, idx + 1)
    # .item() turns the 0-dim tensors into plain ints for list indexing.
    pred_name = class_names[preds[idx].item()]
    true_name = class_names[labels[idx].item()]
    imshow(images[idx].cpu(), title=f"Pred: {pred_name}\nTrue: {true_name}")
plt.show()


In [None]:
# Highest per-epoch accuracy recorded for each split during training.
best_train_acc = max(train_acc_history)
best_val_acc = max(val_acc_history)
print(f"\nBest Train Accuracy: {best_train_acc:.4f}")
print(f"Best Validation Accuracy: {best_val_acc:.4f}")


In [None]:
from sklearn.metrics import accuracy_score

# Load best model.
# map_location=device remaps the stored tensors onto the current device, so a
# checkpoint written in a GPU session can still be loaded in a CPU-only one.
best_state_dict = torch.load("best_resnext_model.pth", map_location=device)
model.load_state_dict(best_state_dict)
model.eval()

# Ground-truth and predicted class indices for the whole test split.
y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit along dim 1 is the predicted class.
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Fraction of test samples whose prediction matches the label.
test_acc = accuracy_score(y_true, y_pred)
print(f"\nTest Accuracy: {test_acc:.4f}")


In [None]:
# Summary table. The train/validation figures are the maximum over the recorded
# epochs; the test figure is the previously computed `test_acc`.
print("\n--- Final Accuracy Summary ---")
peak_train_acc = max(train_acc_history)
peak_val_acc = max(val_acc_history)
print(f"Train Accuracy      : {peak_train_acc:.4f}")
print(f"Validation Accuracy : {peak_val_acc:.4f}")
print(f"Test Accuracy       : {test_acc:.4f}")


In [None]:
# Run inference over the test split with whatever weights `model` currently
# holds, collecting true and predicted class indices for the plots that follow.
# Eval mode is set here explicitly so the Dropout layer in the head is inactive
# regardless of which cell ran last.
model.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit along dim 1 is the predicted class.
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Tick labels for the heatmap. This overwrites the `class_names` taken from the
# dataset earlier — adjust if your class folders are named differently.
class_names = ['Normal', 'Tuberculosis']

# Raw counts: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_true, y_pred)

# Divide each row by its total so every row shows per-true-class fractions.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm.astype('float') / row_totals

# Draw the normalized matrix as an annotated heatmap.
plt.figure(figsize=(6, 5))
heatmap_ax = sns.heatmap(
    cm_normalized,
    annot=True,
    fmt=".2f",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Normalized Confusion Matrix')
plt.show()


In [None]:
# Heatmap of the raw confusion-matrix counts (integer annotations).
count_heatmap_options = dict(
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
sns.heatmap(cm, **count_heatmap_options)


In [None]:
# Overwrite the "best" checkpoint when the most recent validation loss is lower
# than the best value recorded so far.
# NOTE(review): this runs outside the training loop, so `val_loss` is the value
# left by the last executed epoch, and `model` holds whatever weights are
# currently loaded — confirm that is what should be saved here.
if best_val_loss > val_loss:
    best_val_loss = val_loss
    torch.save(model.state_dict(), "best_resnext_model.pth")
    print("✅ Model saved at best_val_loss:", best_val_loss)


In [None]:
# Persist the model's current weights under a separate file name.
# NOTE(review): earlier cells call model.load_state_dict(...) on
# "best_resnext_model.pth"; if they ran before this cell, the weights written
# here are those loaded ones rather than the last training epoch's.
final_state_dict = model.state_dict()
torch.save(final_state_dict, "final_resnext_model.pth")
print("✅ Final model saved as 'final_resnext_model.pth'")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch

def imshow(img, title=None):
    """Denormalize an image tensor and display it on the current matplotlib axes.

    Args:
        img: Image tensor; it is moved to the CPU, converted to NumPy and
            transposed with axes (1, 2, 0). Assumes a 3-channel, channel-first
            layout so it broadcasts against the 3-element mean/std — TODO confirm.
        title: Optional title; only set when truthy (``None`` and ``""`` are skipped).
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Channel axis goes last, which is the layout plt.imshow is given here.
    pixels = np.transpose(img.cpu().numpy(), (1, 2, 0))
    # Invert Normalize(mean, std), then clamp into [0, 1].
    pixels = np.clip(channel_std * pixels + channel_mean, 0, 1)

    plt.imshow(pixels)
    if title:
        plt.title(title)
    plt.axis('off')

# Get the first batch of test images and move it to the model's device.
dataiter = iter(test_loader)
images, labels = next(dataiter)
images = images.to(device)
labels = labels.to(device)

# Predict
model.eval()
with torch.no_grad():
    outputs = model(images)
    _, preds = torch.max(outputs, 1)

# Define class names (adjust if yours are different)
class_names = ['Normal', 'Tuberculosis']

# Visualize up to 8 images in a 2x4 grid; clamp to the batch size so a batch
# with fewer than 8 samples does not raise an IndexError.
num_to_show = min(8, images.size(0))
plt.figure(figsize=(16, 8))
for idx in range(num_to_show):
    plt.subplot(2, 4, idx + 1)
    # .item() turns the 0-dim tensors into plain ints for comparison and indexing.
    pred_idx = preds[idx].item()
    true_idx = labels[idx].item()
    correct = pred_idx == true_idx
    color = 'green' if correct else 'red'

    title = f"Pred: {class_names[pred_idx]}\nTrue: {class_names[true_idx]}"
    imshow(images[idx], title=title)
    # Re-apply the title so it is coloured green (correct) or red (wrong).
    plt.title(title, color=color, fontsize=10)
plt.tight_layout()
plt.show()


In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torch
import os

# Image to classify (edit this path to try a different file).
image_path = '/kaggle/input/tuberculosisxray/TB_Data/Test/Normal/Normal-1278.png'

# The name of the image's parent folder is used as the ground-truth label.
parent_folder = os.path.dirname(image_path)
true_label = os.path.basename(parent_folder)  # 'Normal' or 'Tuberculosis'

# Open the file and convert it to 3-channel RGB.
image = Image.open(image_path).convert('RGB')

# Deterministic preprocessing: resize, tensor conversion, normalization.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# unsqueeze(0) adds a leading batch axis of size 1 before moving to the device.
input_tensor = transform(image).unsqueeze(0).to(device)

# Forward pass in eval mode without gradient tracking.
model.eval()
with torch.no_grad():
    output = model(input_tensor)
    class_probabilities = torch.softmax(output, dim=1)
    predicted_class = torch.argmax(output, 1).item()
    # Softmax probability of the predicted class, expressed as a percentage.
    confidence = class_probabilities[0][predicted_class].item() * 100

# Map the predicted index to its class name.
class_names = ['Normal', 'Tuberculosis']
predicted_label = class_names[predicted_class]

# Compare the prediction against the folder-derived label.
is_correct = (predicted_label == true_label)
if is_correct:
    status, color = "✅ Correct", 'green'
else:
    status, color = "❌ Wrong", 'red'

# Text summary.
print(f"Predicted Class: {predicted_label}")
print(f"Confidence: {confidence:.2f}%")
print(f"True Label: {true_label} → {status}")

# Show the original (un-normalized) image with the verdict in the title.
plt.imshow(image)
plt.axis('off')
plt.title(f"Prediction: {predicted_label} ({confidence:.2f}%)\nTrue: {true_label} → {status}", 
          color=color, fontsize=12)
plt.show()


In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torch
import os

# Image to classify (edit this path to try a different file).
image_path = '/kaggle/input/tuberculosisxray/TB_Data/Test/Tuberculosis/Tuberculosis-117.png'

# Ground-truth label = name of the folder that contains the image.
true_label = os.path.split(os.path.dirname(image_path))[1]  # 'Normal' or 'Tuberculosis'

# Open the file and convert it to 3-channel RGB.
image = Image.open(image_path).convert('RGB')

# Deterministic preprocessing: resize, tensor conversion, normalization.
resize_step = transforms.Resize((224, 224))
to_tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize([0.485, 0.456, 0.406], 
                                      [0.229, 0.224, 0.225])
transform = transforms.Compose([resize_step, to_tensor_step, normalize_step])

# unsqueeze(0) adds a leading batch axis of size 1 before moving to the device.
image_tensor = transform(image)
input_tensor = image_tensor.unsqueeze(0).to(device)

# Forward pass in eval mode without gradient tracking.
model.eval()
with torch.no_grad():
    output = model(input_tensor)
    predicted_class = torch.argmax(output, 1).item()
    # Softmax probability of the predicted class, expressed as a percentage.
    confidence = torch.softmax(output, dim=1)[0][predicted_class].item() * 100

# Map the predicted index to its class name.
class_names = ['Normal', 'Tuberculosis']
predicted_label = class_names[predicted_class]

# Pick the status text and title colour from whether the prediction matches.
is_correct = (predicted_label == true_label)
verdict_styles = {True: ("✅ Correct", 'green'), False: ("❌ Wrong", 'red')}
status, color = verdict_styles[is_correct]

# Text summary.
print(f"Predicted Class: {predicted_label}")
print(f"Confidence: {confidence:.2f}%")
print(f"True Label: {true_label} → {status}")

# Show the original (un-normalized) image with the verdict in the title.
plt.imshow(image)
plt.axis('off')
plt.title(f"Prediction: {predicted_label} ({confidence:.2f}%)\nTrue: {true_label} → {status}", 
          color=color, fontsize=12)
plt.show()
