Deep Learning Model


In [1]:
print("Hello")

Hello


In [5]:
#include <cuda_runtime.h>
#include <iostream>

#define N 1 << 20  // 1 million elements
#define BLOCK_SIZE 256

// Error checking macro
#define CHECK(call) \
    { \
        const cudaError_t err = call; \
        if (err != cudaSuccess) { \
            std::cerr << "CUDA error: " << cudaGetErrorString(err) << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            exit(EXIT_FAILURE); \
        } \
    }

// Optimized kernel using grid-stride loop and __restrict__
__global__ void vectorAdd(const float *__restrict__ A, const float *__restrict__ B, float *__restrict__ C, int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    
    // Grid-stride loop for larger arrays
    for (int i = idx; i < n; i += blockDim.x * gridDim.x) {
        C[i] = A[i] + B[i];
    }
}

int main() {
    float *h_A, *h_B, *h_C;
    float *d_A, *d_B, *d_C;

    size_t bytes = N * sizeof(float);

    // Pinned memory allocation for faster transfer
    CHECK(cudaMallocHost(&h_A, bytes));
    CHECK(cudaMallocHost(&h_B, bytes));
    CHECK(cudaMallocHost(&h_C, bytes));

    for (int i = 0; i < N; ++i) {
        h_A[i] = i * 0.5f;
        h_B[i] = i * 2.0f;
    }

    // Device memory
    CHECK(cudaMalloc(&d_A, bytes));
    CHECK(cudaMalloc(&d_B, bytes));
    CHECK(cudaMalloc(&d_C, bytes));

    // Async memory copy to overlap with computation (needs streams for full overlap)
    CHECK(cudaMemcpy(d_A, h_A, bytes, cudaMemcpyHostToDevice));
    CHECK(cudaMemcpy(d_B, h_B, bytes, cudaMemcpyHostToDevice));

    int gridSize = (N + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Launch optimized kernel
    vectorAdd<<<gridSize, BLOCK_SIZE>>>(d_A, d_B, d_C, N);
    CHECK(cudaGetLastError());

    // Copy result back
    CHECK(cudaMemcpy(h_C, d_C, bytes, cudaMemcpyDeviceToHost));

    // Sample output check
    std::cout << "C[100] = " << h_C[100] << std::endl;

    // Cleanup
    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);
    cudaFreeHost(h_A);
    cudaFreeHost(h_B);
    cudaFreeHost(h_C);

    return 0;
}


SyntaxError: invalid decimal literal (2549014608.py, line 39)

In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm


In [3]:

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [4]:
# Paths
data_dir = r"C:\ELCDATASET\Dataset"
train_dir = os.path.join(data_dir, 'Training')
test_dir = os.path.join(data_dir, 'Test')


In [5]:

# Transforms
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(300, scale=(0.6, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(25),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.15))
])

test_transforms = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])


In [6]:

# Datasets
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=test_transforms)
class_names = train_dataset.classes
num_classes = len(class_names)


In [7]:

# Handle class imbalance
class_counts = np.bincount(train_dataset.targets)
weights = 1. / class_counts[train_dataset.targets]
sampler = WeightedRandomSampler(weights, len(weights))

In [8]:

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, sampler=sampler)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [9]:

# Model setup
model = EfficientNet.from_pretrained('efficientnet-b3')

Loaded pretrained weights for efficientnet-b3


In [10]:

# Unfreeze selected layers
for name, param in model.named_parameters():
    if "_blocks.30" in name or "_conv_head" in name or "_bn2" in name:
        param.requires_grad = True
    else:
        param.requires_grad = False


In [11]:

# Replace classifier
model._dropout = nn.Dropout(p=0.5)
model._fc = nn.Sequential(
    nn.Linear(model._fc.in_features, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.6),
    nn.Linear(128, num_classes)
)
model = model.to(device)


In [12]:

# Loss, optimizer, scheduler
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=25)

In [None]:

# Training setup
num_epochs =30  
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
best_val_loss = float('inf')
early_stop_patience = 5
early_stop_counter = 0

In [None]:
num_epochs =30  
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
best_val_loss = float('inf')
early_stop_patience = 5
early_stop_counter = 0

# Training loop
for epoch in range(num_epochs):
    print(f"\nEpoch [{epoch+1}/{num_epochs}] | LR: {optimizer.param_groups[0]['lr']:.6f}")
    model.train()
    total_loss, correct, total = 0, 0, 0
    loop = tqdm(train_loader, desc="Training")

    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        loop.set_postfix(loss=loss.item(), acc=100. * correct / total)

    train_loss = total_loss / len(train_loader.dataset)
    train_acc = 100. * correct / total
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    # Validation
    model.eval()
    val_loss, val_correct, val_total = 0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_loss /= len(test_loader.dataset)
    val_acc = 100. * val_correct / val_total
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    scheduler.step()

    print(f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')
        print("  Saved best model")
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print(" Early stopping triggered.")
            break
        


Epoch [1/30] | LR: 0.000100


Training:  94%|█████████▍| 3370/3577 [4:04:06<17:10,  4.98s/it, acc=58.3, loss=1.39]  

In [None]:

# Final Classification Report
print("\n Classification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Accuracy & Loss plot
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(train_accuracies, label='Train Acc')
plt.plot(val_accuracies, label='Val Acc')
plt.legend(); plt.grid(); plt.title('Accuracy')
plt.subplot(1, 2, 2)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.legend(); 
plt.grid();
plt.title('Loss')
plt.tight_layout(); plt.show()

In [None]:

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.show()

