<a href="https://colab.research.google.com/github/Shahriar88/python_learning/blob/main/NN_Basic_v0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## BasicNN

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# 1. Create a dummy dataset (100 samples, 10 features each)
SEED = 42                 # fixed seed so a fresh "Restart & Run All" reproduces the same data and weights
torch.manual_seed(SEED)

features = 10             # number of input features per sample
samples = 100             # number of samples (rows) in the dataset
n_class = 2               # number of target classes
X = torch.randn(samples, features)          # inputs, shape (samples, features)
y = torch.randint(0, n_class, (samples,))   # integer class labels in [0, n_class), shape (samples,)

In [3]:
# 2. Define a simple feedforward network
class BasicNN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Size of each input vector. ``None`` (the default) falls
            back to the notebook-level ``features`` value.
        hidden_dim: Width of the hidden layer (default 16).
        num_classes: Number of output logits. ``None`` (the default) falls
            back to the notebook-level ``n_class`` value.

    Passing the sizes explicitly makes the model independent of notebook
    globals, which other sections of the notebook rebind.
    """

    def __init__(self, in_features=None, hidden_dim=16, num_classes=None):
        super().__init__()
        # Only consult the notebook globals when the caller did not pass a size,
        # so `BasicNN()` keeps working exactly as before.
        if in_features is None:
            in_features = features
        if num_classes is None:
            num_classes = n_class

        self.fc1 = nn.Linear(in_features, hidden_dim)   # input -> hidden
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)   # hidden -> output logits

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for x of shape (batch, in_features)."""
        x = self.relu(self.fc1(x))
        return self.fc2(x)

In [4]:
# 2.1 Instantiate the network defined in the previous cell
model = BasicNN()
# 3. Loss function and optimizer
criterion = nn.CrossEntropyLoss() # classification loss; applied below to the model's raw outputs and the integer labels y
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam over every model parameter, learning rate 0.01

In [5]:
# 4. Training loop (full-batch: every step uses the whole dummy dataset)
num_epochs = 20   # single source of truth for the loop bound and the progress message

model.train()     # make sure the model is in training mode, even if it was switched to eval earlier
for epoch in range(num_epochs):
    # Forward pass: scores for every sample, then the loss against the labels
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and optimization
    optimizer.zero_grad()   # clear gradients accumulated by the previous step
    loss.backward()         # backpropagate
    optimizer.step()        # update the weights

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/20], Loss: 0.7343
Epoch [2/20], Loss: 0.7143
Epoch [3/20], Loss: 0.6990
Epoch [4/20], Loss: 0.6876
Epoch [5/20], Loss: 0.6789
Epoch [6/20], Loss: 0.6719
Epoch [7/20], Loss: 0.6657
Epoch [8/20], Loss: 0.6599
Epoch [9/20], Loss: 0.6541
Epoch [10/20], Loss: 0.6479
Epoch [11/20], Loss: 0.6411
Epoch [12/20], Loss: 0.6341
Epoch [13/20], Loss: 0.6266
Epoch [14/20], Loss: 0.6190
Epoch [15/20], Loss: 0.6116
Epoch [16/20], Loss: 0.6048
Epoch [17/20], Loss: 0.5982
Epoch [18/20], Loss: 0.5916
Epoch [19/20], Loss: 0.5851
Epoch [20/20], Loss: 0.5784


In [6]:
# 5. Test prediction on one random sample
model.eval()   # inference mode; call model.train() again before resuming training
with torch.no_grad():   # gradients are not needed for inference
    test_input = torch.randn(1, features)       # one sample with the same feature count as the training data
    logits = model(test_input)                  # shape (1, n_class)
    prediction = torch.argmax(logits, dim=1)    # arg-max over the class dimension, not the flattened tensor
    print("Predicted class:", prediction.item())

Predicted class: 0


## BasicDL

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# 1. Create a dummy dataset (100 samples, 20 features each)
SEED = 42
torch.manual_seed(SEED)           # fixed seed so a fresh run reproduces the same data and weights
X = torch.randn(100, 20)          # inputs, shape (100, 20)
y = torch.randint(0, 3, (100,))   # integer labels for 3 classes, shape (100,)

In [None]:
# 2. Define a deeper feedforward network
class DeepNN(nn.Module):
    """Four-layer MLP (20 -> 64 -> 128 -> 64 -> 3) with ReLU activations.

    Dropout with p=0.3 is applied once, right after the second hidden
    activation. The forward pass returns raw logits for 3 classes.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=20, out_features=64)    # input   -> hidden1
        self.fc2 = nn.Linear(in_features=64, out_features=128)   # hidden1 -> hidden2
        self.fc3 = nn.Linear(in_features=128, out_features=64)   # hidden2 -> hidden3
        self.fc4 = nn.Linear(in_features=64, out_features=3)     # hidden3 -> logits
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)                         # regularization

    def forward(self, x):
        """Map a (batch, 20) input to (batch, 3) class logits."""
        hidden1 = self.relu(self.fc1(x))
        hidden2 = self.relu(self.fc2(hidden1))
        hidden2 = self.dropout(hidden2)
        hidden3 = self.relu(self.fc3(hidden2))
        return self.fc4(hidden3)

In [None]:
model = DeepNN()  # fresh network; rebinds `model` for this section

# 3. Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # classification loss applied to the model's raw outputs and integer labels
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam over every model parameter, learning rate 0.001

In [None]:
# 4. Training loop (full-batch: every step uses the whole dummy dataset)
num_epochs = 10   # single source of truth for the loop bound and the progress message

model.train()     # dropout must be active while training, even if the model was switched to eval earlier
for epoch in range(num_epochs):
    # Forward pass and loss
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and parameter update
    optimizer.zero_grad()   # clear gradients accumulated by the previous step
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

In [None]:
# 5. Test prediction on one random sample
model.eval()   # disables dropout for inference; call model.train() again before resuming training
with torch.no_grad():   # gradients are not needed for inference
    test_input = torch.randn(1, 20)             # one sample with 20 features
    logits = model(test_input)                  # shape (1, 3)
    prediction = torch.argmax(logits, dim=1)    # arg-max over the class dimension, not the flattened tensor
    print("Predicted class:", prediction.item())

## Basic CNN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# 1. Create a dummy dataset (100 samples, 1 channel, 28x28 "images")
SEED = 42
torch.manual_seed(SEED)   # fixed seed so a fresh run reproduces the same data and weights

in_channels = 1   # 1 -> grayscale, 3 -> RGB
sample = 100      # number of images in the dataset
n_class = 10      # number of target classes

# The leading (batch) dimension is the number of samples, not the channel count.
X = torch.randn(sample, in_channels, 28, 28)   # images, shape (N, C, H, W)
y = torch.randint(0, n_class, (sample,))       # one integer label per image, shape (N,)

# 2. Define a simple ConvNet
class BasicConvNN(nn.Module):
    """Small CNN for 28x28 images: two (conv -> ReLU -> 2x2 max-pool) stages, then two linear layers.

    Args:
        input_channels: Channels of the input images. ``None`` (the default)
            falls back to the notebook-level ``in_channels`` value.
        num_classes: Number of output logits. ``None`` (the default) falls
            back to the notebook-level ``n_class`` value.

    The first linear layer is sized for 16 x 7 x 7 features, i.e. for 28x28
    inputs after two 2x2 poolings.
    """

    def __init__(self, input_channels=None, num_classes=None):
        super().__init__()
        # Only consult the notebook globals when the caller did not pass a size,
        # so `BasicConvNN()` keeps working exactly as before.
        if input_channels is None:
            input_channels = in_channels
        if num_classes is None:
            num_classes = n_class

        self.conv1 = nn.Conv2d(input_channels, out_channels=8, kernel_size=3, stride=1, padding=1)  # (N,C,28,28) -> (N,8,28,28)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)  # halves height and width

        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)  # (N,8,14,14) -> (N,16,14,14)
        self.fc1 = nn.Linear(16 * 7 * 7, 64)   # flattened (N,16,7,7) -> 64
        self.fc2 = nn.Linear(64, num_classes)  # 64 -> class logits

    def forward(self, x):
        """Return class logits of shape (N, num_classes) for images of shape (N, C, 28, 28)."""
        x = self.pool(self.relu(self.conv1(x)))   # -> (N,8,14,14)
        x = self.pool(self.relu(self.conv2(x)))   # -> (N,16,7,7)
        x = x.flatten(start_dim=1)                # keep the batch dim, flatten the rest
        x = self.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
model = BasicConvNN()  # fresh network; rebinds `model` for this section

# 3. Loss and optimizer
criterion = nn.CrossEntropyLoss()  # classification loss applied to the model's raw outputs and integer labels
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam over every model parameter, learning rate 0.01

In [None]:
# 4. Training loop (full-batch: every step uses the whole dummy dataset)
num_epochs = 5    # single source of truth for the loop bound and the progress message

model.train()     # make sure the model is in training mode, even if it was switched to eval earlier
for epoch in range(num_epochs):
    # Forward pass and loss
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and parameter update
    optimizer.zero_grad()   # clear gradients accumulated by the previous step
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

In [None]:
# 5. Test prediction on one random image
model.eval()   # inference mode; call model.train() again before resuming training
with torch.no_grad():   # gradients are not needed for inference
    test_input = torch.randn(1, in_channels, 28, 28)   # one image with the same channel count as the training data
    logits = model(test_input)                         # shape (1, n_class)
    prediction = torch.argmax(logits, dim=1)           # arg-max over the class dimension, not the flattened tensor
    print("Predicted class:", prediction.item())

## Basic LSTM

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# 1. Create a dummy sequential dataset
# 100 sequences, each of length 5, with 10 features at each timestep
SEED = 42
torch.manual_seed(SEED)              # fixed seed so a fresh run reproduces the same data and weights
X = torch.randn(100, 5, 10)          # shape: (batch, seq_len, input_dim)
y = torch.randint(0, 2, (100,))      # binary classification labels (0 or 1), shape (batch,)

In [None]:
# 2. Define a simple LSTM-based model
class BasicLSTM(nn.Module):
    """Sequence classifier: an LSTM encoder followed by a linear head.

    The head is applied to the final hidden state of the top LSTM layer, so
    one vector of ``output_dim`` scores is produced per input sequence.
    Inputs are batch-first: (batch, seq_len, input_dim).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return scores of shape (batch, output_dim)."""
        # The per-timestep outputs and the cell state are not needed here;
        # final_hidden has shape (num_layers, batch, hidden_dim).
        _, (final_hidden, _) = self.lstm(x)
        top_layer_state = final_hidden[-1]   # last layer's hidden state: (batch, hidden_dim)
        return self.fc(top_layer_state)

In [None]:
model = BasicLSTM(input_dim=10, hidden_dim=32, output_dim=2)  # input_dim matches X's last dimension; 2 output scores for the binary labels

# 3. Loss and optimizer
criterion = nn.CrossEntropyLoss()  # classification loss applied to the model's raw outputs and integer labels
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam over every model parameter, learning rate 0.01

In [None]:
# 4. Training loop (full-batch: every step uses the whole dummy dataset)
num_epochs = 10   # single source of truth for the loop bound and the progress message

model.train()     # make sure the model is in training mode, even if it was switched to eval earlier
for epoch in range(num_epochs):
    # Forward pass and loss
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and parameter update
    optimizer.zero_grad()   # clear gradients accumulated by the previous step
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

In [None]:
# 5. Test prediction on one random sequence
model.eval()   # inference mode; call model.train() again before resuming training
with torch.no_grad():   # gradients are not needed for inference
    test_input = torch.randn(1, 5, 10)          # one sequence, length 5, 10 features
    logits = model(test_input)                  # shape (1, 2)
    prediction = torch.argmax(logits, dim=1)    # arg-max over the class dimension, not the flattened tensor
    print("Predicted class:", prediction.item())

## Basic Transformer

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# 1. Create a dummy dataset
# 100 sequences, each of length 5, with 16 features
SEED = 42
torch.manual_seed(SEED)             # fixed seed so a fresh run reproduces the same data and weights
X = torch.randn(100, 5, 16)         # (batch, seq_len, input_dim)
y = torch.randint(0, 3, (100,))     # integer labels for 3 classes, shape (batch,)

In [None]:
# 2. Define a simple Transformer-based model
class BasicTransformer(nn.Module):
    """Sequence classifier built on a Transformer encoder.

    Pipeline: linear projection of the input features to ``model_dim`` ->
    stack of ``num_layers`` encoder layers -> mean over the sequence axis ->
    linear classification head. Inputs are batch-first:
    (batch, seq_len, input_dim). No positional encoding is added.
    """

    def __init__(self, input_dim, model_dim, num_heads, num_layers, num_classes):
        super().__init__()

        # Projects raw features into the encoder's working dimension.
        self.embedding = nn.Linear(in_features=input_dim, out_features=model_dim)

        # One layer is described here; TransformerEncoder stacks num_layers of them.
        layer_template = nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=128,
            dropout=0.1,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(layer_template, num_layers=num_layers)

        # Classification head on the pooled representation.
        self.fc = nn.Linear(in_features=model_dim, out_features=num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes)."""
        embedded = self.embedding(x)           # (batch, seq_len, model_dim)
        encoded = self.transformer(embedded)   # self-attention over the sequence
        pooled = encoded.mean(dim=1)           # average over seq_len -> (batch, model_dim)
        return self.fc(pooled)

In [None]:
model = BasicTransformer(input_dim=16, model_dim=32, num_heads=2, num_layers=2, num_classes=3)  # input_dim matches X's last dimension; 3 output scores for the 3 classes

# 3. Loss and optimizer
criterion = nn.CrossEntropyLoss()  # classification loss applied to the model's raw outputs and integer labels
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam over every model parameter, learning rate 0.001

In [None]:
# 4. Training loop (full-batch: every step uses the whole dummy dataset)
num_epochs = 5    # single source of truth for the loop bound and the progress message

model.train()     # dropout must be active while training, even if the model was switched to eval earlier
for epoch in range(num_epochs):
    # Forward pass and loss
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and parameter update
    optimizer.zero_grad()   # clear gradients accumulated by the previous step
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

In [None]:
# 5. Test prediction on one random sequence
model.eval()   # disables the encoder's dropout for inference; call model.train() again before resuming training
with torch.no_grad():   # gradients are not needed for inference
    test_input = torch.randn(1, 5, 16)          # one sequence, length 5, 16 features
    logits = model(test_input)                  # shape (1, 3)
    prediction = torch.argmax(logits, dim=1)    # arg-max over the class dimension, not the flattened tensor
    print("Predicted class:", prediction.item())

## Basic MaskRCNN

In [None]:
import torch
import torchvision
import torch.optim as optim

from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F



In [None]:
# 1. Dummy image batch: a list holding a single 3-channel 224x224 image
images = [torch.rand(3, 224, 224)]

# 2. Dummy training targets: one dict per image with the keys
#    "boxes" ([xmin, ymin, xmax, ymax] per object), "labels" (class index per
#    object) and "masks" (one binary HxW mask per object)
targets = [
    dict(
        boxes=torch.tensor([[50, 50, 150, 150]], dtype=torch.float32),
        labels=torch.tensor([1]),
        masks=torch.randint(low=0, high=2, size=(1, 224, 224), dtype=torch.uint8),
    )
]


In [None]:
# 3. Load torchvision's Mask R-CNN (ResNet-50 + FPN) with its default pre-trained weights
model = maskrcnn_resnet50_fpn(weights="DEFAULT")
print(model)  # dump the module hierarchy
# OR: build Mask R-CNN around a custom backbone instead — the next cell does this and rebinds `model`

In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision.models.detection import MaskRCNN

# Custom backbone: a simple CNN
class MyBackbone(nn.Module):
    """Three stride-2 3x3 convolutions (3 -> 32 -> 64 -> 128 channels), each followed by ReLU.

    Every convolution halves the spatial size, so an (N, 3, H, W) input yields
    an (N, 128, H/8, W/8) feature map. The result is returned as a
    single-entry dict keyed "0", and ``out_channels`` records the channel
    count of that map.
    """

    def __init__(self):
        super().__init__()
        channel_plan = [(3, 32), (32, 64), (64, 128)]   # (in, out) channels per stage
        stages = []
        for c_in, c_out in channel_plan:
            stages.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1))
            stages.append(nn.ReLU())
        self.body = nn.Sequential(*stages)
        # Required by Mask R-CNN: channel count of the returned feature map
        self.out_channels = 128

    def forward(self, x):
        """Return the feature map wrapped in a dict, as the detection model expects."""
        feature_map = self.body(x)
        return {"0": feature_map}

# Create Mask R-CNN with your backbone.
# MyBackbone returns a single feature map (key "0"), whereas MaskRCNN's default
# anchor generator is configured for five pyramid levels. The anchor generator
# and both RoI poolers are therefore replaced with single-level versions;
# without this the forward pass fails on the feature-map / anchor-size mismatch.
from torchvision.models.detection.anchor_utils import AnchorGenerator

backbone = MyBackbone()

# One inner tuple per feature map: 5 sizes x 3 aspect ratios on the single map.
anchor_generator = AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),),
)
# RoI poolers that read only the "0" feature map (7x7 for boxes, 14x14 for masks).
box_roi_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"], output_size=7, sampling_ratio=2)
mask_roi_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"], output_size=14, sampling_ratio=2)

model = MaskRCNN(
    backbone,
    num_classes=2,  # 1 class + background
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=box_roi_pool,
    mask_roi_pool=mask_roi_pool,
)
print(model)


In [None]:
params = [p for p in model.parameters() if p.requires_grad]  # optimise only the parameters that require gradients
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)  # SGD with momentum and weight decay
# Alternative optimizer (uncomment to use AdamW instead of SGD):
# optimizer = optim.AdamW(params, lr=0.0001, weight_decay=0.0005)

#### Example Custom Dataset

In [None]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import os

class MyDataset(Dataset):
    """Demo detection/segmentation dataset: every image in a folder gets one dummy object.

    Args:
        image_dir: Folder containing the image files.
        transform: Optional callable applied to the image tensor only (the
            target is returned unchanged).

    Each item is ``(img, target)`` where ``img`` is a float tensor [C, H, W]
    and ``target`` is a dict with the keys ``boxes``, ``labels``, ``masks``,
    ``image_id``, ``area`` and ``iscrowd``.
    """

    # Only directory entries with these extensions are treated as images;
    # sub-folders and stray files (e.g. notes, OS metadata) would otherwise
    # make Image.open fail inside __getitem__.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        # Sorted for a stable index -> file mapping, e.g. img1.png, img2.png, ...
        self.images = sorted(
            name for name in os.listdir(image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Number of image files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` and attach a dummy square annotation."""
        # 1. Load image; the context manager closes the file handle promptly
        img_path = os.path.join(self.image_dir, self.images[idx])
        with Image.open(img_path) as pil_img:
            img = torchvision.transforms.functional.to_tensor(pil_img.convert("RGB"))  # [C,H,W] float32
        _, height, width = img.shape

        # 2. Dummy annotations (for demo): one box & one mask per image.
        # The nominal square is (50, 50)-(150, 150); it is clipped to the image
        # so that box, mask and area stay consistent on images smaller than 150 px.
        x_min, y_min = 50, 50
        x_max = max(x_min, min(150, width))
        y_max = max(y_min, min(150, height))

        boxes = torch.tensor([[x_min, y_min, x_max, y_max]], dtype=torch.float32)  # [xmin, ymin, xmax, ymax]
        labels = torch.tensor([1], dtype=torch.int64)                              # class index = 1
        mask = torch.zeros((height, width), dtype=torch.uint8)
        mask[y_min:y_max, x_min:x_max] = 1                                         # square mask matching the box
        masks = mask.unsqueeze(0)                                                  # [N,H,W]
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])           # 10000.0 for the unclipped square

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": torch.tensor([idx]),
            "area": area,
            "iscrowd": torch.zeros((1,), dtype=torch.int64),
        }

        if self.transform:
            img = self.transform(img)

        return img, target

# Collate function for the DataLoader
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list such as ``[(img1, tgt1), (img2, tgt2)]`` becomes
    ``((img1, img2), (tgt1, tgt2))``; nothing is stacked, so the images may
    have different sizes.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
# Usage: "path/to/images" is a placeholder — point it at a real folder of images
dataset = MyDataset("path/to/images")
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)  # collate_fn keeps images/targets as tuples

# Smoke test: pull a single batch and inspect its structure
for images, targets in dataloader:
    print(len(images), len(targets))
    print(images[0].shape)   # torch.Size([3,H,W])
    print(targets[0])        # dict with boxes, labels, masks, etc.
    break                    # only the first batch is inspected

In [None]:
num_epochs = 2

# Pick the device once and move the model there; the batches are moved inside the loop.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()   # in training mode the detection model returns a dict of losses

    epoch_loss = 0.0
    num_batches = 0

    for images, targets in dataloader:   # from your custom DataLoader
        # move data to the same device as the model
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # forward pass → returns dict of losses
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # backward + optimize
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    # Report the mean loss over the epoch; guard against an empty DataLoader.
    if num_batches == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], no batches in dataloader")
    else:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / num_batches:.4f}")

In [None]:
model.eval()   # switch to evaluation mode
device = next(model.parameters()).device   # run inference on whichever device holds the model's weights

with torch.no_grad():
    for images, targets in dataloader:   # same DataLoader; targets are not used in eval
        images = [img.to(device) for img in images]

        # forward pass (NO targets here)
        outputs = model(images)

        # Each element in outputs is a dict with keys:
        # 'boxes', 'labels', 'scores', 'masks'
        for i, output in enumerate(outputs):
            print(f"Image {i}:")
            print("Boxes:", output["boxes"])     # [N,4] tensor
            print("Labels:", output["labels"])   # [N]
            print("Scores:", output["scores"])   # confidence scores
            print("Masks:", output["masks"].shape)  # [N,1,H,W]

#### Visualizing Predictions

In [None]:
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

# Show the first image of the last evaluated batch together with its first predicted box.
# `images` and `outputs` come from the evaluation loop above.
img = images[0].cpu()
pred_boxes = outputs[0]["boxes"].cpu()   # [N,4] predictions for that image

fig, ax = plt.subplots()
ax.imshow(F.to_pil_image(img))
ax.set_title("First predicted box")

if len(pred_boxes) > 0:
    # plot first predicted box as [xmin, ymin, xmax, ymax]
    box = pred_boxes[0].numpy()
    ax.add_patch(
        plt.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1],
                      fill=False, color="red", linewidth=2)
    )
else:
    # An untrained or barely trained model may return no detections at all.
    print("No boxes were predicted for this image.")
plt.show()
