In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# path and the saved checkpoint path used later in this notebook live under it.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm

In [None]:
# Root folder of the training data. Its entries are listed to derive the class
# names, and each entry is scanned for .jpg frames by ActionSequenceDataset.
DATASET_PATH = "/content/drive/MyDrive/train_FUll"

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

In [None]:
# Every sub-directory of DATASET_PATH is one action class. Stray files in the
# folder are skipped so they neither inflate num_classes nor receive a label.
actions = sorted(
    entry
    for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)
# Sorting first makes the name -> index mapping stable across runs.
label_map = {action: idx for idx, action in enumerate(actions)}
num_classes = len(actions)
print("Total classes:", num_classes)

In [None]:
# Per-frame preprocessing: resize to a fixed 160x160, convert the PIL image to
# a tensor, then normalise each channel with the mean/std values commonly used
# with torchvision's ImageNet-pretrained models.
transform = transforms.Compose(
    [
        transforms.Resize(size=(160, 160)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [None]:
class ActionSequenceDataset(Dataset):
    """Fixed-length frame sequences, one sub-directory of ``root_dir`` per class.

    The ``.jpg`` files of each class directory are sorted by name, truncated to
    ``max_images`` and cut into consecutive, non-overlapping windows of
    ``seq_len`` frames. A trailing window with fewer than ``seq_len`` frames is
    dropped.

    Args:
        root_dir: Directory whose sub-directories are the action classes.
            Entries that are not directories are ignored.
        seq_len: Number of frames per sample.
        max_images: Maximum number of images used per class; ``None`` keeps
            every image found.
        transform: Callable applied to each RGB PIL image. It must return a
            tensor, because the frames of a sample are stacked with
            ``torch.stack``.
        cache_images: When true, every sample is loaded and transformed once
            in the constructor and served from memory afterwards.
        class_to_idx: Optional mapping from class directory name to integer
            label. When omitted, the notebook-level ``label_map`` is used.

    Each item is a ``(frames, label)`` pair where ``frames`` is the stack of
    the ``seq_len`` transformed frames along a new leading dimension.
    """

    def __init__(self, root_dir, seq_len=3, max_images=75, transform=None,
                 cache_images=False, class_to_idx=None):
        self.samples = []
        self.transform = transform
        self.seq_len = seq_len
        self.cache_images = cache_images
        self.cached_data = []

        if class_to_idx is None:
            # Backward-compatible default: the mapping built at notebook level.
            class_to_idx = label_map

        for action in sorted(os.listdir(root_dir)):
            action_path = os.path.join(root_dir, action)
            if not os.path.isdir(action_path):
                # Stray files next to the class folders are not classes.
                continue
            images = sorted(
                name for name in os.listdir(action_path) if name.endswith(".jpg")
            )[:max_images]

            # Step over the images actually present so that only complete
            # windows are visited, whatever value max_images has.
            for start in range(0, len(images) - seq_len + 1, seq_len):
                paths = [
                    os.path.join(action_path, name)
                    for name in images[start:start + seq_len]
                ]
                self.samples.append((paths, class_to_idx[action]))

        if self.cache_images:
            print("Caching images in memory...")
            self.cached_data = [
                (self._load_sequence(paths), label)
                for paths, label in tqdm(self.samples)
            ]

    def _load_sequence(self, paths):
        """Load, transform and stack the frames stored at ``paths``."""
        if self.transform is None:
            raise TypeError(
                "ActionSequenceDataset requires a `transform` that converts "
                "PIL images to tensors"
            )
        frames = []
        for path in paths:
            # The context manager releases the file handle as soon as the
            # frame has been converted.
            with Image.open(path) as img:
                frames.append(self.transform(img.convert("RGB")))
        return torch.stack(frames)

    def __len__(self):
        """Return the number of complete sequences found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the ``(frames, label)`` pair for sample ``idx``."""
        if self.cache_images:
            return self.cached_data[idx]
        paths, label = self.samples[idx]
        return self._load_sequence(paths), label


In [None]:
class CNN_LSTM(nn.Module):
    """Frozen MobileNetV2 feature extractor followed by an LSTM classifier.

    Every frame of a sequence goes through the MobileNetV2 convolutional
    features, is global-average-pooled to one vector, and the per-frame
    vectors are fed to a single-layer LSTM. The LSTM output at the last time
    step is mapped to class logits by a linear layer.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes):
        super().__init__()
        # `pretrained=True` is deprecated in recent torchvision releases; use
        # the equivalent weights enum when it exists and keep the legacy call
        # for older installations.
        weights_enum = getattr(models, "MobileNet_V2_Weights", None)
        if weights_enum is not None:
            base = models.mobilenet_v2(weights=weights_enum.IMAGENET1K_V1)
        else:
            base = models.mobilenet_v2(pretrained=True)
        # The backbone is used as a fixed feature extractor.
        for param in base.parameters():
            param.requires_grad = False
        self.cnn = base.features
        self.cnn.eval()

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.lstm = nn.LSTM(input_size=1280, hidden_size=256, num_layers=1, batch_first=True)
        self.fc = nn.Linear(256, num_classes)

    def train(self, mode=True):
        """Switch train/eval mode while keeping the frozen backbone in eval mode.

        Without this, ``model.train()`` would put the backbone's BatchNorm
        layers back into training mode: they would normalise with batch
        statistics and overwrite the pretrained running statistics even though
        the backbone weights are frozen.
        """
        super().train(mode)
        self.cnn.eval()
        return self

    def forward(self, x):
        """Compute class logits for a batch of frame sequences.

        Args:
            x: Tensor of shape ``(B, T, C, H, W)``.

        Returns:
            Tensor of shape ``(B, num_classes)``.
        """
        B, T, C, H, W = x.shape
        # Fold time into the batch axis so the 2-D CNN sees plain images;
        # reshape (unlike view) also accepts non-contiguous input.
        x = x.reshape(B * T, C, H, W)
        feat = self.cnn(x)
        # One pooled feature vector per frame, regrouped per sequence.
        feat = self.pool(feat).reshape(B, T, -1)
        out, _ = self.lstm(feat)
        # Classify from the LSTM output at the final time step.
        return self.fc(out[:, -1, :])

# Build the network on the selected device, with cross-entropy as the training
# objective and Adam (learning rate 1e-4) as the optimiser.
model = CNN_LSTM(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Training hyperparameters.
EPOCHS = 10
BATCH_SIZE = 16
SEQ_LEN = 3
MAX_IMAGES_PER_CLASS = 75

# Frames are read from disk on demand (no in-memory cache).
dataset = ActionSequenceDataset(
    root_dir=DATASET_PATH,
    seq_len=SEQ_LEN,
    max_images=MAX_IMAGES_PER_CLASS,
    transform=transform,
    cache_images=False,
)

# Shuffled mini-batches, loaded by two worker processes.
loader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
print("Total sequences:", len(dataset))

In [None]:
# Gradient scaler for mixed-precision training. It is enabled only when CUDA
# is present; when disabled, scale/step/update simply pass through.
try:
    # Current API; the torch.cuda.amp.GradScaler() spelling is deprecated.
    scaler = torch.amp.GradScaler("cuda", enabled=torch.cuda.is_available())
except AttributeError:
    # Older PyTorch releases only provide the CUDA-specific class.
    scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

  scaler = torch.cuda.amp.GradScaler()


In [None]:
# Train for EPOCHS passes over the loader and report the mean batch loss.
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0.0
    for frames, labels in tqdm(loader):
        frames, labels = frames.to(device), labels.to(device)

        optimizer.zero_grad()
        # Mixed precision only on CUDA; on CPU the context is a no-op. This
        # replaces the deprecated torch.cuda.amp.autocast() spelling.
        with torch.autocast(device_type=device.type, enabled=device.type == "cuda"):
            outputs = model(frames)
            loss = criterion(outputs, labels)

        # Scale the loss before backward; step() unscales the gradients before
        # the optimiser update and update() adjusts the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()

    # Guard against an empty loader so the report never divides by zero.
    mean_loss = epoch_loss / max(len(loader), 1)
    print(f"Epoch [{epoch+1}/{EPOCHS}] Loss: {mean_loss:.4f}")

In [None]:
# Persist only the model's state_dict (not the whole module object) to Drive.
MODEL_PATH = "/content/drive/MyDrive/cnn_Lstm_model.pth"
torch.save(obj=model.state_dict(), f=MODEL_PATH)
print("Model saved at:", MODEL_PATH)