In [1]:
from datasets import load_dataset
from torchvision.transforms import Compose, Resize, ToTensor, Lambda
import torchvision.transforms.functional as TF
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
from torch.nn.utils.rnn import pad_sequence
from torchvision.transforms.functional import to_pil_image


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directly load with splits if supported
datasets = load_dataset("jinmang2/ucf_crime")
datasets = datasets['train'].shuffle(seed=42)


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [3]:
train_test_split = datasets.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

train_val_split = train_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_val_split['train']
val_dataset = train_val_split['test']


In [4]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import cv2

class VideoDataset(Dataset):
    def __init__(self, dataset, target_fps=1, transform=None):
        self.dataset = dataset
        self.target_fps = target_fps
        self.transform = transform or transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),  # Appropriate for ResNet
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.feature_extractor = models.resnet50(pretrained=True)
        # Modify the fully connected layer to an Identity to use as a feature extractor
        self.feature_extractor.fc = torch.nn.Identity()
        self.feature_extractor.eval()

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        video_path = self.dataset[idx]['video_path']
        frames = self.load_video(video_path, self.target_fps)
        features = []
        with torch.no_grad():
            for frame in frames:
                frame = self.transform(frame)
                frame = frame.unsqueeze(0)  # Add batch dimension for individual frame processing
                feature = self.feature_extractor(frame)
                features.append(feature.squeeze(0))  # Remove batch dimension after processing
        features = torch.stack(features)  # Stack to get a single tensor for all frames
        label = self.dataset[idx]['event']
        return features, label

    def load_video(self, video_path, target_fps):
        cap = cv2.VideoCapture(video_path)
        frames = []
        native_fps = cap.get(cv2.CAP_PROP_FPS)
        frame_ratio = max(1, round(native_fps / target_fps))

        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % frame_ratio == 0:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
            frame_idx += 1
        cap.release()
        return frames


In [5]:
from torch.utils.data import DataLoader

train_dataset = VideoDataset(train_dataset)
val_dataset = VideoDataset(val_dataset)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)



In [6]:
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self):
        super(LSTM, self).__init__()
        self.input_dim = 2048  # Number of input features
        self.h = 256  # Number of features in hidden state
        self.numOfLayers = 1  # Number of LSTM layers
        self.numOfClasses = 14  # Number of output classes
        self.W = nn.Linear(self.h, self.numOfClasses)
        self.lstm = nn.LSTM(self.input_dim, self.h, self.numOfLayers, batch_first=True)


    def forward(self, inputs):
        # Forward pass through LSTM layer
        # x of shape (batch, seq, feature)
        output, (hidden, cn) = self.lstm(inputs)
        # Assuming using the last hidden state
        out = self.W(hidden[-1])
        return out



In [7]:
def validate(model, val_loader, device):
    model.eval()  # Set the model to evaluation mode
    correct_predictions = 0
    total_samples = 0
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Assuming outputs are logits and you are doing a classification task
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = correct_predictions / total_samples
    model.train()  # Set the model back to training mode
    return accuracy


In [None]:
import matplotlib.pyplot as plt

def train(model, val_loader, computeLoss, optimizer, num_epochs, device, save_path='best_model.pth'):
    model = model.to(device)
    previous_val_accuracy = 0
    best_val_accuracy = 0

    for epoch in range(num_epochs):
        model.train()
        total_correct = 0
        total_samples = 0
        batch_losses = []
        batch_accuracies = []
        train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)


        for i, (inputs, labels) in enumerate(train_loader):
            print(inputs.shape, labels)
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = computeLoss(outputs, labels)
            batch_losses.append(loss.item())

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Calculate batch accuracy
            _, predicted = torch.max(outputs, 1)
            correct = (predicted == labels).sum().item()
            total_correct += correct
            total_samples += labels.size(0)

            if (i + 1) % 100 == 0:
                batch_accuracy = 100.0 * total_correct / total_samples
                batch_accuracies.append(batch_accuracy)
                print(f'Epoch {epoch+1}, Step {i+1}, Loss: {sum(batch_losses) / len(batch_losses):.4f}, '
                      f'Accuracy: {batch_accuracy:.2f}%')
                total_correct = 0
                total_samples = 0
                batch_losses = []

        # Validation after each epoch
        val_accuracy = validate(model, val_loader, device)
        print(f'Epoch {epoch+1}: Validation Accuracy: {val_accuracy:.4f}')

        # Saving the model if it has the best validation loss
        if val_accuracy < best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), save_path)
            print(f'Saved best model to {save_path}')

        # Early stopping condition (less than 10% decrease)
        if val_accuracy < 0.9 * previous_val_accuracy:
            print("Stopping early due to less than 10% decrease in validation loss.")
            break
        previous_val_accuracy = val_accuracy

        # Plotting
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.plot(batch_accuracies, label='Accuracy per 100 examples')
        plt.title('Accuracy per 100 examples')
        plt.xlabel('Batch')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()

    # Optionally save the final model state
    final_model_path = 'final_model.pth'
    torch.save(model.state_dict(), final_model_path)
    print(f'Saved final model state to {final_model_path}')

# Assumptions about other components of your setup
model = LSTM()  # Your LSTM model
computeLoss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Assuming 'train_loader' and 'val_loader' are defined (your DataLoader instances)
train(model, val_loader, computeLoss, optimizer, num_epochs=5, device=device)



In [5]:
# Load the test dataset
test_dataset = VideoDataset(test_dataset)

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)




In [10]:
modelFinal = LSTM()
modelFinal.load_state_dict(torch.load('final_model.pth'))

<All keys matched successfully>

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()

def test(model, data_loader, device):
    model = model.to(device)
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    correct_predictions = 0
    total_predictions = 0
    counter = 0
    with torch.no_grad():  # Disable gradient computation
            for inputs, labels in data_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_predictions += (predicted == labels).sum().item()
                total_predictions += labels.size(0)
                counter += 1
                print(predicted, labels, correct_predictions, total_predictions)
    
    avg_loss = total_loss / len(data_loader)
    accuracy = correct_predictions / total_predictions
    return avg_loss, accuracy

# Evaluate the model
test_loss, test_accuracy = test(modelFinal, test_loader, device)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import cv2
import numpy as np

class VideoDataset3DCNN(Dataset):
    def __init__(self, dataset, clip_length=120, transform=None):
        self.dataset = dataset
        self.clip_length = clip_length 
        self.transform = transform or transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((112, 112)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        video_path = self.dataset[idx]['video_path']
        label = self.dataset[idx]['anomaly']
        frames = self.load_video(video_path, self.clip_length)

        if self.transform:
            frames = [self.transform(frame) for frame in frames]

        # Stack frames along the zeroth dimension and unsqueeze to add a dummy batch dimension
        frames_tensor = torch.stack(frames, dim=0)
        frames_tensor = frames_tensor.permute(1, 0, 2, 3)

        return frames_tensor, label

    def load_video(self, video_path, clip_length):
        """
        Load a clip containing 'clip_length' frames from a video.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frame_indices = [min(int(fps * i), int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1) for i in range(clip_length)]

        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
                frames.append(frame)
            elif frames:
                # If the video is shorter than required and no frames are left to read, pad with the last frame
                frames += [frames[-1]] * (clip_length - len(frames))
                break
            else:
                # If no frames have been captured at all, break early
                break
        cap.release()
        return frames


In [6]:
from torch.utils.data import DataLoader

# Assuming 'datasets' is your loaded dataset, e.g., from Hugging Face or another source
train_dataset = VideoDataset3DCNN(train_dataset)
val_dataset = VideoDataset3DCNN(val_dataset)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

In [7]:
for inputs, labels in train_loader:
    print(inputs.shape, labels)
    break

torch.Size([1, 3, 120, 112, 112]) tensor([1])


In [10]:
import torch
import torch.nn as nn

class Conv3D(nn.Module):
    def __init__(self, num_classes):
        super(Conv3D, self).__init__()
        self.conv1 = nn.Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.conv2 = nn.Conv3d(16, 32, kernel_size=(3, 3, 3), padding=1)
        
        self.fc1 = nn.Linear(3010560  , 256)  # Feature reduction layer
        self.fc2 = nn.Linear(256, num_classes)  # Output layer to classes
        
    def forward(self, inputs):
        convLayer1 = self.conv1(inputs)
        convLayer1 = self.relu(convLayer1)
        convLayer1 = self.pool(convLayer1)
        
        convLayer2 = self.conv2(convLayer1)
        convLayer2 = self.relu(convLayer2)
        
        # Flatten the tensor for the fully connected layer
        featureVector = convLayer2.view(convLayer2.size(0), -1)  # Ensure it's reshaped properly
        
        layer1 = self.fc1(featureVector)
        layer1 = self.relu(layer1)
        outputs = self.fc2(layer1)
        
        return outputs

In [11]:
def CNNvalidate(model, val_loader, device):
    model.eval()  # Set the model to evaluation mode
    total_correct = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient computation
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get the predicted classes
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = total_correct / total_samples
    print(f'Validation Accuracy: {accuracy * 100:.2f}%')
    return accuracy


In [12]:
import matplotlib.pyplot as plt

def CNNtrain(model, val_loader, computeLoss, optimizer, num_epochs, device, save_path='best_model.pth'):
    model = model.to(device)
    best_val_accuracy = 0

    for epoch in range(num_epochs):
        model.train()
        total_correct = 0
        total_samples = 0
        batch_losses = []
        batch_accuracies = []

        for i, (inputs, labels) in enumerate(train_loader):
            print(inputs.shape, labels)
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = computeLoss(outputs, labels)
            batch_losses.append(loss.item())

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Calculate batch accuracy
            _, predicted = torch.max(outputs.data, 1)
            correct = (predicted == labels).sum().item()
            total_correct += correct
            total_samples += labels.size(0)

            if (i + 1) % 100 == 0:
                batch_accuracy = 100.0 * total_correct / total_samples
                batch_accuracies.append(batch_accuracy)
                print(f'Epoch {epoch+1}, Step {i+1}, Loss: {sum(batch_losses) / len(batch_losses):.4f}, '
                      f'Accuracy: {batch_accuracy:.2f}%')
                total_correct = 0
                total_samples = 0
                batch_losses = []

        # Validation after each epoch
        val_accuracy = validate(model, val_loader, device)
        print(f'Epoch {epoch+1}: Validation Accuracy: {val_accuracy:.4f}')

        # Saving the model if it has the best validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), save_path)
            print(f'Saved best model to {save_path}')

    # Plotting
    plt.figure(figsize=(10, 5))
    plt.plot(batch_accuracies, label='Accuracy per 100 examples')
    plt.title('Accuracy per 100 examples')
    plt.xlabel('Batch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    # Optionally save the final model state
    final_model_path = 'final_model.pth'
    torch.save(model.state_dict(), final_model_path)
    print(f'Saved final model state to {final_model_path}')


# Assumptions about other components of your setup
model = Conv3D(2)  # Your LSTM model
computeLoss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Assuming 'train_loader' and 'val_loader' are defined (your DataLoader instances)
CNNtrain(model, val_loader, computeLoss, optimizer, num_epochs=5, device=device)



cuda
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([1])
torch.Size([1, 3, 120, 112, 112]) tensor([0])
torch.Size([1, 3, 120, 112, 1