In this notebook I use a pretrained ResNet-18 to predict the `element` column.

In [1]:
import os
import cv2
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch import nn, optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Custom Dataset for Shape Feature Extraction
class ShapeDataset(Dataset):
    """Dataset yielding the first frame of each labelled video plus its class index.

    Every row of ``labels_csv`` must provide a ``video_id`` column (the video is
    read from ``<folder_path>/<video_id>.mp4``) and an ``element`` column whose
    value is one of the keys of ``shape_map``.

    Args:
        folder_path: Directory containing the ``.mp4`` files.
        labels_csv: Path of the CSV file with ``video_id`` and ``element`` columns.
        transform: Optional callable applied to the 64x64 RGB frame.

    Raises:
        ValueError: If the CSV contains an ``element`` value that is not a key
            of ``shape_map``.
    """

    def __init__(self, folder_path, labels_csv, transform=None):
        self.folder_path = folder_path
        self.labels = pd.read_csv(labels_csv)
        self.transform = transform

        # Map shape labels to integers
        self.shape_map = {
            'Wasser': 0,       # Pentagon
            'Erde': 1,         # Circular shape
            'Luft': 2,         # Hexagon
            'Feuer': 3,        # Square/Rectangle
            'Boden': 4         # Triangle
        }
        mapped = self.labels['element'].map(self.shape_map)

        # Unmapped values would turn into NaN labels and only blow up much later
        # inside the loss function, so reject them here with a clear message.
        unknown = self.labels.loc[mapped.isna(), 'element'].unique().tolist()
        if unknown:
            raise ValueError(
                f"Unknown element label(s) in {labels_csv}: {unknown}"
            )
        self.labels['shape'] = mapped.astype('int64')

    def __len__(self):
        """Return the number of labelled videos."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(frame, label)`` for the video at position ``idx``.

        Raises:
            RuntimeError: If no frame could be read from the video file.
        """
        row = self.labels.iloc[idx]
        video_path = os.path.join(self.folder_path, f"{row['video_id']}.mp4")

        # Read the first frame of the video; always release the capture handle.
        cap = cv2.VideoCapture(video_path)
        try:
            ret, frame = cap.read()
        finally:
            cap.release()

        if not ret:
            raise RuntimeError(f"Could not read video: {video_path}")

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (64, 64))

        if self.transform:
            frame = self.transform(frame)

        # Plain int so the label collates into an integer class-index tensor.
        label = int(row['shape'])
        return frame, label


In [2]:
# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Load data and split into train and test sets
folder_path = 'BH25/Training_Data/Train_Videos'
labels_csv = 'BH25/Training_Data/train.csv'

dataset = ShapeDataset(folder_path, labels_csv, transform=transform)
# NOTE(review): ShapeDataset is neither an array nor a DataFrame, so
# train_test_split is expected to index it sample by sample and return two
# plain lists of (frame, label) pairs, i.e. every video is decoded once here
# and kept in memory. Confirm against the installed scikit-learn version.
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

# Limit the train_dataset to at most the first 1000 videos. The cap is clamped
# to the split size so a smaller dataset does not raise IndexError while iterating.
max_train_samples = 1000
train_dataset = torch.utils.data.Subset(
    train_dataset, range(min(max_train_samples, len(train_dataset)))
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [3]:
from torchvision.models import resnet18
# ResNet-18 with pretrained weights and a freshly initialised classification head
class PretrainedShapeClassifier(nn.Module):
    """ResNet-18 whose final fully connected layer is replaced to emit ``num_classes`` logits."""

    def __init__(self, num_classes=5):
        super().__init__()
        backbone = resnet18(pretrained=True)
        # Swap the original head for one sized to this task.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        logits = self.model(x)
        return logits

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = PretrainedShapeClassifier(num_classes=5)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop: one pass over train_loader per epoch, reporting the mean batch loss
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    batch_losses = []

    for frames, labels in train_loader:
        frames = frames.to(device)
        labels = labels.to(device)

        # Clear stale gradients, then forward, backward and parameter update
        optimizer.zero_grad()
        loss = loss_fn(model(frames), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    avg_loss = sum(batch_losses) / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\hp/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:02<00:00, 20.6MB/s]


Epoch 1/30, Loss: 0.6109
Epoch 2/30, Loss: 0.1446
Epoch 3/30, Loss: 0.1081
Epoch 4/30, Loss: 0.1151
Epoch 5/30, Loss: 0.0679
Epoch 6/30, Loss: 0.0387
Epoch 7/30, Loss: 0.0395
Epoch 8/30, Loss: 0.0366
Epoch 9/30, Loss: 0.0191
Epoch 10/30, Loss: 0.0271
Epoch 11/30, Loss: 0.0289
Epoch 12/30, Loss: 0.0107
Epoch 13/30, Loss: 0.0219
Epoch 14/30, Loss: 0.0397
Epoch 15/30, Loss: 0.0154
Epoch 16/30, Loss: 0.0311
Epoch 17/30, Loss: 0.0298
Epoch 18/30, Loss: 0.0313
Epoch 19/30, Loss: 0.0347
Epoch 20/30, Loss: 0.0556
Epoch 21/30, Loss: 0.0331
Epoch 22/30, Loss: 0.0049
Epoch 23/30, Loss: 0.0060
Epoch 24/30, Loss: 0.0019
Epoch 25/30, Loss: 0.0020
Epoch 26/30, Loss: 0.0011
Epoch 27/30, Loss: 0.0009
Epoch 28/30, Loss: 0.0029
Epoch 29/30, Loss: 0.0107
Epoch 30/30, Loss: 0.0162


In [4]:
# Evaluate the trained model on the held-out split
model.eval()
predictions, ground_truth = [], []
with torch.no_grad():
    for frames, labels in test_loader:
        logits = model(frames.to(device))
        # Index of the largest logit along the class dimension
        preds = logits.argmax(dim=1)
        predictions.extend(preds.cpu().numpy())
        ground_truth.extend(labels.cpu().numpy())

# Fraction of test samples whose predicted class equals the true class
accuracy = accuracy_score(ground_truth, predictions)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Persist the learned weights.
# NOTE(review): the file name says "resnet3d" although the model wraps resnet18.
torch.save(model.state_dict(), 'resnet3d_model.pth')

Test Accuracy: 99.55%


In [5]:
import os
import cv2
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch import nn, optim
import torch.nn.functional as F

# Custom Dataset for loading new videos
class NewVideoDataset(Dataset):
    """Dataset yielding ``(video_id, frame)`` for every ``.mp4`` file in a folder.

    ``video_id`` is the file name without its ``.mp4`` extension and ``frame``
    is the first frame of the video, converted to RGB and resized to 64x64.

    Args:
        folder_path: Directory to scan for ``.mp4`` files (not recursive).
        transform: Optional callable applied to the 64x64 RGB frame.
    """

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        # os.listdir returns entries in arbitrary order; sort them so the
        # order of the samples (and of anything derived from it) is reproducible.
        self.video_files = sorted(
            f for f in os.listdir(folder_path) if f.endswith('.mp4')
        )
        self.transform = transform

    def __len__(self):
        """Return the number of ``.mp4`` files found in the folder."""
        return len(self.video_files)

    def __getitem__(self, idx):
        """Return ``(video_id, frame)`` for the file at position ``idx``.

        Raises:
            RuntimeError: If no frame could be read from the video file.
        """
        file_name = self.video_files[idx]
        # Strip only the final extension so ids containing dots stay intact.
        video_id = os.path.splitext(file_name)[0]
        video_path = os.path.join(self.folder_path, file_name)

        # Read the first frame of the video; always release the capture handle.
        cap = cv2.VideoCapture(video_path)
        try:
            ret, frame = cap.read()
        finally:
            cap.release()

        if not ret:
            raise RuntimeError(f"Could not read video: {video_path}")

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
        frame = cv2.resize(frame, (64, 64))  # Resize to 64x64

        if self.transform:
            frame = self.transform(frame)

        return video_id, frame

# Preprocessing applied to every frame before it is fed to the model
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),  # Normalize to [-1, 1]
])

# Folder holding the unlabelled videos
new_videos_folder_path = 'BH25/Testing_Data'

# One video per batch, in dataset order
new_video_dataset = NewVideoDataset(new_videos_folder_path, transform=transform)
new_video_loader = DataLoader(new_video_dataset, batch_size=1, shuffle=False)

# Inference only: switch the model to evaluation mode
model.eval()

# Class index -> element name
shape_map_rev = dict(enumerate(['Wasser', 'Erde', 'Luft', 'Feuer', 'Boden']))

# Collect one (video id, predicted element) pair per video
predicted_ids, predicted_elements = [], []
with torch.no_grad():
    for video_id, frame in new_video_loader:
        logits = model(frame.to(device))
        class_index = logits.argmax(dim=1).item()

        # The loader wraps the id of the single video in a length-1 batch
        predicted_ids.append(video_id[0])
        predicted_elements.append(shape_map_rev[class_index])

predictions = {'video_id': predicted_ids, 'element': predicted_elements}

# Save predictions to CSV
predictions_df = pd.DataFrame(predictions)
predictions_df.to_csv('predicted_elements.csv', index=False)

print("Predictions saved to predicted_elements.csv")

Predictions saved to predicted_elements.csv
