In this i applies resnet to pred power col (colour)

In [8]:
#importing libs
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import cv2
from torchvision import transforms
from torchvision.models import resnet18

In [9]:

# Load the training data (You might have to change the file path based on how you're working)
train_df = pd.read_csv('BH25/Training_Data/train.csv')

print(train_df.head())

   video_id element  motion power  speed          video_summary
0         1    Erde  linear   rot    9.6     (3.8147, 31.94809)
1         2   Feuer     shm  grin    9.6   (26.70288, -4.29153)
2         3   Feuer  random  geld    9.6      (3.8147, 8.58307)
3         4    Erde  zigzag  lila    6.9  (-24.79553, -0.95367)
4         5    Erde  linear  lila    9.6    (7.62939, 22.88818)


In [10]:
#applying filter to the power col as it has noice in the form of spelling mis
replacements = {
    'slau': 'blau', 'baau': 'blau', 'blru': 'blau', 'plau' : 'blau', 'btau' : 'blau',
    'trun' : 'grun', 'brun' : 'grun', 'gaun': 'grun', 'grin':'grun', 'grus' : 'grun',
    'helb' : 'gelb', 'gela' : 'gelb', 'geld' : 'gelb', 'gele' : 'gelb', 'gelt' : 'gelb',
    'rut' : 'rot', 'got' : 'rot', 'not' : 'rot', 'rat' : 'rot', 'rod' : 'rot',
    'lela' : 'lila', 'liga' : 'lila', 'lili' : 'lila', 'lisa' : 'lila', 'lula' : 'lila'
}
train_df['power'] = train_df['power'].replace(replacements)
unique_elements2 =train_df['power'].unique()
print(unique_elements2)

['rot' 'grun' 'gelb' 'lila' 'blau']


In [11]:
# Custom Dataset for Color Feature Extraction
#here i used only one frame to do so as i have to find colour so it can be found from one frame only
class ColorDataset(Dataset):
    def __init__(self, folder_path, labels_df, transform=None):
        self.folder_path = folder_path
        self.labels = labels_df
        self.transform = transform
        
        # Map color labels to integers
        self.color_map = {
            'rot': 0,  # some color
            'blau': 1, # Some color
            'lila': 2, # Some color
            'grun': 3, # Some color
            'gelb': 4  # Some color
        }

        # Map 'power' to integers
        self.labels['color'] = self.labels['power'].map(self.color_map)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        video_id = self.labels.iloc[idx]['video_id']
        video_path = os.path.join(self.folder_path, f"{video_id}.mp4")
        
        # Read the first frame of the video
        cap = cv2.VideoCapture(video_path)
        ret, frame = cap.read()
        cap.release()

        if not ret:
            raise RuntimeError(f"Could not read video: {video_path}")

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 
        frame = cv2.resize(frame, (64, 64)) 

        if self.transform:
            frame = self.transform(frame)

        label = self.labels.iloc[idx]['color']
        return frame, label


In [12]:
# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load data and split
folder_path = 'BH25/Training_Data/Train_Videos'
dataset = ColorDataset(folder_path, train_df, transform=transform)
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)
train_dataset = torch.utils.data.Subset(train_dataset, list(range(min(len(train_dataset), 1000))))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [13]:
# Model definition
class PretrainedColorClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super(PretrainedColorClassifier, self).__init__()
        self.model = resnet18(weights="IMAGENET1K_V1")
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        return self.model(x)

# Training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = PretrainedColorClassifier(num_classes=5).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for frames, labels in train_loader:
        frames, labels = frames.to(device), labels.to(device)
        outputs = model(frames)
        loss = loss_fn(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")


Epoch 1/30, Loss: 0.5860
Epoch 2/30, Loss: 0.0288
Epoch 3/30, Loss: 0.0227
Epoch 4/30, Loss: 0.0090
Epoch 5/30, Loss: 0.0132
Epoch 6/30, Loss: 0.0097
Epoch 7/30, Loss: 0.0115
Epoch 8/30, Loss: 0.0135
Epoch 9/30, Loss: 0.0108
Epoch 10/30, Loss: 0.0230
Epoch 11/30, Loss: 0.0265
Epoch 12/30, Loss: 0.0083
Epoch 13/30, Loss: 0.0020
Epoch 14/30, Loss: 0.0113
Epoch 15/30, Loss: 0.0204
Epoch 16/30, Loss: 0.0313
Epoch 17/30, Loss: 0.0144
Epoch 18/30, Loss: 0.0301
Epoch 19/30, Loss: 0.0205
Epoch 20/30, Loss: 0.0079
Epoch 21/30, Loss: 0.0014
Epoch 22/30, Loss: 0.0021
Epoch 23/30, Loss: 0.0204
Epoch 24/30, Loss: 0.0136
Epoch 25/30, Loss: 0.0265
Epoch 26/30, Loss: 0.0053
Epoch 27/30, Loss: 0.0016
Epoch 28/30, Loss: 0.0010
Epoch 29/30, Loss: 0.0020
Epoch 30/30, Loss: 0.0009


In [15]:
# Evaluation
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for frames, labels in test_loader:
        frames, labels = frames.to(device), labels.to(device)
        outputs = model(frames)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on the validation set: {100 * correct / total:.2f}%')

Accuracy on the validation set: 100.00%


In [16]:
import os
import cv2
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Custom Dataset for loading new videos
class NewVideoDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.video_files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]
        self.transform = transform

    def __len__(self):
        return len(self.video_files)

    def __getitem__(self, idx):
        video_id = self.video_files[idx].split('.')[0]
        video_path = os.path.join(self.folder_path, self.video_files[idx])
        
        # Read the first frame of the video
        cap = cv2.VideoCapture(video_path)
        ret, frame = cap.read()
        cap.release()

        if not ret:
            raise RuntimeError(f"Could not read video: {video_path}")

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
        frame = cv2.resize(frame, (64, 64))  # Resize to 64x64

        if self.transform:
            frame = self.transform(frame)

        return video_id, frame

# Transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Define the new video folder path
new_videos_folder_path = 'BH25/Testing_Data'

# Load new video dataset
new_video_dataset = NewVideoDataset(new_videos_folder_path, transform=transform)
new_video_loader = DataLoader(new_video_dataset, batch_size=1, shuffle=False)

# Ensure the model is in evaluation mode
model.eval()

# Dictionary to store video IDs and their predicted powers
predictions = {'video_id': [], 'power': []}

# Map integer labels to power categories
power_map_rev = {0: 'rot', 1: 'blau', 2: 'lila', 3: 'grun', 4: 'gelb'}

# Process new videos and make predictions
with torch.no_grad():
    for video_id, frame in new_video_loader:
        frame = frame.to(device)
        outputs = model(frame)
        _, predicted = torch.max(outputs, 1)
        power = power_map_rev[predicted.item()]

        predictions['video_id'].append(video_id[0])
        predictions['power'].append(power)

# Save predictions to CSV
predictions_df = pd.DataFrame(predictions)
predictions_df.to_csv('predicted_power.csv', index=False)

print("Predictions saved to predicted_power.csv")


Predictions saved to predicted_power.csv
