In [None]:
# Import libraries
import cv2
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive; the project folder used by the
# following cell lives underneath this mount point.
# You will be prompted with a window asking to grant permissions.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Path of the project folder inside the mounted Google Drive.
# Note: do not escape slashes or spaces.
import os
datadir = "/content/drive/MyDrive/ECE494_Project/"
# Fail early with a clear message. Symlinking the folder onto itself cannot
# make a missing path appear, and os.chdir would then fail with a less
# helpful error.
if not os.path.isdir(datadir):
    raise FileNotFoundError(
        f"Project folder not found: {datadir!r}. Mount Google Drive first and "
        "make sure `datadir` points at the project folder."
    )
# All later cells use paths relative to the project folder.
os.chdir(datadir)
print(os.getcwd())

/content/drive/MyDrive/ECE494_Project


In [None]:
# Define the deep custom 3D CNN model
class DeepCrashNN(nn.Module):
    """Three-stage 3D CNN that maps a video clip to one probability per clip.

    Every stage is two ``Conv3d -> BatchNorm3d -> ReLU`` units (3x3x3 kernels,
    padding 1) followed by a pooling layer:

    * ``block1``: 3 -> 16 channels, then spatial-only max pooling (1, 2, 2)
    * ``block2``: 16 -> 32 channels, then spatial-only max pooling (1, 2, 2)
    * ``block3``: 32 -> 64 channels, then global average pooling over (T, H, W)

    The pooled 64-dim feature goes through dropout (p=0.5) and a linear layer,
    and ``forward`` returns the sigmoid of that single output.
    """

    def __init__(self):
        super().__init__()
        # Spatial-only pooling keeps the temporal resolution in blocks 1 and 2.
        self.block1 = nn.Sequential(
            *self._conv_pair(3, 16),
            nn.MaxPool3d(kernel_size=(1, 2, 2)),
        )
        self.block2 = nn.Sequential(
            *self._conv_pair(16, 32),
            nn.MaxPool3d(kernel_size=(1, 2, 2)),
        )
        # The last block collapses (T, H, W) to a single value per channel.
        self.block3 = nn.Sequential(
            *self._conv_pair(32, 64),
            nn.AdaptiveAvgPool3d(1),
        )
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(64, 1)

    @staticmethod
    def _conv_pair(in_channels, out_channels):
        """Return two Conv3d/BatchNorm3d/ReLU units; the first changes the channel count."""
        layers = []
        for conv_in in (in_channels, out_channels):
            layers.append(
                nn.Conv3d(in_channels=conv_in, out_channels=out_channels, kernel_size=3, padding=1)
            )
            layers.append(nn.BatchNorm3d(out_channels))
            layers.append(nn.ReLU())
        return layers

    def forward(self, x):
        """Return a (B, 1) tensor of probabilities for input ``x`` of shape (B, 3, T, H, W)."""
        features = self.block1(x)         # (B, 16, T, H/2, W/2)
        features = self.block2(features)  # (B, 32, T, H/4, W/4)
        features = self.block3(features)  # (B, 64, 1, 1, 1)
        flat = features.view(features.size(0), -1)  # (B, 64)
        logits = self.fc(self.dropout(flat))        # (B, 1)
        return torch.sigmoid(logits)                # probability in [0, 1]

# Pick the GPU when one is available (CPU otherwise), build the network on
# that device, and put it in evaluation mode.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)
model = DeepCrashNN()
model = model.to(device)
model.eval()
print("Deep custom 3D CNN model initialized and set to eval mode.")

Using device: cpu
Deep custom 3D CNN model initialized and set to eval mode.


In [None]:
# Read the train/test CSV files and report how many videos each one lists
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))
print("Training data loaded. Number of training videos:", train_df.shape[0])
print("Test data loaded. Number of test videos:", test_df.shape[0])

Training data loaded. Number of training videos: 1500
Test data loaded. Number of test videos: 1344


In [None]:
# Define function to extract randomly sampled frames from a video
def extract_random_frames(video_path, num_frames=40, resize=(224, 224)):
    """Sample random frames from a video and return them as one clip tensor.

    Frame indices are drawn without replacement and sorted, so the sampled
    frames keep their temporal order. When the video yields fewer than
    ``num_frames`` frames, every available frame is used and the last one is
    repeated to pad the clip to the requested length.

    Args:
        video_path: Path of the video file to read.
        num_frames: Number of frames in the returned clip (must be >= 1).
        resize: Target size passed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        A float32 tensor of shape ``(1, C, num_frames, height, width)`` with
        RGB values scaled to [0, 1], or ``None`` when the file cannot be
        opened, reports no frames, or no frame can be decoded.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            return None

        # Sampling without replacement cannot draw more indices than there are
        # frames, so cap the sample size for short videos (padding happens below).
        sample_size = min(num_frames, total_frames)
        frame_indices = sorted(np.random.choice(total_frames, sample_size, replace=False))

        # Decode sequentially and keep only the frames whose position was sampled.
        frames = []
        current_frame = 0
        next_idx = 0
        while next_idx < len(frame_indices):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count can exceed what is actually decodable.
                break
            if current_frame == frame_indices[next_idx]:
                frame = cv2.resize(frame, resize)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame.astype(np.float32) / 255.0)  # Normalize to [0,1]
                next_idx += 1
            current_frame += 1
    finally:
        # Release the capture on every exit path, including early returns.
        cap.release()

    if not frames:
        # Nothing could be decoded, so there is no frame to pad with.
        return None

    # Pad short clips by repeating the last decoded frame.
    frames.extend([frames[-1]] * (num_frames - len(frames)))

    clip = np.stack(frames, axis=0)           # (T, H, W, C)
    clip = np.transpose(clip, (3, 0, 1, 2))   # (C, T, H, W)
    return torch.from_numpy(clip).unsqueeze(0)  # (1, C, T, H, W)

In [None]:
# Define prediction function
def predict_video(video_path, num_frames=16, resize=(224, 224)):
    """Return the model's predicted probability for one video.

    Uses the notebook-level ``model`` and ``device``. The model is switched to
    evaluation mode for the forward pass and its previous mode is restored
    afterwards, so a call made while the model is in training mode neither
    applies dropout nor updates the BatchNorm running statistics.

    Args:
        video_path: Path of the video file to score.
        num_frames: Number of frames sampled from the video.
        resize: Frame size forwarded to ``extract_random_frames``.

    Returns:
        The model output as a Python float, or ``0.0`` when the video cannot
        be processed.
    """
    frames_tensor = extract_random_frames(video_path, num_frames=num_frames, resize=resize)
    if frames_tensor is None:
        return 0.0  # Default probability if video cannot be processed
    frames_tensor = frames_tensor.to(device)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(frames_tensor)  # Output shape: (B, 1)
    finally:
        # Leave the shared model in the mode the caller had it in.
        model.train(was_training)
    return output.item()

In [None]:
from torch.utils.data import Dataset, DataLoader

class VideoDataset(Dataset):
    """Dataset of (clip, label) pairs backed by a DataFrame of video ids.

    Each row of ``df`` must provide an ``id`` column (the video is read from
    ``<video_dir>/<id zero-padded to 5 digits>.mp4``) and a ``target`` column.

    Args:
        df: DataFrame with ``id`` and ``target`` columns.
        video_dir: Directory that contains the ``.mp4`` files.
        num_frames: Number of frames sampled per video.
        resize: Frame size forwarded to ``extract_random_frames``; it is
            handed to ``cv2.resize`` there, so it is ``(width, height)``.
    """

    def __init__(self, df, video_dir, num_frames=16, resize=(224, 224)):
        self.df = df
        self.video_dir = video_dir
        self.num_frames = num_frames
        self.resize = resize

    def __len__(self):
        """Return the number of videos listed in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the row at position ``idx``.

        ``frames`` has shape (C, T, H, W) and ``label`` is a float32 tensor of
        shape (1,). A video that cannot be read yields an all-zero clip.
        """
        row = self.df.iloc[idx]
        video_id = int(float(row['id']))
        label = torch.tensor([row['target']], dtype=torch.float32)  # the label must be a float
        video_filename = f"{video_id:05d}.mp4"
        video_path = os.path.join(self.video_dir, video_filename)

        frames = extract_random_frames(video_path, self.num_frames, self.resize)
        if frames is None:
            # Fallback clip: resize is (width, height) while the tensor layout
            # is (..., H, W), so the two values are swapped here to match the
            # shape of successfully decoded clips.
            width, height = self.resize
            frames = torch.zeros((1, 3, self.num_frames, height, width))

        return frames.squeeze(0), label

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled videos for validation (fixed seed for a repeatable split)
train_split, val_split = train_test_split(train_df, random_state=42, test_size=0.2)

# Training pipeline: batches of 8 clips, reshuffled every epoch
train_dataset = VideoDataset(df=train_split, video_dir='./train')
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
)

# Validation pipeline: same video folder, fixed order
val_dataset = VideoDataset(df=val_split, video_dir='./train')
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=0,
)

In [None]:
# Loss function & optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# DeepCrashNN.forward already applies torch.sigmoid, so the model outputs are
# probabilities. BCELoss is the criterion that expects probabilities;
# BCEWithLogitsLoss would apply a second sigmoid on top of them, which
# squashes the effective predictions into a narrow band and stalls training.
criterion = nn.BCELoss()

In [None]:
# Option 1: Adam
#optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Option 2: SGD
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

# Option 3: RMSprop
#optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4)

In [None]:
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Train for a fixed number of epochs, validate after each one, and keep the
# weights of the epoch with the lowest validation loss in "best_model.pth".
best_val_loss = float('inf')
num_epochs = 30

# Per-epoch average losses, used for the loss-curve plot
train_losses = []
val_losses = []

for epoch in tqdm(range(num_epochs)):
    # ===== Training phase =====
    model.train()
    train_loss = 0.0
    train_samples = 0

    for videos, labels in train_loader:
        videos = videos.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the mean over the batch, so weight it by the
        # batch size; otherwise a short final batch counts as much as a full one.
        batch_size = labels.size(0)
        train_loss += loss.item() * batch_size
        train_samples += batch_size

    avg_train_loss = train_loss / train_samples

    # ===== Validation phase =====
    model.eval()
    val_loss = 0.0
    val_samples = 0
    with torch.no_grad():
        for videos, labels in val_loader:
            videos = videos.to(device)
            labels = labels.to(device)
            outputs = model(videos)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            val_loss += loss.item() * batch_size
            val_samples += batch_size

    avg_val_loss = val_loss / val_samples

    # Record the loss of every epoch
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

    # ===== Save the best model =====
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), "best_model.pth")
        print(f"新最佳模型已儲存（Val Loss: {best_val_loss:.4f}）")

# ===== Plot the training and validation loss curves =====
for _curve, _curve_label in ((train_losses, 'Train Loss'), (val_losses, 'Val Loss')):
    plt.plot(_curve, label=_curve_label)
plt.title('Training & Validation Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()

  3%|▎         | 1/30 [51:57<25:06:56, 3117.81s/it]

Epoch [1/30] - Train Loss: 0.7062 | Val Loss: 0.6904
新最佳模型已儲存（Val Loss: 0.6904）


  7%|▋         | 2/30 [1:21:35<18:07:10, 2329.65s/it]

Epoch [2/30] - Train Loss: 0.6949 | Val Loss: 0.6829
新最佳模型已儲存（Val Loss: 0.6829）


 10%|█         | 3/30 [1:51:08<15:33:56, 2075.42s/it]

Epoch [3/30] - Train Loss: 0.6909 | Val Loss: 0.6863


 13%|█▎        | 4/30 [2:20:43<14:07:58, 1956.87s/it]

Epoch [4/30] - Train Loss: 0.6877 | Val Loss: 0.6852


 17%|█▋        | 5/30 [2:50:21<13:08:26, 1892.25s/it]

Epoch [5/30] - Train Loss: 0.6860 | Val Loss: 0.6805
新最佳模型已儲存（Val Loss: 0.6805）


 20%|██        | 6/30 [3:20:00<12:21:33, 1853.88s/it]

Epoch [6/30] - Train Loss: 0.6817 | Val Loss: 0.6786
新最佳模型已儲存（Val Loss: 0.6786）


 23%|██▎       | 7/30 [3:49:38<11:41:07, 1829.02s/it]

Epoch [7/30] - Train Loss: 0.6839 | Val Loss: 0.6793


 27%|██▋       | 8/30 [4:19:17<11:04:49, 1813.15s/it]

Epoch [8/30] - Train Loss: 0.6789 | Val Loss: 0.6795


 30%|███       | 9/30 [4:48:50<10:30:07, 1800.38s/it]

Epoch [9/30] - Train Loss: 0.6768 | Val Loss: 0.6755
新最佳模型已儲存（Val Loss: 0.6755）


 33%|███▎      | 10/30 [5:18:30<9:58:06, 1794.30s/it]

Epoch [10/30] - Train Loss: 0.6750 | Val Loss: 0.6777


 37%|███▋      | 11/30 [5:48:07<9:26:30, 1788.99s/it]

Epoch [11/30] - Train Loss: 0.6727 | Val Loss: 0.6779


In [None]:
# Score every training video with the current model
train_predictions = []

for idx, raw_id in train_df['id'].items():
    # File names are the integer id zero-padded to 5 digits, e.g. 01924.mp4
    video_id = int(float(raw_id))
    video_path = os.path.join("train", f"{video_id:05d}.mp4")
    train_predictions.append(predict_video(video_path))
    # Progress message every 50 index values
    if idx % 50 == 0:
        print(f"Processed {idx} training videos...")

# Keep the scores next to the ids they belong to
train_df['predicted_score'] = train_predictions
print("Training predictions generated.")

Processed 0 training videos...


KeyboardInterrupt: 

In [None]:
# Restore the best checkpoint written during training (tensors are mapped to
# the CPU while loading) and leave the model in evaluation mode.
best_weights = torch.load("best_model.pth", map_location=torch.device('cpu'))
model.load_state_dict(best_weights)
model.eval()

DeepCrashNN(
  (block1): Sequential(
    (0): Conv3d(3, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv3d(16, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (4): BatchNorm3d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv3d(16, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv3d(32, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (4): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=

In [None]:
# Score every test video with the restored model
test_predictions = []

for idx, raw_id in test_df['id'].items():
    # File names are the integer id zero-padded to 5 digits
    video_id = int(float(raw_id))
    video_path = os.path.join("test", f"{video_id:05d}.mp4")
    test_predictions.append(predict_video(video_path))
    # Progress message every 50 index values
    if idx % 50 == 0:
        print(f"Processed {idx} test videos...")

# The submission file expects the scores in a 'score' column
test_df['score'] = test_predictions
print("Test predictions generated.")

Processed 0 test videos...
Processed 50 test videos...
Processed 100 test videos...
Processed 150 test videos...
Processed 200 test videos...
Processed 250 test videos...
Processed 300 test videos...
Processed 350 test videos...
Processed 400 test videos...
Processed 450 test videos...
Processed 500 test videos...
Processed 550 test videos...
Processed 600 test videos...
Processed 650 test videos...
Processed 700 test videos...
Processed 750 test videos...
Processed 800 test videos...
Processed 850 test videos...
Processed 900 test videos...
Processed 950 test videos...
Processed 1000 test videos...
Processed 1050 test videos...
Processed 1100 test videos...
Processed 1150 test videos...
Processed 1200 test videos...
Processed 1250 test videos...
Processed 1300 test videos...
Test predictions generated.


In [None]:
# Write the id/score pairs to the submission CSV (without the index column)
submission = test_df.loc[:, ['id', 'score']]
submission.to_csv(path_or_buf='submission.csv', index=False)
print("Submission file 'submission.csv' created successfully.")

Submission file 'submission.csv' created successfully.
