In [1]:
!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [2]:
import os
import keras
from keras import layers
from keras.applications.densenet import DenseNet121
from tensorflow_docs.vis import embed
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import imageio
import cv2

In [3]:
MAX_SEQ_LENGTH = 20
NUM_FEATURES = 1024
IMG_SIZE = 128

EPOCHS = 200

In [4]:
import os
import numpy as np
import cv2
from tensorflow import keras
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import CenterCrop, Input
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split


IMG_SIZE = 224
MAX_SEQ_LENGTH = 120
NUM_FEATURES = 1024


center_crop_layer = keras.layers.CenterCrop(IMG_SIZE, IMG_SIZE)

def crop_center(frame):
    cropped = center_crop_layer(frame[None, ...])
    cropped = keras.backend.squeeze(cropped, axis=0)
    return cropped.numpy()

def load_video(path, max_frames=0, offload_to_cpu=False):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = frame[:, :, [2, 1, 0]]  # BGR到RGB
            frame = crop_center(frame)
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)

def build_feature_extractor():
    feature_extractor = DenseNet121(weights="imagenet", include_top=False, pooling="avg", input_shape=(IMG_SIZE, IMG_SIZE, 3))
    preprocess_input = keras.applications.densenet.preprocess_input
    inputs = Input((IMG_SIZE, IMG_SIZE, 3))
    x = preprocess_input(inputs)
    outputs = feature_extractor(x)
    model = Model(inputs, outputs, name="feature_extractor")
    return model

feature_extractor = build_feature_extractor()

def prepare_videos(root_dir):
    frame_features = []
    labels = []
    for label in ['0', '1']:
        class_dir = os.path.join(root_dir, label)
        for video_file in os.listdir(class_dir):
            video_path = os.path.join(class_dir, video_file)
            frames = load_video(video_path, max_frames=MAX_SEQ_LENGTH)
            frames_feature = np.zeros((MAX_SEQ_LENGTH, NUM_FEATURES))
            for i, frame in enumerate(frames):
                if i < MAX_SEQ_LENGTH:
                    frame = np.expand_dims(frame, axis=0)  # 添加批次维度
                    frames_feature[i] = feature_extractor.predict(frame)
            frame_features.append(frames_feature)
            labels.append(int(label))
    frame_features = np.array(frame_features)
    labels = np.array(labels)
    return frame_features, labels

root_dir = '/content/drive/MyDrive/ViolenceDataPaper2'

frame_features, labels = prepare_videos(root_dir)


X_train, X_test, y_train, y_test = train_test_split(frame_features, labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2

print(f"训练集特征维度: {X_train.shape}, 训练集标签维度: {y_train.shape}")
print(f"验证集特征维度: {X_val.shape}, 验证集标签维度: {y_val.shape}")
print(f"测试集特征维度: {X_test.shape}, 测试集标签维度: {y_test.shape}")


[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
训练集特征维度: (147, 120, 1024), 训练集标签维度: (147,)
验证集特征维度: (50, 120, 1024), 验证集标签维度: (50,)
测试集特征维度: (50, 120, 1024), 测试集标签维度: (50,)


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam

In [6]:
class PositionalEmbedding(nn.Module):
    def __init__(self, sequence_length, embed_size):
        super(PositionalEmbedding, self).__init__()
        self.position_embeddings = nn.Embedding(num_embeddings=sequence_length, embedding_dim=embed_size)

    def forward(self, x):
        positions = torch.arange(0, x.size(1)).unsqueeze(0).to(x.device)
        x = x + self.position_embeddings(positions)
        return x


In [7]:
class TransformerEncoder(nn.Module):
    def __init__(self, embed_size, dense_dim, num_heads):
        super(TransformerEncoder, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=embed_size, num_heads=num_heads, dropout=0.3)
        self.dense_proj = nn.Sequential(
            nn.Linear(embed_size, dense_dim),
            nn.GELU(),
            nn.Linear(dense_dim, embed_size)
        )
        self.layernorm1 = nn.LayerNorm(embed_size)
        self.layernorm2 = nn.LayerNorm(embed_size)

    def forward(self, src):
        src_attended, _ = self.attention(src, src, src)
        src = self.layernorm1(src + src_attended)
        src_proj = self.dense_proj(src)
        src = self.layernorm2(src + src_proj)
        return src


In [8]:
class VideoClassifier(nn.Module):
    def __init__(self, sequence_length, num_features, embed_size, dense_dim, num_heads, num_classes):
        super(VideoClassifier, self).__init__()
        self.pos_embedding = PositionalEmbedding(sequence_length, num_features)
        self.transformer_encoder = TransformerEncoder(embed_size, dense_dim, num_heads)
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        x = self.pos_embedding(x)
        x = x.permute(1, 0, 2)  # Transformer expects (Seq Len, Batch, Features)
        x = self.transformer_encoder(x)
        x = x.permute(1, 2, 0)  # Pooling expects (Batch, Features, Seq Len)
        x = self.global_pool(x).squeeze(2)
        x = self.dropout(x)
        x = self.fc(x)
        return x


In [None]:
EPOCHS = 5

In [9]:

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor.view(-1, 1))
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

val_dataset = TensorDataset(X_val_tensor, y_val_tensor.view(-1, 1))
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor.view(-1, 1))
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


In [10]:
def train_model(model, train_loader, val_loader, num_epochs):
    optimizer = Adam(model.parameters(), lr=1e-3)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # 验证逻辑
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

    return model



In [11]:

model = VideoClassifier(
    sequence_length=MAX_SEQ_LENGTH,
    num_features=NUM_FEATURES,
    embed_size=1024,
    dense_dim=128,
    num_heads=4,
    num_classes=1
)


In [12]:

EPOCHS = 240  # 或根据您的需求调整

# 开始训练模型
trained_model = train_model(model, train_loader, val_loader, EPOCHS)


Epoch 1/240, Train Loss: 1.3405, Val Loss: 0.5528
Epoch 2/240, Train Loss: 0.8405, Val Loss: 0.5072
Epoch 3/240, Train Loss: 0.5303, Val Loss: 0.3757
Epoch 4/240, Train Loss: 0.4274, Val Loss: 0.7187
Epoch 5/240, Train Loss: 0.3609, Val Loss: 0.4532
Epoch 6/240, Train Loss: 0.4068, Val Loss: 0.2813
Epoch 7/240, Train Loss: 0.1825, Val Loss: 0.1304
Epoch 8/240, Train Loss: 0.1446, Val Loss: 0.1361
Epoch 9/240, Train Loss: 0.0851, Val Loss: 0.2061
Epoch 10/240, Train Loss: 0.1422, Val Loss: 0.2090
Epoch 11/240, Train Loss: 0.0709, Val Loss: 0.0614
Epoch 12/240, Train Loss: 0.0503, Val Loss: 0.0513
Epoch 13/240, Train Loss: 0.0176, Val Loss: 0.0194
Epoch 14/240, Train Loss: 0.0109, Val Loss: 0.0341
Epoch 15/240, Train Loss: 0.0061, Val Loss: 0.0311
Epoch 16/240, Train Loss: 0.0043, Val Loss: 0.0088
Epoch 17/240, Train Loss: 0.0026, Val Loss: 0.0229
Epoch 18/240, Train Loss: 0.0018, Val Loss: 0.0329
Epoch 19/240, Train Loss: 0.0016, Val Loss: 0.0137
Epoch 20/240, Train Loss: 0.0010, Val Lo

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            predictions = torch.round(torch.sigmoid(outputs))
            y_true.extend(targets.cpu().numpy())
            y_pred.extend(predictions.cpu().numpy())

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')


In [14]:
evaluate_model(trained_model, test_loader)


Accuracy: 0.9400
Precision: 0.8889
Recall: 1.0000
F1 Score: 0.9412


In [None]:
# 保存模型
torch.save(trained_model, 'model_complete.pth')



In [None]:
import torch


In [None]:
model = torch.load('/content/model_complete.pth')
model.eval()  # 切换到评估模式

VideoClassifier(
  (pos_embedding): PositionalEmbedding(
    (position_embeddings): Embedding(120, 1024)
  )
  (transformer_encoder): TransformerEncoder(
    (attention): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True)
    )
    (dense_proj): Sequential(
      (0): Linear(in_features=1024, out_features=128, bias=True)
      (1): GELU(approximate='none')
      (2): Linear(in_features=128, out_features=1024, bias=True)
    )
    (layernorm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (layernorm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (global_pool): AdaptiveAvgPool1d(output_size=1)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=1024, out_features=1, bias=True)
)

In [None]:
import torch

In [None]:
from google.colab import drive
drive.mount('/content/drive')

model_path = '/content/drive/My Drive/model_complete.pth'

# 假设 'model' 是您要保存的 PyTorch 模型
import torch
torch.save(model.state_dict(), model_path)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
import torch
import torch.nn as nn

class Basic1DCNN(nn.Module):
    def __init__(self, num_classes=1):
        super(Basic1DCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1024, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        # 假设池化后的序列长度为30，这是一个示例值，根据您的池化层结果进行调整
        self.fc1 = nn.Linear(128 * 30, 512)  # 需要根据实际情况调整
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Reshape to [batch_size, channels, sequence_length] for Conv1d
        x = x.permute(0, 2, 1)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten the dimensions except for the batch
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [33]:
import torch
from torch.optim import Adam
import torch.nn.functional as F

def distillation_loss(outputs, labels, teacher_outputs, T=20, alpha=0.7):
    """
    Calculate the distillation loss
    :param outputs: Student outputs
    :param labels: True labels
    :param teacher_outputs: Teacher outputs
    :param T: Temperature for softmax
    :param alpha: Weight for the distillation loss
    """
    hard_loss = F.binary_cross_entropy_with_logits(outputs, labels.view(-1, 1))
    soft_loss = F.kl_div(F.log_softmax(outputs/T, dim=1),
                         F.softmax(teacher_outputs/T, dim=1),
                         reduction='batchmean')
    return alpha * soft_loss + (1 - alpha) * hard_loss


In [34]:
def train_student_with_teacher(teacher_model, student_model, train_loader, optimizer, epochs=10):
    teacher_model.eval()
    for epoch in range(epochs):
        for inputs, targets in train_loader:
            optimizer.zero_grad()


            with torch.no_grad():
                teacher_pred = teacher_model(inputs)


            student_pred = student_model(inputs)


            loss = distillation_loss(student_pred, targets, teacher_pred)


            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

In [35]:
num_epochs = 200

In [None]:
import torch
from torch.optim import Adam
import torch.nn.functional as F
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# 假设 Basic1DCNN 类已经被定义
student_model = Basic1DCNN(num_classes=1)
optimizer = Adam(student_model.parameters(), lr=1e-4)

# 假设 train_loader 和 val_loader 已经被定义，可以加载视频数据和标签
num_epochs = 200  # 或根据您的需求调整

def distillation_loss(outputs, labels, teacher_outputs, T=20, alpha=0.7):
    hard_loss = F.binary_cross_entropy_with_logits(outputs, labels)
    soft_loss = F.kl_div(F.log_softmax(outputs/T, dim=1), F.softmax(teacher_outputs/T, dim=1), reduction='batchmean')
    return alpha * soft_loss + (1 - alpha) * hard_loss

for epoch in range(num_epochs):
    student_model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        teacher_outputs = trained_model(inputs)
        student_outputs = student_model(inputs)

        # 确保targets的形状与student_outputs相匹配
        # 如果targets已经是[64, 1]形状，不需要使用unsqueeze(1)
        loss = distillation_loss(student_outputs, targets, teacher_outputs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)

    # 验证部分
    student_model.eval()
    val_loss = 0
    true_labels = []
    pred_labels = []
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = student_model(inputs)
            loss = F.binary_cross_entropy_with_logits(outputs,targets)
            val_loss += loss.item()

            preds = torch.sigmoid(outputs).round()
            true_labels.extend(targets.numpy())
            pred_labels.extend(preds.numpy())

    avg_val_loss = val_loss / len(val_loader)
    precision = precision_score(true_labels, pred_labels, zero_division=0)
    recall = recall_score(true_labels, pred_labels)
    f1 = f1_score(true_labels, pred_labels)

    print(f'Epoch {epoch+1}/{num_epochs}, Average Train Loss: {avg_loss:.4f}, Average Val Loss: {avg_val_loss:.4f}')
    print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')

In [46]:
import torch
from torch.optim import Adam
import torch.nn.functional as F
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
import numpy as np

# 假设 Basic1DCNN 类已经被定义
student_model = Basic1DCNN(num_classes=1)
optimizer = Adam(student_model.parameters(), lr=1e-4)

# 假设 train_loader 和 val_loader 已经被定义，可以加载视频数据和标签
num_epochs = 200  # 或根据您的需求调整

def distillation_loss(outputs, labels, teacher_outputs, T=20, alpha=0.7):
    hard_loss = F.binary_cross_entropy_with_logits(outputs, labels)  # 确保labels的维度匹配
    soft_loss = F.kl_div(F.log_softmax(outputs/T, dim=1), F.softmax(teacher_outputs/T, dim=1), reduction='batchmean')
    return alpha * soft_loss + (1 - alpha) * hard_loss

for epoch in range(num_epochs):
    student_model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        teacher_outputs = trained_model(inputs)
        student_outputs = student_model(inputs)

        loss = distillation_loss(student_outputs, targets, teacher_outputs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)

    # 验证部分
    student_model.eval()
    val_loss = 0
    true_labels = []
    pred_scores = []  # 用于AP计算的预测分数
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = student_model(inputs)
            loss = F.binary_cross_entropy_with_logits(outputs, targets)
            val_loss += loss.item()

            pred_scores.extend(torch.sigmoid(outputs).numpy().flatten())  # 收集原始分数
            true_labels.extend(targets.numpy())

    avg_val_loss = val_loss / len(val_loader)
    precision = precision_score(true_labels, np.round(pred_scores), zero_division=0)
    recall = recall_score(true_labels, np.round(pred_scores))
    f1 = f1_score(true_labels, np.round(pred_scores))
    ap = average_precision_score(true_labels, pred_scores)  # 计算AP值

    print(f'Epoch {epoch+1}/{num_epochs}, Average Train Loss: {avg_loss:.4f}, Average Val Loss: {avg_val_loss:.4f}')
    print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, AP: {ap:.4f}')




Epoch 1/200, Average Train Loss: 0.2015, Average Val Loss: 0.6208
Precision: 0.7879, Recall: 0.8667, F1 Score: 0.8254, AP: 0.9443
Epoch 2/200, Average Train Loss: 0.1857, Average Val Loss: 0.5393
Precision: 0.7879, Recall: 0.8667, F1 Score: 0.8254, AP: 0.9476
Epoch 3/200, Average Train Loss: 0.1683, Average Val Loss: 0.4607
Precision: 0.8125, Recall: 0.8667, F1 Score: 0.8387, AP: 0.9525
Epoch 4/200, Average Train Loss: 0.1478, Average Val Loss: 0.3891
Precision: 0.8929, Recall: 0.8333, F1 Score: 0.8621, AP: 0.9606
Epoch 5/200, Average Train Loss: 0.1180, Average Val Loss: 0.3300
Precision: 0.8929, Recall: 0.8333, F1 Score: 0.8621, AP: 0.9677
Epoch 6/200, Average Train Loss: 0.1032, Average Val Loss: 0.2823
Precision: 0.8710, Recall: 0.9000, F1 Score: 0.8852, AP: 0.9752
Epoch 7/200, Average Train Loss: 0.0850, Average Val Loss: 0.2507
Precision: 0.8529, Recall: 0.9667, F1 Score: 0.9062, AP: 0.9838
Epoch 8/200, Average Train Loss: 0.0638, Average Val Loss: 0.1833
Precision: 0.9667, Recal