In [1]:
import imageio
import torch
from pytorch_msssim import ssim
import numpy as np
from PIL import Image
import cv2
import pyiqa
from torchvision.models import inception_v3
from torchvision import transforms
import scipy.linalg as linalg
import warnings
warnings.filterwarnings('ignore')

# 视频路径
base_path = 'videos'
raw_videos_path = f'{base_path}/raw'
synthetic_videos_path = f'{base_path}/synthetic'

# 视频列表
video_names = [
    'May.mp4', 'Jae-in.mp4', 'Lieu.mp4', 'Macron.mp4', 'Obama.mp4', 
    'Obama1.mp4', 'Obama2.mp4', 'Shaheen.mp4'
]

# 加载视频帧
def load_video_frames(video_path):
    reader = imageio.get_reader(video_path)
    frames = [frame for frame in reader]
    reader.close()  # 关闭reader释放资源
    return frames

# 调整视频尺寸
def resize_video(generated_frames, standard_size):
    resized_generated_frames = [cv2.resize(frame, (standard_size[1], standard_size[0])) for frame in generated_frames]
    return resized_generated_frames

# 计算PSNR
def calculate_psnr(standard_frames, generated_frames):
    psnr_values = []
    for i in range(len(standard_frames)):
        mse = np.mean((standard_frames[i] - generated_frames[i]) ** 2)
        if mse == 0:
            psnr = float('inf')  # 对于完全相同的图像，PSNR为无穷大
        else:
            max_pixel = 255.0
            psnr = 20 * np.log10(max_pixel / np.sqrt(mse))
        psnr_values.append(psnr)
    return np.mean(psnr_values)

# 计算SSIM
def calculate_ssim(standard_frames, generated_frames):
    ssim_values = []
    for i in range(len(standard_frames)):
        # 规范化帧数据到[0, 1]并转换为float32
        standard_frame = torch.tensor(standard_frames[i]).permute(2, 0, 1).unsqueeze(0).float() / 255.0
        generated_frame = torch.tensor(generated_frames[i]).permute(2, 0, 1).unsqueeze(0).float() / 255.0
        ssim_val = ssim(standard_frame, generated_frame, data_range=1.0, size_average=True)  # 正确的data_range
        ssim_values.append(ssim_val.item())
    return np.mean(ssim_values)

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 加载预训练的Inception模型
inception_model = inception_v3(pretrained=True).to(device).eval()

# 定义一个函数来提取视频帧的特征，使用批量处理
def extract_features(video_path, model, device, batch_size=16):
    frames = []
    cap = cv2.VideoCapture(video_path)
    batch = []
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img = transforms.ToTensor()(img).unsqueeze(0).to(device)
        batch.append(img)
        
        if len(batch) == batch_size:
            with torch.no_grad():
                batch_tensor = torch.cat(batch, dim=0)
                features = model(batch_tensor)
                frames.extend(features.cpu().numpy())
            batch = []  # 重置批量
        
    # 处理剩余的帧
    if batch:
        with torch.no_grad():
            batch_tensor = torch.cat(batch, dim=0)
            features = model(batch_tensor)
            frames.extend(features.cpu().numpy())
    
    cap.release()  # 释放视频捕获资源
    return frames

# 计算每个视频特征的均值和协方差
def calculate_statistics(features):
    mean = np.mean(features, axis=0)
    cov = np.cov(features, rowvar=False)
    return mean, cov

# 使用FID公式计算两个视频特征分布之间的FID分数
def calculate_fid(standard_mean, standard_cov, generated_mean, generated_cov):
    # 计算均值差
    mean_diff = standard_mean - generated_mean
    # 计算协方差矩阵的平方根
    cov_sqrt = linalg.sqrtm(standard_cov @ generated_cov)
    
    # 计算FID
    fid = np.sum(mean_diff ** 2) + np.trace(standard_cov + generated_cov - 2 * cov_sqrt)
    
    # 检查是否出现NaN或负值
    if np.isnan(fid) or fid < 0:
        fid = 0  # 或者返回一个合理的默认值
    return fid

# 计算NIQE得分
def calculate_niqe_score(frames):
    niqe_metric = pyiqa.create_metric('niqe')
    scores = []
    for frame in frames:
        frame_tensor = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float() / 255.0  # 规范化到[0, 1]
        score = niqe_metric(frame_tensor)
        scores.append(score)
    del niqe_metric  # 删除NIQE指标以释放资源
    return torch.tensor(scores).mean().item()

# 输出指标名作为表头，设置列宽
header = "Video Name".ljust(20) + "PSNR Score".ljust(12) + "SSIM Score".ljust(12) + "FID Score".ljust(12) + "NIQE Score".ljust(12)
print(header)

# 循环处理每个视频
for video_name in video_names:
    path_standard_video = f'{raw_videos_path}/{video_name}'
    path_generated_video = f'{synthetic_videos_path}/{video_name}'
    
    standard_frames = load_video_frames(path_standard_video)
    generated_frames = load_video_frames(path_generated_video)
    
    # 检查尺寸是否一致，不一致则调整尺寸
    if generated_frames[0].shape != standard_frames[0].shape:
        generated_frames = resize_video(generated_frames, standard_frames[0].shape)
    
    psnr_score = round(calculate_psnr(standard_frames, generated_frames), 6)
    ssim_score = round(calculate_ssim(standard_frames, generated_frames), 6)
    
    standard_features = extract_features(path_standard_video, inception_model, device)
    generated_features = extract_features(path_generated_video, inception_model, device)
    
    standard_mean, standard_cov = calculate_statistics(standard_features)
    generated_mean, generated_cov = calculate_statistics(generated_features)
    
    fid_score = round(calculate_fid(standard_mean, standard_cov, generated_mean, generated_cov), 6)
    generated_niqe = round(calculate_niqe_score(generated_frames), 6)
    
    # 格式化输出，确保列对齐
    row = f"{video_name.ljust(20)}{psnr_score:<12}{ssim_score:<12}{fid_score:<12}{generated_niqe:<12}"
    print(row)
    
    # 释放不再需要的资源
    del standard_frames, generated_frames, standard_features, generated_features
    torch.cuda.empty_cache()  # 释放GPU缓存



  from .autonotebook import tqdm as notebook_tqdm


Video Name          PSNR Score  SSIM Score  FID Score   NIQE Score  
May.mp4             29.772568   0.652101    42.338212   5.151919    
Jae-in.mp4          29.750509   0.630766    89.133744   6.13143     
Lieu.mp4            30.694761   0.759348    30.025657   5.937247    
Macron.mp4          30.332479   0.733842    28.131079   5.964817    
Obama.mp4           30.766657   0.758716    37.66468    7.44125     
Obama1.mp4          29.598956   0.68332     47.335445   6.626041    
Obama2.mp4          30.605588   0.733553    30.716751   6.35921     
Shaheen.mp4         30.629148   0.748107    19.622528   5.441265    


In [1]:
import imageio
import torch
from pytorch_msssim import ssim
import numpy as np

# 视频路径
path_standard_video = 'data-20241209T141743Z-001/data/raw/videos/May.mp4'
path_generated_video = 'data-20241209T141743Z-001/data/synthetic/videos/May.mp4'

# 加载视频
def load_video_frames(video_path):
    reader = imageio.get_reader(video_path)
    frames = []
    for frame in reader:
        frames.append(frame)
    return frames

standard_frames = load_video_frames(path_standard_video)
generated_frames = load_video_frames(path_generated_video)

# 计算PSNR
def calculate_psnr(standard_frames, generated_frames):
    psnr_values = []
    for i in range(len(standard_frames)):
        mse = np.mean((standard_frames[i] - generated_frames[i]) ** 2)
        if mse == 0:
            psnr = 100
        else:
            max_pixel = 255.0
            psnr = 20 * np.log10(max_pixel / np.sqrt(mse))
        psnr_values.append(psnr)
    return np.mean(psnr_values)

psnr_score = calculate_psnr(standard_frames, generated_frames)

print(f"PSNR Score: {psnr_score}")

# 计算SSIM
def calculate_ssim(standard_frames, generated_frames):
    ssim_values = []
    for i in range(len(standard_frames)):
        standard_frame = torch.tensor(standard_frames[i]).permute(2, 0, 1).unsqueeze(0).float()
        generated_frame = torch.tensor(generated_frames[i]).permute(2, 0, 1).unsqueeze(0).float()
        ssim_val = ssim(standard_frame, generated_frame, data_range=255, size_average=True)
        ssim_values.append(ssim_val.item())
    return np.mean(ssim_values)

ssim_score = calculate_ssim(standard_frames, generated_frames)

print(f"SSIM Score: {ssim_score}")



  mse = np.mean((standard_frames[i] - generated_frames[i]) ** 2)


PSNR Score: 29.772568152345716
SSIM Score: 0.6521003624369437


TypeError: a bytes-like object is required, not 'Image'

In [2]:
import torch
from torchvision.models import inception_v3
from torchvision import transforms
from PIL import Image
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os
import cv2  

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 视频路径
path_standard_video = 'data-20241209T141743Z-001/data/raw/videos/May.mp4'
path_generated_video = 'data-20241209T141743Z-001/data/synthetic/videos/May.mp4'

# 加载预训练的Inception模型
inception_model = inception_v3(pretrained=True).to(device).eval()

# 定义一个函数来提取视频帧的特征
def extract_features(video_path, model, device):
    # 视频帧提取和预处理
    frames = []
    cap = cv2.VideoCapture(video_path)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img = transforms.ToTensor()(img).unsqueeze(0).to(device)
        with torch.no_grad():
            features = model(img)
        frames.append(features.squeeze(0).cpu().numpy())
    cap.release()
    return frames

# 从每个视频中提取帧并计算特征
standard_frames = extract_features(path_standard_video, inception_model, device)
generated_frames = extract_features(path_generated_video, inception_model, device)

# 计算每个视频特征的均值和协方差
def calculate_statistics(features):
    mean = np.mean(features, axis=0)
    cov = np.cov(features, rowvar=False)
    return mean, cov

standard_mean, standard_cov = calculate_statistics(standard_frames)
generated_mean, generated_cov = calculate_statistics(generated_frames)

# 使用FID公式计算两个视频特征分布之间的FID分数
def calculate_fid(standard_mean, standard_cov, generated_mean, generated_cov):
    # 计算均值差异
    mean_diff = standard_mean - generated_mean
    # 计算协方差矩阵差异
    cov_diff = standard_cov + generated_cov - 2 * np.dot(standard_cov, generated_cov)
    # 计算FID
    fid = np.dot(mean_diff, mean_diff) + np.trace(cov_diff)
    return fid

fid_score = calculate_fid(standard_mean, standard_cov, generated_mean, generated_cov)
print(f'FID score: {fid_score}')

FID score: 52.521025925726924


In [3]:
import cv2
import torch
import pyiqa
import numpy
from tqdm import tqdm

# 设定视频路径
standard_video_path = 'data-20241209T141743Z-001/data/raw/videos/May.mp4'
generated_video_path = 'data-20241209T141743Z-001/data/synthetic/videos/May.mp4'

# 读取视频
def read_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
    cap.release()
    return frames

# 计算NIQE得分
def calculate_niqe_score(frames):
    niqe_metric = pyiqa.create_metric('niqe')
    scores = []
    for frame in tqdm(frames):
        # 将帧转换为张量
        frame_tensor = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float() / 255.0
        # 计算NIQE得分
        score = niqe_metric(frame_tensor)
        scores.append(score)
    return torch.tensor(scores).mean().item()


# 主函数
def main():
    # 读取视频帧
    #standard_frames = read_video(standard_video_path)
    generated_frames = read_video(generated_video_path)

    # 计算NIQE得分
    #standard_niqe = calculate_niqe_score(standard_frames)
    generated_niqe = calculate_niqe_score(generated_frames)

    # 输出评价得分
    #print(f"Standard Video NIQE Score: {standard_niqe}")
    print(f"Generated Video NIQE Score: {generated_niqe}")

if __name__ == "__main__":
    main()

100%|██████████| 6074/6074 [08:01<00:00, 12.62it/s]


Generated Video NIQE Score: 5.151919020330636
