In [90]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import time
from torch import nn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [91]:
path = r'data'
kernels = [[[-1, 1]], [[-1],[1]],[[0, -1, 0], [-1, 4, -1], [0, -1, 0]], [[-1, 0, -1], [-1, 6, -1], [-1, 0, -1]], [[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]]]
indexes = [0, 2, 4, 6, 8, 9, 10, 50, 100, 200, 255]
nr_imgs = 100

In [92]:
pool = nn.MaxPool1d(5)

In [93]:
def pre_process(idx: int, typ: int = 1):
    res = []
    src = 'Stego' if typ == 1 else 'Cover' # 1 for stego, 0 for cover
    img = cv2.imread(path + f'/{src}/{idx+1}.pgm') # 读入图像数据
    for ker in kernels:
        cov = cv2.filter2D(img, cv2.CV_8U, kernel=np.array(ker)) # 卷积
        hist = cv2.calcHist([cov], [0], None, [256], [0, 256]).flatten() # 计算直方图
        hist = pool(torch.tensor(hist.reshape(1,-1))).numpy().squeeze() # 池化
        res = np.concatenate((res, hist), axis=0) # 拼接
    res = np.append(res, float(typ)) # 添加标签
    return res


print(pre_process(0))


[1.60172e+05 6.20900e+03 1.97200e+03 8.57000e+02 4.61000e+02 2.48000e+02
 1.59000e+02 1.12000e+02 8.10000e+01 5.40000e+01 3.70000e+01 2.90000e+01
 2.50000e+01 1.80000e+01 1.50000e+01 1.10000e+01 1.00000e+01 9.00000e+00
 9.00000e+00 5.00000e+00 7.00000e+00 3.00000e+00 3.00000e+00 2.00000e+00
 1.00000e+00 1.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 1.59147e+05 5.24300e+03 2.75700e+03
 1.50700e+03 9.37000e+02 5.83000e+02 4.27000e+02 3.10000e+02 2.04000e+02
 1.58000e+02 1.28000e+02 9.30000e+01 7.80000e+01 6.80000e+01 5.90000e+01
 5.00000e+01 4.30000e+01 3.50000e+01 4.30000e+01 3.20000e+01 2.60000e+01
 2.50000e+01 1.80000e+01 1.50000e+01 1.50000e+01 7.00000e+00 7.00000e+00
 6.00000e+00 1.00000e+00 5.00000e+00 1.00000e+00 0.

In [94]:
from collections import Counter
indexes = [0, 1, 2, 3, 4, 5, 6, 8, 10]
kernel = [[-1, 0, -1], [-1, 6, -1], [-1, 0, -1]]

def pre_process(idx: int, typ: int = 1, indexes=indexes):
    src = 'Stego' if typ == 1 else 'Cover' # 1 for stego, 0 for cover
    img = cv2.imread(path + f'/{src}/{idx+1}.pgm') # 读入图像数据
    cov = cv2.filter2D(img, cv2.CV_16S, kernel=np.array(kernel)) # 卷积
    arr = np.array(cov).flatten() # 拉平
    ans = Counter(arr)  # 计数
    ans = np.array([(ans[i]+ans[-i]) for i in indexes]).flatten() # 统计
    cv2.normalize(ans, ans, 0, 1, cv2.NORM_MINMAX) # 归一化
    ans = np.append(ans, typ) # 添加标签
    return ans


print([(pre_process(i, 0),pre_process(i, 1)) for i in range(1)])

[(array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 1, 1, 1, 0, 0, 0, 0, 1]))]


In [95]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB

In [96]:
covers = [pre_process(i, 0) for i in range(nr_imgs)]
stegos = [pre_process(i, 1) for i in range(nr_imgs)]
data = np.array(covers + stegos)
np.random.seed(32)
np.random.shuffle(data)

In [97]:
# split_line = int(data.shape[0] * 0.8)
data_x= data[:, :-1]
data_y= data[:, -1]

In [98]:
model1=GaussianNB()
prediction1=cross_val_predict(model1,data_x,data_y,cv=10)
acc1 = accuracy_score(data_y,prediction1)
precision1 = precision_score(data_y,prediction1)
recall1 = recall_score(data_y,prediction1)
f1 = f1_score(data_y,prediction1)
print("GaussianNB的四项指标")
print("精度:",acc1)
print("查准率:",precision1)
print("查全率:",recall1)
print("f1值:",f1)

GaussianNB的四项指标
精度: 0.77
查准率: 0.684931506849315
查全率: 1.0
f1值: 0.8130081300813007


In [99]:
model2=BernoulliNB()
prediction2=cross_val_predict(model2,data_x,data_y,cv=10)
acc2 = accuracy_score(data_y,prediction2)
precision2 = precision_score(data_y,prediction2)
recall2 = recall_score(data_y,prediction2)
f2 = f1_score(data_y,prediction2)
print("BernoulliNB的四项指标")
print("精度:",acc2)
print("查准率:",precision2)
print("查全率:",recall2)
print("f1值:",f2)


BernoulliNB的四项指标
精度: 0.865
查准率: 0.8411214953271028
查全率: 0.9
f1值: 0.8695652173913043


In [100]:
model=MultinomialNB()
prediction=cross_val_predict(model,data_x,data_y,cv=10)
acc = accuracy_score(data_y,prediction)
precision = precision_score(data_y,prediction)
recall = recall_score(data_y,prediction)
f = f1_score(data_y,prediction)
print("MultinomialNB的四项指标")
print("精度:",acc)
print("查准率:",precision)
print("查全率:",recall)
print("f1值:",f)

MultinomialNB的四项指标
精度: 0.87
查准率: 0.8490566037735849
查全率: 0.9
f1值: 0.8737864077669903
