In [2]:
import tensorflow.keras as keras
import os
import cv2
from PIL import Image, ImageFile
from sklearn import svm
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
import math

In [3]:
model = ResNet50(weights='imagenet')
conv_base = ResNet50(include_top=False, input_shape=(224,224,3)) #模型也可以看作一个层
model = Sequential()
model.add(conv_base) 
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [4]:
print("冻结之前可训练的张量个数：", len(model.trainable_weights)) #结果为30
conv_base.trainable = False
print("冻结之后可训练的张量个数：", len(model.trainable_weights)) #结果为4

冻结之前可训练的张量个数： 216
冻结之后可训练的张量个数： 4


In [6]:
# 数据生成器
class DataGenerator(keras.utils.Sequence):
	def __init__(self, filenames, labels, batch_size):
		self.filenames = filenames
		self.labels = labels
		self.batch_size = batch_size
		self._shuffle()

	def _shuffle(self):
		self.indexes = np.arange(len(self.filenames))
		np.random.shuffle(self.indexes)

	@staticmethod
	def _parse(imgPath):
		xx = np.zeros((224,224,3))
		img = Image.open(imgPath)
		out = img.resize((224,224))
		out = np.array(out)
		s = out.shape
		xx[:224,:,:] = out[:,:,:]
		#获取图像频域数据：暂时先*盲目的*只取了通道0做DCT： 先DCT - 再把高度resize成64 - 再做FFT - 最后每行采样250个点
		'''
		img = cv2.imread(imgPath)
		img = img[:,:,0]
		if img.shape[0] % 2 == 1:
			img = img[:img.shape[0]-1,:]
		if img.shape[1] % 2 == 1:
			img = img[:,:img.shape[1]-1]
		imgdct = cv2.dct(np.float32(img))
		if imgdct.shape[1] >= 250:
			imgdct = cv2.resize(imgdct, (imgdct.shape[1],64))
		else:
			imgdct = cv2.resize(imgdct, (250,64))
		res = np.zeros((64,250))
		idx =np.linspace(0,imgdct.shape[1]-1,num=250, dtype=int)
		#for i in range(64):
		#fftres = fft(imgdct[i,:])
		#	res[i,:] = fftres[idx]
		#xx[224:224+64,:125,0] = res[:,:125]
		#xx[224:224+64,:125,1] = res[:,125:]
		#end
		'''        
		return xx

	def on_epoch_end(self):
		self._shuffle()

	def __len__(self):
		return math.ceil(len(self.filenames)/float(self.batch_size))

	def __getitem__(self, idx):
		batch_indexes = self.indexes[idx * self.batch_size: (idx + 1) * self.batch_size]
		batch_x = np.array([self._parse(self.filenames[i]) for i in batch_indexes])
		batch_y = np.array([self.labels[i] for i in batch_indexes])
		return batch_x, batch_y


cnt = 0
filenames = []
labels = []

#读取文件名和labels
def getFiles(folder, y):
	global cnt
	global filenames
	global labels
	root_path ='E:/LectureFile/datamining/traindata/'+folder+'/'
	dir = root_path
	for root,dir,files in os.walk(dir):
		for file in files:
			fname = str(file)
			if fname[0] == '.':
				continue
			imgPath = root_path+fname
			if imgPath[-4:] != ".gif":
				if os.path.getsize(imgPath) >= 1024*1024*3:
					continue
				img = Image.open(imgPath)
				if img.size[0]*img.size[1] >= 1000*1000:
					continue
				out = img.resize((224,224))
				out = np.array(out)
				s = out.shape
				if len(s) != 3 or s[0] != 224 or s[1] != 224 or s[2] != 3:
					continue
				filenames.append(imgPath)
				labels.append(y)
				cnt += 1
				if cnt%1000==0:
					print(cnt)
					if cnt>=16000:
						break
getFiles('rumor_pic',0)
getFiles('true_pic_1',1)
getFiles('truth_pic_2',1)
print(len(filenames),len(labels))

#训练前先Shuffle操作一次
L = len(filenames)
idx = np.array(list(range(L)))
np.random.shuffle(idx)
n_filenames = []
n_labels = []
for i in range(len(idx)):
	n_filenames.append(filenames[idx[i]])
	n_labels.append(labels[idx[i]])
filenames = n_filenames
labels = n_labels

#数据生成器，80%训练，20%测试
train_gen = DataGenerator(filenames[:int(0.8*L)], labels[:int(0.8*L)], 32)
test_gen  = DataGenerator(filenames[int(0.8*L):], labels[int(0.8*L):], 32)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
16000 16000


In [7]:
from tensorflow.keras import optimizers
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
H = model.fit_generator(train_gen, 
                       steps_per_epoch=500/20,
                       epochs=1,
                       validation_data=test_gen,
                       validation_steps=1000/20)



In [7]:
#读取图片并生成一个输入数据
def read_data(imgPath):
	ifread = True
	if ifread:
		imgs = cv2.imread(imgPath)
		out = np.zeros((224,224,3))
		for i in range(3):
			img = imgs[:,:,i]

			#====== 选择像素域作为输入 ======
			imgdct = cv2.resize(img, (224,224))

			#====== 选择离散余弦变换作为输入 ======
			'''if img.shape[0] % 2 == 1:
				img = img[:img.shape[0]-1,:]
			if img.shape[1] % 2 == 1:
				img = img[:,:img.shape[1]-1]
			imgdct = cv2.resize(cv2.dct(np.float32(img)), (128,128))

			#====== 选择傅立叶变换作为输入 ======
			#imgdct = cv2.resize(np.real(fft(img)), (128,128))'''

			out[:,:,i] = imgdct
		out = out.flatten()
	return out

In [11]:
def chooseData(count,start):
	tot = 0
	cx = []
	cy = []
	print(len(filenames))    
	for i in range(start, len(filenames)):
		cx.append(read_data(filenames[i]))
		cy.append(labels[i])
		tot += 1
		if tot % 1000 == 0:
			print(tot)
		if tot == count:
			break
	cx = np.array(cx)
	cy = np.array(cy)
	return cx,cy,i

In [13]:
#获得测试数据
print("Start reading testing data ...")
test_x, test_y, last = chooseData(12000, 4000)
print(test_x.shape, test_y.shape)
N_test = test_y.shape[0]
print("Scanned Testing Sample:", N_test, '( Actual testing size:', test_y.shape[0], ')')

Start reading testing data ...
8000
1000
2000
3000
4000
(4000, 150528) (4000,)
Scanned Testing Sample: 4000 ( Actual testing size: 4000 )


In [27]:
test_x.resize((4000,224,224,3))
print("Start predicting ...")
pred_y = model.predict(test_x)
test_acc = (np.sum(pred_y == test_y)) / N_test
print("Testing Acc:", test_acc)

Start predicting ...
Testing Acc: 3915.0


In [35]:
print(N_test)
print(np.sum(pred_y))

4000
0.0


In [36]:
test_y.resize(4000,1)

In [37]:
test_acc = (np.sum(pred_y == test_y)) / N_test

In [38]:
print(test_acc)

0.97875
