In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

import numpy as np
import pandas as pd
from scipy.misc import imread
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'

In [2]:
import os,sys
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the three benchmark CSV files shipped next to the notebook.
benchmark_all_ones, benchmark_all_zeros, benchmark_central_pixel = (
    pd.read_csv(csv_path)
    for csv_path in (
        './all_ones_benchmark.csv',
        './all_zeros_benchmark.csv',
        './central_pixel_benchmark.csv',
    )
)

# Per-image probability distribution over the classes for the training
# images (kept as a DataFrame).
groundtruth_train = pd.read_csv('./training_solutions_rev1.csv')

In [4]:
# File names of the training images (each name carries the image id),
# in sorted order.
lst = sorted(os.listdir('./images_training_rev1/'))
len(lst)

61578

In [20]:
# Read images 50000..59999 of the sorted listing, stack them into a single
# array and store that array under the key 'a6' in a compressed archive.
train_labels = []  # stays empty: the label lookup below is disabled
train_imgs = [
    imread('./images_training_rev1/' + img_name)
    for img_name in lst[50000:60000]
]
# Disabled per-image label lookup (one query on GalaxyID per file name):
#     train_labels.append(groundtruth_train.query('GalaxyID == {}'.format(img_name.split('.')[0])).to_numpy()[0,1:])
train_X = np.stack(train_imgs)
# train_y=np.stack(train_labels)
np.savez_compressed('train6', a6=train_X)

In [17]:
# Pull the array stored under 'arr_0' out of the first archive.
# NOTE(review): 'arr_0' is the name np.savez gives an array passed
# positionally; presumably train1.npz was written that way -- confirm.
x1 = np.load('train1.npz')['arr_0']

In [None]:
# Load the arrays from archives 2-5. The key differs per archive:
# 'arr_0' for train2, 'a3' / 'a4' / 'a5' for the others.
x2 = np.load('train2.npz')['arr_0']
x3 = np.load('train3.npz')['a3']
x4 = np.load('train4.npz')['a4']
x5 = np.load('train5.npz')['a5']

In [9]:
# Read the first 10000 images of the sorted listing into a Python list
# (one array per image; stacking happens in a later cell).
_train_imgs = [
    imread('./images_training_rev1/' + img_name)
    for img_name in lst[:10000]
]

In [11]:
# Join the per-image arrays along a new leading axis; the name is rebound
# from the list to the stacked array.
_train_imgs = np.stack(_train_imgs, axis=0)

In [16]:
# Total number of elements in the stacked image array (product of all its
# dimensions).
_train_imgs.size

5393280000

In [None]:
# Cast `test` to half precision (float16).
# NOTE(review): `test` is not assigned anywhere in the visible cells --
# confirm which array this is meant to be before relying on this cell.
test16=test.astype(np.float16)

In [14]:
# Show the shapes of the image array and the label array.
# NOTE(review): no active statement in the visible cells assigns `train_y`
# (its np.stack line is commented out) -- confirm where it is created.
train_X.shape,train_y.shape

((10000, 424, 424, 3), (10000, 37))

In [10]:
# ---- Image / label geometry -------------------------------------------
IMG_ROWS, IMG_COLS = 424, 424
NB_CLASSES = 37
INPUT_SHAPE = (IMG_ROWS, IMG_COLS, 3)  # note: TensorFlow's data layout is NHWC

# Lower-case aliases holding the same values.
input_shape = INPUT_SHAPE
classes = NB_CLASSES

# ---- Training hyper-parameters ----------------------------------------
NB_EPOCH = 20
BATCH_SIZE = 200
VERBOSE = 1
VALIDATION_SPLIT = 0.2
OPTIMIZER = Adam()  # NOTE(review): not referenced by the visible training cell

# ---- Load the data, convert the dtype and normalise --------------------
# The first 800 samples are used for training, everything after for testing.
# NOTE(review): `train_y` has no active assignment in the visible cells.
x_train, y_train = train_X[:800], train_y[:800]
x_test, y_test = train_X[800:], train_y[800:]

# Convert to float32 and divide by 255.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

print(x_train.shape, "train shape")
print(x_test.shape, "test shape")

# One-hot encoding of the labels is left disabled:
# y_train = to_categorical(y_train, NB_CLASSES)
# y_test = to_categorical(y_test, NB_CLASSES)

(800, 424, 424, 3) train shape
(200, 424, 424, 3) test shape


In [11]:
# Show the shapes of the label arrays of the train / test split.
y_train.shape,y_test.shape

((800, 37), (200, 37))

In [63]:
class LeNet(Model):
    """LeNet-style CNN assembled with the Keras functional API.

    Layer stack:
        Conv2D(20, 5x5, same, relu) -> MaxPooling2D(2x2, stride 2)
        Conv2D(50, 5x5, same, relu) -> MaxPooling2D(2x2, stride 2)
        Flatten -> Dense(500, relu) -> Dense(num_classes, softmax)

    Args:
        input_shape: Shape of a single input sample, forwarded to
            ``Input(shape=...)``. Defaults to ``(424, 424, 3)``.
        num_classes: Number of units of the final softmax layer.
            Defaults to 37.

    Attributes:
        num_classes: The ``num_classes`` value given at construction.

    Notes:
        * With the default 424x424x3 input the flattened feature vector has
          106 * 106 * 50 = 561,800 entries, so the "f1" layer alone holds
          about 281 million weights (561,800 * 500 + 500).
        * NOTE(review): the softmax output always sums to 1 across the
          ``num_classes`` units -- confirm the label vectors are a single
          probability distribution before pairing this with
          categorical cross-entropy.
    """

    def __init__(self, input_shape=(424, 424, 3), num_classes=37):
        # Input layer.
        img_input = Input(shape=input_shape)

        # Conv => ReLU => Pool
        x = Conv2D(filters=20, kernel_size=5, padding="same", activation="relu", name='block1_conv1')(img_input)
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block1_pool')(x)
        # Conv => ReLU => Pool
        x = Conv2D(filters=50, kernel_size=5, padding="same", activation="relu", name='block1_conv2')(x)
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='block1_poo2')(x)
        # Flatten to one dimension per sample.
        x = Flatten(name='flatten')(x)
        # Fully connected layer.
        x = Dense(units=500, activation="relu", name="f1")(x)
        # Softmax classifier.
        x = Dense(units=num_classes, activation="softmax", name="prediction")(x)

        # Hand the (inputs, outputs) graph to the Model constructor.
        super(LeNet, self).__init__(img_input, x, name="LeNet")

        # Plain attributes are set only after the base initializer has run.
        self.num_classes = num_classes

    # Deliberately no `call` override: the base class runs the forward pass
    # from the (inputs, outputs) graph given to `__init__`. `self.output` is
    # the model's output tensor, not a callable, so `self.output(inputs)`
    # cannot be used to re-run the network.


# Build the network and print its layer table.
model = LeNet(INPUT_SHAPE, NB_CLASSES)
model.summary()

# RMSprop with a learning rate of 0.001. `tf.keras.optimizers.RMSprop` is
# used instead of `tf.train.RMSPropOptimizer` because the `tf.train`
# optimizers only exist in the TensorFlow 1.x top-level namespace. The rate
# is passed positionally because its keyword was renamed (`lr` ->
# `learning_rate`) between Keras releases.
# NOTE(review): default epsilon differs between the two implementations.
rmsprop = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss="categorical_crossentropy", optimizer=rmsprop,
              metrics=["accuracy"])

# Train; the last VALIDATION_SPLIT fraction of the training data is held out
# by Keras for per-epoch validation.
history = model.fit(x=x_train, y=y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH, verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT)

# Evaluate on the held-out split; `score` is [loss, accuracy] given the
# single metric configured above.
score = model.evaluate(x=x_test, y=y_test, verbose=VERBOSE)
print("test loss:", score[0])
print("test acc:", score[1])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 424, 424, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 424, 424, 20)      1520      
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 212, 212, 20)      0         
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 212, 212, 50)      25050     
_________________________________________________________________
block1_poo2 (MaxPooling2D)   (None, 106, 106, 50)      0         
_________________________________________________________________
flatten (Flatten)            (None, 561800)            0         
_________________________________________________________________
f1 (Dense)                   (None, 500)               280900500 
__________

KeyboardInterrupt: 

In [None]:
# List the quantities recorded during training.
print(history.history.keys())

# The accuracy metric is logged as 'acc' by older Keras releases and as
# 'accuracy' by newer ones; use whichever key is present so the cell does
# not raise KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Accuracy per epoch. The second curve comes from the validation split that
# `fit` carved out of the training data, so it is labelled "validation".
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title("model accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.legend(["train", "validation"], loc="upper left")
plt.show()

# Loss per epoch, same train / validation pairing.
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "validation"], loc="upper left")
plt.show()


In [None]:
# Column names and overall shape of the ground-truth table.
groundtruth_train.columns
groundtruth_train.shape

# The same table as a plain ndarray.
# NOTE(review): column 0 is presumably GalaxyID -- confirm against the CSV.
gt_values = groundtruth_train.to_numpy()
first_column = gt_values[:, 0]

# Shape of one row's values with the first column dropped.
groundtruth_train.query('GalaxyID == 873932').to_numpy()[0, 1:].shape
# Shape of the whole table with the first column dropped.
gt_values[:, 1:].shape
# Number of rows whose first-column value already sits at its sorted
# position (equals the row count when that column is sorted ascending).
(first_column == np.sort(first_column)).sum()

In [None]:
# (class number, number of sub-classes) pairs.
subclass_counts = (
    (1, 3), (2, 2), (3, 2), (4, 2), (5, 4), (6, 2),
    (7, 3), (8, 7), (9, 3), (10, 3), (11, 6),
)

# 'Class1' ... 'Class11'.
# NOTE: this rebinds `classes`, which the configuration cell sets to 37.
classes = ['Class%d' % number for number in range(1, 12)]

# Map each class name to its sub-class names, e.g.
# 'Class1' -> ['Class1.1', 'Class1.2', 'Class1.3'].
subclasses = {}
for number, count in subclass_counts:
    subclasses['Class%d' % number] = [
        'Class%d.%d' % (number, sub) for sub in range(1, count + 1)
    ]
subclasses

In [5]:
# Small random demo array: uniform samples from [0, 1) scaled by 10 and
# truncated to int32, i.e. integers 0..9 in a (3, 3, 2) array.
uniform_samples = np.random.rand(3, 3, 2)
I = (uniform_samples * 10).astype(np.int32)
I

array([[[8, 4],
        [5, 3],
        [5, 8]],

       [[8, 1],
        [0, 1],
        [5, 6]],

       [[5, 5],
        [5, 7],
        [8, 9]]], dtype=int32)

In [8]:
# Axes-reversed view of I: shape (3, 3, 2) becomes (2, 3, 3).
np.transpose(I)
np.transpose(I).shape
# I90=np.rot90(I)
# I90
# Reverse the order of the elements along the last axis of the transposed
# array.
Iflip = np.transpose(I)[:, :, ::-1]
Iflip

array([[[8, 8, 5],
        [5, 0, 5],
        [5, 5, 8]],

       [[4, 1, 5],
        [3, 1, 7],
        [8, 6, 9]]], dtype=int32)

(2, 3, 3)

array([[[5, 8, 8],
        [5, 0, 5],
        [8, 5, 5]],

       [[5, 1, 4],
        [7, 1, 3],
        [9, 6, 8]]], dtype=int32)