In [4]:
import numpy as np
import os
import sys
from PIL import Image
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
from fnmatch import fnmatch
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [5]:
# Print the installed TensorFlow version so the environment of this run is visible in the output.
print(tf.__version__)

2.7.0


In [6]:
# Load the object catalogue; the CSV's unnamed first column becomes the DataFrame index.
data = pd.read_csv('./data/DFBS_extracted.csv', index_col='Unnamed: 0')
# Preview the first rows (columns include the class label `Cl`, `Name` and the image `path`).
data.head()

Unnamed: 0,_Glon,_Glat,_RAJ2000,_DEJ2000,Cl,Name,Vmag,z,plate,path,dx,dy
22,120.963506,-22.480532,10.4725,40.354722,Sy1,MARK 957,15.14,0.073,fbs0809_cor,data/images/Sy1/22__MARK 957.tiff,4310.0,7798.0
23,121.040282,-22.511401,10.567083,40.326667,Sy1,IV Zw 29,16.3,0.102,fbs0809_cor,data/images/Sy1/23__IV Zw 29.tiff,4140.0,7738.0
37,129.448039,-49.313722,17.22,13.337222,Sy1,3C 33.0,15.9,0.06,fbs0089_cor,data/images/Sy1/37__3C 33.0.tiff,2645.0,316.0
38,133.401721,-62.040063,17.747083,0.433611,Sy1,SDSS J01109+0026,15.72,0.019,fbs1175_cor,data/images/Sy1/38__SDSS J01109+0026.tiff,7944.0,7561.0
43,136.75979,-62.174109,19.265,0.007778,Sy1,2E 0114-0015,16.79,0.046,fbs1175_cor,data/images/Sy1/43__2E 0114-0015.tiff,4394.0,6680.0


In [41]:
# File name of each image = last '/'-separated component of `path`.
# Taking `.str[-1]` element-wise (instead of `expand=True` + `.iloc[:, -1]`) stays
# correct when paths have different numbers of components: with `expand=True`
# shorter paths are padded with None, so the last column would be empty for them.
data['fname'] = data.path.str.split('/').str[-1]

In [42]:
# Collect the relative path of every TIFF file found directly in ./data/images_2/
pattern = "*.tiff"
listOfFiles = os.listdir('./data/images_2/')
all_tiff_files = ['./data/images_2/'+entry for entry in listOfFiles if fnmatch(entry, pattern)]

# Display the first match together with the number of matches
all_tiff_files[0], len(all_tiff_files)

('./data/images_2/1036__RXS J00281+3103.tiff', 990)

In [72]:
DATA_AUG_BATCH_SIZE = 32  # number of images per batch drawn from the augmentation generator

In [73]:
# Number of batches needed to cover every row of `data` once (the last batch may be partial).
n_steps_data_aug = np.ceil(data.shape[0]/DATA_AUG_BATCH_SIZE).astype(int)

In [74]:
# Augmentation settings gathered in one mapping so they can be inspected or reused.
# Rescaling is intentionally left disabled (previously tried: rescale = 1./0xff).
augmentation_params = {
    "rotation_range": 1,
    "width_shift_range": 0.05,
    "height_shift_range": 0.05,
    "shear_range": 0.1,
    "zoom_range": 0.1,
    "horizontal_flip": True,
    "vertical_flip": False,
    "fill_mode": "nearest",
}
datagen = ImageDataGenerator(**augmentation_params)

In [75]:
img_size = (140, 20)  # (height, width) the generator resizes images to; same size as the model input
img_path = "./data/images_2/"  # directory holding the source images named in data['fname']
aug_img_path = "./data/images_22/"  # directory the augmented copies are written to
SEED = 1  # seed handed to the augmentation generator

In [76]:
# Feed images to the data generator.
# Every batch drawn from `aug_gen` is also written to `aug_img_path` as
# "aug_<row position>_<random number>.tiff". The generator does not create that
# directory itself, so make sure it exists before the first batch is saved.
os.makedirs(aug_img_path, exist_ok=True)
aug_gen = datagen.flow_from_dataframe(
    dataframe=data,
    directory=img_path,
    save_to_dir=aug_img_path,
    save_prefix='aug',
    save_format='tiff',
    x_col='fname',
    y_col="Cl",
    batch_size=DATA_AUG_BATCH_SIZE,
    seed=SEED,
    shuffle=False,
    color_mode='grayscale',
    class_mode="categorical",
    target_size=img_size,
)

Found 990 validated image filenames belonging to 9 classes.


In [80]:
# Two full passes over the data; every next() call saves one augmented batch to disk
# as a side effect (the returned arrays themselves are discarded).
for _step in range(2 * n_steps_data_aug):
    next(aug_gen)

In [209]:
# Index the augmented files written by the generator.
# Only names of the form "aug_<row position>_<random number>.tiff" are kept, so stray
# entries in the directory (checkpoints, hidden files) cannot break the parsing below,
# and the listing is sorted because os.listdir returns entries in arbitrary order.
augmented_images = np.array(sorted(
    name for name in os.listdir(aug_img_path) if fnmatch(name, 'aug_*_*.tiff')
))
aug_names = pd.Series(augmented_images)
aug_data = pd.concat(
    [
        # second '_'-separated field = row position in `data`; stored as int so it
        # can be used directly as a positional indexer
        aug_names.str.split('_', expand=True)[1].astype(int),
        aug_img_path + aug_names,
    ],
    axis=1,
)

In [210]:
# Column 1 holds the row position (in `data`) parsed from each augmented file name.
# It can come out of the string split as text, while `.iloc` only accepts integer
# positions, so convert it once and reuse the integer array for every lookup.
row_pos = aug_data[1].astype(int).to_numpy()
aug_data['Cl'] = data['Cl'].iloc[row_pos].values
aug_data['Name'] = data['Name'].iloc[row_pos].values
# Replace the position by the index label of the corresponding row in `data`
aug_data[1] = data.index[row_pos]
aug_data.rename(columns={0: "path", 1: "data_index"}, inplace=True)
aug_data.head()

Unnamed: 0,data_index,path,Cl,Name
0,22,./data/images_22/aug_0_1091330.tiff,Sy1,MARK 957
1,22,./data/images_22/aug_0_1166945.tiff,Sy1,MARK 957
2,22,./data/images_22/aug_0_1441761.tiff,Sy1,MARK 957
3,22,./data/images_22/aug_0_1466666.tiff,Sy1,MARK 957
4,22,./data/images_22/aug_0_1680659.tiff,Sy1,MARK 957


In [211]:
# Number of augmented images per class, most frequent class first.
aug_data['Cl'].value_counts()

M      16482
sd     13119
Mrk     9782
PN      8785
WD      5712
Sy1     5644
QSO     5508
cv      1273
C        201
Name: Cl, dtype: int64

In [212]:
from sklearn.preprocessing import LabelEncoder

# Replace the string class names in `Cl` by integer codes (fit and encode in one step)
le = LabelEncoder()
aug_data['Cl'] = le.fit_transform(aug_data['Cl'])
aug_data.head()

Unnamed: 0,data_index,path,Cl,Name
0,22,./data/images_22/aug_0_1091330.tiff,5,MARK 957
1,22,./data/images_22/aug_0_1166945.tiff,5,MARK 957
2,22,./data/images_22/aug_0_1441761.tiff,5,MARK 957
3,22,./data/images_22/aug_0_1466666.tiff,5,MARK 957
4,22,./data/images_22/aug_0_1680659.tiff,5,MARK 957


In [213]:
# Per-class sample counts; the number of distinct codes is the model's output size
values = aug_data.loc[:, 'Cl'].value_counts()
num_classes = values.shape[0]
values

1    16482
8    13119
2     9782
3     8785
6     5712
5     5644
4     5508
7     1273
0      201
Name: Cl, dtype: int64

In [214]:
# Inputs are the image paths, targets are the integer class codes
X = aug_data['path']
Y = aug_data['Cl']

In [215]:
# Drop the pandas wrappers and keep the underlying NumPy arrays
X = X.to_numpy()
Y = Y.to_numpy()

In [217]:
from tqdm import tqdm

# Load every augmented image, min-max scale it to [0, 1] and track the largest
# height/width seen so the images can later be padded to a common size.
images_list = []
max_width = 0
max_height = 0
ind = 0  # index of the first image that has the largest width
for i in tqdm(range(len(X))):
    # The context manager releases the file handle as soon as the pixels are copied
    with Image.open(X[i]) as im:
        arr = np.array(im, dtype=np.float64)

    lo, hi = arr.min(), arr.max()
    if hi > lo:
        arr = (arr - lo) / (hi - lo)
    else:
        # Constant image: (arr - min) / (max - min) would divide by zero and yield NaNs
        arr = np.zeros_like(arr)

    s = arr.shape
    max_height = max(max_height, s[0])
    if s[1] > max_width:
        max_width = s[1]
        ind = i
    images_list.append(arr)

 19%|█▊        | 12307/66506 [02:06<09:15, 97.54it/s] 


KeyboardInterrupt: 

In [None]:
# Number of images actually loaded
len(images_list)

14556

In [None]:
# Show the largest width/height measured while loading the images ...
print(max_width, max_height)
# ... then replace them by the fixed target size used for padding (width 20, height 140).
max_width, max_height = 20, 140

In [None]:
# Zero-pad every image, centred, up to (max_height, max_width).
# One np.pad call per image replaces the former row-by-row / column-by-column
# np.insert / np.append calls, each of which copied the whole array.
for i in range(len(images_list)):
    arr = images_list[i]
    s = arr.shape

    # An axis that is already at least as large as the target gets no padding
    d_width = max(max_width - s[1], 0)
    d_height = max(max_height - s[0], 0)

    # When the difference is odd, the extra row/column goes to the bottom/right
    d_top = d_height // 2
    d_bottom = d_height - d_top
    d_left = d_width // 2
    d_right = d_width - d_left

    images_list[i] = np.pad(
        arr,
        ((d_top, d_bottom), (d_left, d_right)),
        mode='constant',
        constant_values=0,
    )

In [None]:
# Keep the padded image dimensions under shorter names and report them.
width = max_width
height = max_height
print(width, height)
# Visual sanity check of one sample (index 100).
plt.imshow(images_list[100])
plt.gray()  # switch the colormap to grayscale
plt.show()

In [None]:
from tensorflow.python.client import device_lib
import tensorflow as tf
import keras
# print(device_lib.list_local_devices())

In [None]:
# Turn the Python lists into NumPy arrays for the split / training steps
images_np = np.asarray(images_list)
file_names_np = np.asarray(all_tiff_files)

In [None]:
# Stratified 90/10 split. `random_state=SEED` fixes the shuffle, so the same samples
# end up in the test set on every run and results can be compared between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    images_np, Y, test_size=0.1, shuffle=True, stratify=Y, random_state=SEED
)
# One-hot encode the integer class codes (the model is trained with categorical cross-entropy)
Y_train = tf.keras.utils.to_categorical(Y_train, num_classes)
Y_test = tf.keras.utils.to_categorical(Y_test, num_classes)

In [None]:
# Give every sample an explicit single-channel axis: (samples, 140, 20, 1)
input_shape = (140, 20, 1)
X_train = X_train.reshape((X_train.shape[0], *input_shape))
X_test = X_test.reshape((X_test.shape[0], *input_shape))

In [None]:
# Report the training tensor shape and the size of each split
print('x_train shape:', X_train.shape)
for _split, _caption in ((X_train, 'train samples'), (X_test, 'test samples')):
    print(_split.shape[0], _caption)

In [None]:
from keras.models import Sequential, Model, Input
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Activation, BatchNormalization, concatenate
from tensorflow.keras import regularizers

In [None]:
reg  = None  # kernel regularizer passed to the functional model's layers; None disables it
reg_l1 = regularizers.l1()  # L1 regularizer with library defaults; not referenced in the visible cells
reg_l2 = regularizers.l2()  # L2 regularizer with library defaults; not referenced in the visible cells
ks = 16  # base number of filters/units; layers use multiples of it
drop_size = 0.1  # base dropout rate; layers use multiples of it
act = "relu"  # activation of the functional model's Conv2D and hidden Dense layers
from keras.callbacks import LearningRateScheduler
def lr_schedule(epoch):
    """Return the learning rate for the given epoch (step-wise decay).

    Epochs up to and including 6 use 0.001; the rate then drops to 0.0005,
    0.00025, 0.0001 and 0.00001 once the epoch exceeds 6, 12, 18 and 25
    respectively.
    """
    # (epoch that must be exceeded, learning rate), checked from the latest stage down
    stages = (
        (25, 0.00001),
        (18, 0.0001),
        (12, 0.00025),
        (6, 0.0005),
    )
    for boundary, lrate in stages:
        if epoch > boundary:
            return lrate
    return 0.001

In [None]:
# Baseline CNN: three conv + max-pool stages followed by a small dense classifier.
# NOTE(review): `model` is reassigned by the functional-API model built in a later
# cell, and this Sequential model is not compiled in the visible cells — confirm
# whether it is still needed.
model = Sequential()
# NOTE(review): unlike the two conv layers below, this first one has no activation — confirm intent.
model.add(Conv2D(32, kernel_size=(3,3), input_shape=input_shape, padding="same"))
# Pool windows of (3, 1) reduce only the first (140-long) axis in the first two stages.
model.add(MaxPooling2D(pool_size=(3, 1)))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 1)))

model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation('relu'))
# The last pool reduces both axes.
model.add(MaxPooling2D(pool_size=(3, 4)))

model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
model.add(Dense(128, activation=tf.nn.relu))
model.add(Dropout(0.5))
model.add(Dense(128, activation=tf.nn.relu))
# One softmax output per class.
model.add(Dense(num_classes,activation=tf.nn.softmax))

In [None]:
# Densely connected CNN built with the functional API.
# Each of the three stages applies three conv + batch-norm layers; the 2nd and 3rd conv
# of a stage receive the concatenation of the stage input and all earlier feature maps
# of that stage. Every stage ends with 2x2 max-pooling and dropout.
# NOTE(review): the previous stage labels (32 / 16 / 8 / 4) did not match the
# (140, 20, 1) input defined earlier — presumably left over from a 32x32 variant.
# The sizes below assume the default (non-padded) pooling — TODO confirm.
i = Input(input_shape)

# Stage 1: 2x2 kernels on the full-resolution input (140 x 20) ----------------------
t11 = Conv2D(ks,(2,2),kernel_regularizer = reg,padding = "same",activation = act)(i)
t11 = BatchNormalization()(t11)
t1i  = concatenate((i,t11))

t12 = Conv2D(ks*2,(2,2),kernel_regularizer = reg,padding = "same",activation = act)(t1i)
t12 = BatchNormalization()(t12)
t2i = concatenate((i,t11,t12))

t13 = Conv2D(ks*4,(2,2),kernel_regularizer = reg,padding = "same",activation = act)(t2i)
t13 = BatchNormalization()(t13)
t13 = MaxPooling2D(2,2)(t13)
t13 = Dropout(drop_size*2)(t13)
# Stage 2: 3x3 kernels after the first pooling (expected 70 x 10) --------------------
t21 = Conv2D(ks*2,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t13)
t21 = BatchNormalization()(t21)
t2i2  = concatenate((t13,t21))
t22 = Conv2D(ks*4,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t2i2)
t22 = BatchNormalization()(t22)
t2i3 = concatenate((t13,t21,t22))
t23 = Conv2D(ks*8,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t2i3)
t23 = BatchNormalization()(t23)
t23 = MaxPooling2D(2,2)(t23)
t23 = Dropout(drop_size*2)(t23)
# Stage 3: 3x3 kernels after the second pooling (expected 35 x 5) --------------------
t31 = Conv2D(ks*2,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t23)
t31 = BatchNormalization()(t31)
t3i2  = concatenate((t23,t31))
t32 = Conv2D(ks*4,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t3i2)
t32 = BatchNormalization()(t32)
t3i3 = concatenate((t23,t31,t32))
t33 = Conv2D(ks*8,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t3i3)
t33 = BatchNormalization()(t33)
t33 = MaxPooling2D(2,2)(t33)
t33 = Dropout(drop_size*2)(t33)
# Classifier head on the output of the third pooling (expected 17 x 2) ---------------
# Disabled alternatives kept from earlier experiments:
# output = concatenate((t12,t))
# output = BatchNormalization()(output)
# output = GlobalAveragePooling2D()(t6)
# output = GlobalMaxPooling2D()(t6)
output = Flatten()(t33)
# output = Dropout(drop_size*4)
# Three identical dense blocks: Dense(16*ks) -> batch-norm -> dropout
output = Dense(16*ks,kernel_regularizer = reg,activation = act)(output)
output = BatchNormalization()(output)
output = Dropout(2*drop_size)(output)
output = Dense(16*ks,kernel_regularizer = reg,activation = act)(output)
output = BatchNormalization()(output)
output = Dropout(2*drop_size)(output)
output = Dense(16*ks,kernel_regularizer = reg,activation = act)(output)
output = BatchNormalization()(output)
output = Dropout(2*drop_size)(output)

# One softmax output per class
output = Dense(num_classes,activation='softmax')(output)
# Rebinds `model`, replacing the Sequential model defined in the previous cell
model = Model(i,output)

# Targets are one-hot encoded, hence categorical cross-entropy.
# NOTE(review): `lr_schedule` is defined above but not attached here; presumably it is
# passed as a callback when fitting — verify in the training cell.
model.compile(optimizer ="adam",
                     loss = "categorical_crossentropy",
                      metrics = ["accuracy"])