In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

from PIL import Image

from tensorflow import device as tf_device
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import sys
import shutil
from fnmatch import fnmatch
from collections import Counter

from _helpers import loadImages, prepareData, make_directory

In [2]:
from tqdm import tqdm
from time import perf_counter

In [3]:
from tensorflow import __version__
print(__version__)

2.7.1


In [4]:
# csv_input_path = '../input/combined-4000/Combined_extracted.csv'
# astro_input_path = '../input/resized-3913/images_resized/'
# contours_path = './extractedContours/'

# # Augmentation parameters
# train_aug_path = 'augmented/train/'
# test_aug_path = 'augmented/test/'
# img_size = (140, 20)  # input image size to model
# SEED = 1

# # Model parameters
# checkpoint_directory = 'checkpoint/'

In [5]:
# csv_input_path = 'data/Datasets/Subtypes_extracted.csv'
# astro_input_path = 'data/Extracted/Subtypes/images_resized/'
# class_column = 'Sp type'
# contours_path = 'data/Contours/extractedContours/'

# # Augmentation parameters
# train_aug_path = 'data/Extracted/Subtypes/augmented/train/'
# test_aug_path = 'data/Extracted/Subtypes/augmented/test/'
# img_size = (140, 20)  # input image size to model
# SEED = 1

# # Model parameters
# checkpoint_directory = 'data/Checkpoints/Subtypes/'

In [6]:
csv_name = 'Subtypes'
setting = '13_2'
class_column = 'Sp type'

In [7]:
csv_input_path = f'data/Datasets/{csv_name}_{setting}_extracted.csv'
astro_input_path = f'data/Extracted/{csv_name}/{setting}/images_resized/'
contours_path = 'data/Contours/extractedContours/'

# Augmentation parameters
train_aug_path = f'data/Extracted/{csv_name}/{setting}/augmented/train/'
test_aug_path = f'data/Extracted/{csv_name}/{setting}/augmented/test/'
img_size = (140, 20)  # input image size to model
SEED = 1

# Model parameters
checkpoint_directory = f'data/Checkpoints/{csv_name}/{setting}/'

In [8]:
data = prepareData(csv_input_path)

In [9]:
data.shape

(1972, 14)

In [10]:
# Keep only the file name (the text after the last '/') of every path.
# Splitting once from the right and taking the last piece works for paths of any
# depth; with ``split(expand=True)`` shorter paths are padded with None, so the
# last column would be empty for them.
data['fname'] = data.path.str.rsplit('/', n=1).str[-1]

In [11]:
data[class_column].value_counts()

sdB        559
C-H        430
Mrk SB     399
C Ba       155
sdA        112
sdO        112
Mrk AGN     68
Mrk Abs     66
C-R         40
C-N         31
Name: Sp type, dtype: int64

In [12]:
# data.drop(data[(data[class_column] == 'cv') | (data[class_column] == 'QSO') | (data[class_column] == 'WD') | (data[class_column] == 'Sy1')].index, inplace=True)
data = data[data[class_column].isin({'sdB', 'C-H', 'Mrk SB', 'C Ba'})]

In [13]:
def split_dataframe(data, coef, column=None, random_state=None):
    """Split ``data`` into train and test parts, one class at a time.

    Every distinct value of the label column is split separately with
    ``train_test_split`` and the per-class pieces are concatenated, so each
    class contributes to both parts.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per sample and a class-label column.
    coef : float or int
        Forwarded to ``train_test_split`` as ``test_size`` for every class.
    column : str, optional
        Name of the class-label column. When omitted, the notebook-level
        ``class_column`` variable is used (the original behaviour).
    random_state : int, optional
        Forwarded to ``train_test_split``. ``None`` (default) keeps the
        original unseeded behaviour; pass an integer for a repeatable split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` row subsets of ``data`` keeping its columns and
        index labels.
    """
    if column is None:
        column = class_column  # fall back to the notebook-level setting

    train_parts, test_parts = [], []
    for label in data[column].unique():
        train_c, test_c = train_test_split(
            data[data[column] == label], test_size=coef, shuffle=True,
            random_state=random_state)
        train_parts.append(train_c)
        test_parts.append(test_c)

    if not train_parts:
        # No classes at all: hand back empty frames that still carry the columns.
        return data.iloc[0:0].copy(), data.iloc[0:0].copy()

    # Concatenate once at the end instead of growing a frame inside the loop;
    # this also avoids starting from an empty object-dtype frame.
    return pd.concat(train_parts, axis=0), pd.concat(test_parts, axis=0)

In [14]:
train, test = split_dataframe(data, 0.2)
train.shape, test.shape

((1234, 15), (309, 15))

In [15]:
train[class_column].value_counts(), test[class_column].value_counts()

(sdB       447
 C-H       344
 Mrk SB    319
 C Ba      124
 Name: Sp type, dtype: int64,
 sdB       112
 C-H        86
 Mrk SB     80
 C Ba       31
 Name: Sp type, dtype: int64)

In [16]:
def augment(data, n_times=1, batch_size=2, img_size=(140,20), input_path='data/images/',
            output_path='data/augmented/', seed = None, save_format='png', x_col='fname', y_col="Cl",
            shuffle=False, color_mode='grayscale', class_mode="categorical"):
    """Write augmented copies of the images listed in ``data`` and index them.

    An ``ImageDataGenerator`` is run over ``data`` with ``save_to_dir`` set, so
    every generated image is written to ``output_path``. The directory is then
    listed and each generated file is mapped back to the row of ``data`` it
    was produced from.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``x_col``, ``y_col`` and ``'Name'``.
    n_times : int
        The generator is advanced ``n_times * ceil(len(data) / batch_size)`` steps.
    batch_size, img_size, seed, save_format, x_col, y_col, shuffle, color_mode, class_mode
        Forwarded to ``flow_from_dataframe`` (``img_size`` as ``target_size``).
    input_path : str
        Directory holding the source images (``directory`` argument).
    output_path : str
        Directory receiving the augmented images. It is concatenated directly
        with the file names, so it should end with a path separator.

    Returns
    -------
    aug_data : pandas.DataFrame
        One row per generated file with the columns ``data_index`` (index label
        of the source row in ``data``), ``path``, ``y_col`` and ``Name``.
    classes
        The ``classes`` attribute of the Keras iterator.
    """
    n_steps_data_aug = np.ceil(data.shape[0]/batch_size).astype(int)
    save_prefix = 'aug'

    datagen = ImageDataGenerator(
        rotation_range=1,
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=False,
        fill_mode="nearest"
        # rescale = 1./0xff
    )

    # Prepare the target directory before the generator that writes into it is
    # configured. NOTE(review): make_directory comes from _helpers; presumably it
    # creates the directory when missing - confirm whether it also empties it.
    make_directory(output_path)

    aug_gen = datagen.flow_from_dataframe(dataframe=data, directory=input_path,
                                          save_to_dir=output_path, save_prefix=save_prefix,
                                          save_format=save_format, x_col=x_col, y_col=y_col,
                                          batch_size=batch_size, seed=seed,
                                          shuffle=shuffle, color_mode=color_mode,
                                          class_mode=class_mode, target_size=img_size)

    # Each step writes one batch of augmented images to output_path.
    for _ in tqdm(range(n_times*n_steps_data_aug)):
        next(aug_gen)

    # Only consider files this run's naming scheme can have produced; anything
    # else in the directory (hidden files, other formats) cannot be parsed.
    name_start = save_prefix + '_'
    name_end = '.' + save_format
    augmented_images = [file_name for file_name in os.listdir(output_path)
                        if file_name.startswith(name_start) and file_name.endswith(name_end)]

    # The second '_'-separated token of a file name is used as the row position
    # of the source image. It is text, so convert it to int before using it as a
    # positional indexer.
    # NOTE(review): assumes every row of ``data`` passed Keras' filename
    # validation, so generator positions equal positions in ``data`` - confirm.
    positions = np.array([int(file_name.split('_')[1]) for file_name in augmented_images],
                         dtype=int)

    aug_data = pd.DataFrame({
        'data_index': data.index[positions],
        'path': [output_path + file_name for file_name in augmented_images],
        y_col: data[y_col].iloc[positions].values,
        'Name': data['Name'].iloc[positions].values,
    })

    return aug_data, aug_gen.classes

In [17]:
aug_train, aug_classes = augment(
    train, n_times=10, batch_size=32, img_size=img_size,
    input_path=astro_input_path, output_path=train_aug_path,
    seed=SEED, y_col=class_column)
aug_test, _ = augment(
    test, n_times=4, batch_size=32, img_size=img_size,
    input_path=astro_input_path, output_path=test_aug_path,
    seed=SEED, y_col=class_column)
aug_train.head()

Found 1234 validated image filenames belonging to 4 classes.


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 390/390 [00:07<00:00, 50.20it/s]


Found 309 validated image filenames belonging to 4 classes.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 48.28it/s]


Unnamed: 0,data_index,path,Sp type,Name
0,116,data/Extracted/Subtypes/13_2/augmented/train/a...,C Ba,J125834.26+243437.2
1,2566,data/Extracted/Subtypes/13_2/augmented/train/a...,sdB,PG1609+195
2,537,data/Extracted/Subtypes/13_2/augmented/train/a...,C-H,J074213.69+285444.9
3,2763,data/Extracted/Subtypes/13_2/augmented/train/a...,sdB,PG1416+110
4,2450,data/Extracted/Subtypes/13_2/augmented/train/a...,sdB,PG0843+246


In [18]:
aug_train[class_column].value_counts(), aug_train.shape

(sdB       4470
 C-H       3440
 Mrk SB    3190
 C Ba      1240
 Name: Sp type, dtype: int64,
 (12340, 4))

In [19]:
le = LabelEncoder()
le.fit(aug_train[class_column])
aug_train[class_column]=le.transform(aug_train[class_column])
aug_test[class_column]=le.transform(aug_test[class_column])
aug_train.head()

Unnamed: 0,data_index,path,Sp type,Name
0,116,data/Extracted/Subtypes/13_2/augmented/train/a...,0,J125834.26+243437.2
1,2566,data/Extracted/Subtypes/13_2/augmented/train/a...,3,PG1609+195
2,537,data/Extracted/Subtypes/13_2/augmented/train/a...,1,J074213.69+285444.9
3,2763,data/Extracted/Subtypes/13_2/augmented/train/a...,3,PG1416+110
4,2450,data/Extracted/Subtypes/13_2/augmented/train/a...,3,PG0843+246


In [20]:
values = aug_train[class_column].value_counts()
num_classes = len(values)
values

3    4470
1    3440
2    3190
0    1240
Name: Sp type, dtype: int64

In [21]:
X_train_path = aug_train.loc[:, 'path'].values
Y_train = aug_train.loc[:, class_column].values
X_test_path = aug_test.loc[:, 'path'].values
Y_test = aug_test.loc[:, class_column].values

In [22]:
X_train = loadImages(X_train_path)
X_test = loadImages(X_test_path)

In [23]:
Y_train = to_categorical(Y_train, num_classes)
Y_test = to_categorical(Y_test, num_classes)

In [24]:
X_train.shape, Y_train.shape

((12340, 140, 20), (12340, 4))

In [25]:
input_shape = (img_size[0], img_size[1], 1)
X_train = X_train.reshape(X_train.shape[0], input_shape[0], input_shape[1], input_shape[2])
X_test = X_test.reshape(X_test.shape[0], input_shape[0], input_shape[1], input_shape[2])

In [26]:
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

x_train shape: (12340, 140, 20, 1)
12340 train samples
1236 test samples


In [31]:
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_directory + 'checkpoint',
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

make_directory(checkpoint_directory)

def lr_schedule(epoch):
    """Return the step-wise learning rate for the given epoch number.

    The rate starts at 0.002 and drops each time ``epoch`` exceeds one of the
    thresholds 3, 6, 9, 12 and 15.
    """
    # (threshold, rate) pairs, checked from the latest stage backwards so the
    # first threshold that ``epoch`` exceeds decides the rate.
    stages = (
        (15, 0.00005),
        (12, 0.0001),
        (9, 0.00025),
        (6, 0.0005),
        (3, 0.001),
    )
    for threshold, rate in stages:
        if epoch > threshold:
            return rate
    return 0.002

lr_scheduler = LearningRateScheduler(lr_schedule)

In [32]:
counter = Counter(aug_classes)
max_val = float(max(counter.values()))       
class_weights = {class_id : max_val/num_images for class_id, num_images in counter.items()}

In [33]:
model = Sequential()
model.add(Conv2D(128, kernel_size=(3,3), input_shape=input_shape, padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(1, 2)))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

In [34]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 140, 20, 128)      1280      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 70, 10, 128)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 70, 10, 128)       147584    
                                                                 
 activation (Activation)     (None, 70, 10, 128)       0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 70, 5, 128)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 70, 5, 64)        

In [35]:
model.compile(optimizer ="adamax",
                 loss = "categorical_crossentropy",
                  metrics = ["accuracy"])

In [36]:
# from tensorflow.keras.utils import Sequence

# class DataGenerator(Sequence):
#     def __init__(self, x_set, y_set, batch_size):
#         self.x, self.y = x_set, y_set
#         self.batch_size = batch_size

#     def __len__(self):
#         return int(np.ceil(len(self.x) / float(self.batch_size)))

#     def __getitem__(self, idx):
#         batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
#         batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
#         return batch_x, batch_y

# train_gen = DataGenerator(X_train, Y_train, 32)
# test_gen = DataGenerator(X_test, Y_test, 256)

In [38]:
with tf_device('GPU:0'):
    history = model.fit(X_train, Y_train, batch_size=8, epochs = 30, validation_data=(X_test, Y_test), class_weight=class_weights, callbacks=[model_checkpoint_callback])
#     lr_scheduler

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
gen = ImageDataGenerator(
#     rotation_range = 2,
#     shear_range=1,
#     zoom_range=0.1,
    validation_split = 0.2
)

In [None]:
def Charts(history):
    """Show the loss curves, then the accuracy curves, of a training run.

    ``history.history`` must provide the series ``loss``, ``val_loss``,
    ``accuracy`` and ``val_accuracy``. Each metric gets its own figure with the
    training and validation series plotted together.
    """
    for metric in ('loss', 'accuracy'):
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        plt.show()

In [29]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization, Dense, Dropout, Flatten, LeakyReLU
model1 = Sequential()

model1.add(Conv2D(32, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu', input_shape = input_shape))
model1.add(Conv2D(32, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(Conv2D(32, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(MaxPool2D(2))
model1.add(BatchNormalization())
model1.add(Dropout(0.2))

model1.add(Conv2D(64, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(Conv2D(64, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(Conv2D(64, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(MaxPool2D(2))
model1.add(BatchNormalization())
model1.add(Dropout(0.2))

model1.add(Conv2D(128, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(Conv2D(128, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(Conv2D(128, 3, padding = 'same', kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(MaxPool2D(2))
model1.add(BatchNormalization())

model1.add(Flatten())
model1.add(Dropout(0.3))

model1.add(Dense(256, kernel_initializer = 'glorot_normal', activation = 'relu'))
model1.add(Dropout(0.5))
model1.add(Dense(num_classes, activation = 'softmax'))

model1.summary()
model1.compile(optimizer = 'adam', loss = 'categorical_crossentropy' , metrics = ['accuracy'])

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 140, 20, 32)       320       
                                                                 
 conv2d_7 (Conv2D)           (None, 140, 20, 32)       9248      
                                                                 
 conv2d_8 (Conv2D)           (None, 140, 20, 32)       9248      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 70, 10, 32)       0         
 2D)                                                             
                                                                 
 batch_normalization_2 (Batc  (None, 70, 10, 32)       128       
 hNormalization)                                                 
                                                                 
 dropout (Dropout)           (None, 70, 10, 32)       

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
earlyStop = EarlyStopping(
    monitor = 'val_accuracy',
    patience = 10,
    min_delta = 1e-5,
    mode = 'max',
    restore_best_weights = True,
    verbose = 1
)

In [None]:
from tensorflow.keras import backend as keras_backend
keras_backend.set_value(model1.optimizer.learning_rate, 1e-3)
with tf_device('GPU:0'):
    history = model1.fit(X_train, Y_train, epochs=30, validation_data=(X_test, Y_test), verbose = 1, callbacks = [earlyStop])
Charts(history)

In [None]:
model1.evaluate(X_test, Y_test)

In [None]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV
param_grid={'C':[0.1,1,10,100],'gamma':[0.0001,0.001,0.1,1],'kernel':['rbf','poly']}
svc=svm.SVC(probability=True)
model=GridSearchCV(svc,param_grid, verbose=1)

In [None]:
X_tr = X_train.reshape((-1, np.prod(input_shape)))
X_ts = X_test.reshape((-1, np.prod(input_shape)))

In [None]:
Y_tr = Y_train.argmax(axis=1)
Y_ts = Y_test.argmax(axis=1)

In [None]:
model.fit(X_tr, Y_tr)

In [None]:
# model1.load_weights(checkpoint_filepath)
with tf_device('CPU:0'):
    model.evaluate(X_test, Y_test, batch_size=16)

In [None]:
plt.plot(history.history['accuracy'], label='Accuracy (train data)')
plt.plot(history.history['val_accuracy'], label='Accuracy (val data)')
plt.title('Accuracy on DFBS Images')
plt.ylabel('Accuracy value')
plt.xlabel('No. epoch')
plt.legend(loc="upper left")
plt.show()

In [None]:
# The plotted quantity is the training loss. Every compile call in this notebook
# uses a cross-entropy loss, so the curves are labelled "Loss" rather than "MSE".
plt.plot(history.history['loss'], label='Loss (train data)')
plt.plot(history.history['val_loss'], label='Loss (val data)')
plt.title('Loss for DFBS Images')
plt.ylabel('Loss value')
plt.xlabel('No. epoch')
plt.legend(loc="upper left")
plt.show()

In [None]:
y_test = np.argmax(Y_test, axis=1) # Convert one-hot to index
y_pred = model.predict(X_test)  # per-class scores, one row per test image
# Compare class indices directly. Re-encoding the predictions with to_categorical
# (without num_classes) yields fewer columns than Y_test whenever the highest
# class is never predicted, which makes the two inputs inconsistent.
print(classification_report(y_test, np.argmax(y_pred, axis=1)))

In [None]:
# Re-compile ``model`` with the Adamax optimizer and a binary cross-entropy loss.
# NOTE(review): assuming ``model`` is the CNN built earlier, it ends in a softmax
# over ``num_classes`` and was first compiled with "categorical_crossentropy";
# confirm that switching to "binary_crossentropy" here is intentional.
model.compile(optimizer ="adamax",
                 loss = "binary_crossentropy",
                  metrics = ["accuracy"])

In [None]:
with tf_device('GPU:0'):
    history = model.fit(X_train, Y_train, epochs = 10, validation_data=(X_test, Y_test), class_weight=class_weights, callbacks=[model_checkpoint_callback, LearningRateScheduler(lr_schedule)])

In [None]:
model.load_weights(checkpoint_directory + 'checkpoint')

In [None]:
model.evaluate(X_test, Y_test)

In [None]:
def getFilesByPattern(img_path, pattern="*.png"):
    """List the entries of ``img_path`` whose names match ``pattern``.

    ``pattern`` is a shell-style wildcard evaluated with ``fnmatch``. Each
    returned item is ``img_path`` concatenated with the entry name, so
    ``img_path`` is expected to end with a path separator.
    """
    return [img_path + entry
            for entry in os.listdir(img_path)
            if fnmatch(entry, pattern)]

def validate_dataframe(data, all_valid_files, extension):
    """Keep the rows of ``data`` that have a matching image file.

    File names are expected to look like ``<row label>__<Name><extension>``.
    A row matches a file when its index label equals the integer in front of
    ``'__'`` and its ``Name`` equals the text after it.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'Name'`` and ``'Cl'``.
    all_valid_files : list of str
        Candidate file paths using ``'/'`` as separator.
    extension : str
        File extension including the dot (for example ``'.png'``); the base
        name is cut at its first occurrence.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name``, ``Cl`` and ``path``, one row per matched row of
        ``data``, in the order of ``data``.
    """
    # Parse every file name once and index it by (row label, name) so each row
    # is resolved with a single lookup instead of a scan over all files.
    path_by_key = {}
    for path in all_valid_files:
        stem = path.split("/")[-1].split(extension)[0]
        label_text, separator, file_name = stem.partition('__')
        if not separator:
            continue  # not of the "<label>__<name>" form, cannot match any row
        try:
            key = (int(label_text), file_name)
        except ValueError:
            continue  # label part is not an integer, cannot match any row
        # setdefault keeps the first file when several share the same key,
        # matching a first-hit linear search.
        path_by_key.setdefault(key, path)

    arr_data = []
    for index, name, cl in zip(data.index, data["Name"], data["Cl"]):
        path = path_by_key.get((index, name))
        if path is not None:
            arr_data.append([name, cl, path])

    return pd.DataFrame(arr_data, columns=['Name', 'Cl', 'path'])

In [None]:
pattern = "*.png"
all_png_files = getFilesByPattern(astro_input_path, pattern)
all_png_files[0], len(all_png_files)

In [None]:
t1 = perf_counter()

extension = pattern[1:]
train_data = validate_dataframe(train, all_png_files, extension)
test_data = validate_dataframe(test, all_png_files, extension)

len(train_data), perf_counter() - t1

In [None]:
train_data.head()

In [None]:
train_data['Cl'].value_counts(), test_data['Cl'].value_counts()

In [None]:
le = LabelEncoder()
le.fit(train_data['Cl'])
train_data['Cl']=le.transform(train_data['Cl'])
test_data['Cl']=le.transform(test_data['Cl'])
train_data.head()

In [None]:
values = train_data['Cl'].value_counts()
num_classes = le.classes_.shape[0]
values

In [None]:
X_train_1 = train_data.loc[:, 'path'].values
Y_train_1 = train_data.loc[:, 'Cl'].values
X_test_1 = test_data.loc[:, 'path'].values
Y_test_1 = test_data.loc[:, 'Cl'].values

In [None]:
def preprocess_images(data, max_width=None, max_height=None):
    """Load images from paths and min-max normalise each one to [0, 1].

    Parameters
    ----------
    data : iterable of str
        Image file paths.
    max_width, max_height : int, optional
        When both are given (and non-zero) they are returned unchanged.
        Otherwise both are computed as the largest width and height among
        the loaded images.

    Returns
    -------
    images_list : list of numpy.ndarray
        One float array per image, scaled so its minimum is 0 and its
        maximum is 1. An image with a single constant value becomes all zeros.
    max_width, max_height : int
        The given or measured maximum dimensions.
    """
    sizes_given = bool(max_width and max_height)
    if not sizes_given:
        max_width = 0
        max_height = 0

    images_list = []

    for path in data:
        # The context manager closes the file handle once the pixel data has
        # been copied into the array.
        with Image.open(path) as im:
            arr = np.array(im, dtype=np.float64)

        # Min-max normalisation; a constant image has zero range and would
        # otherwise divide by zero.
        low = arr.min()
        span = arr.max() - low
        if span > 0:
            arr = (arr - low) / span
        else:
            arr = np.zeros_like(arr)

        # Track the largest dimensions only when they were not supplied.
        if not sizes_given:
            max_height = max(max_height, arr.shape[0])
            max_width = max(max_width, arr.shape[1])
        images_list.append(arr)

    return images_list, max_width, max_height

def fill_with_zeros(images_list, max_width, max_height):
    """Zero-pad every image to ``(max_height, max_width)`` and stack them.

    The missing rows and columns are split between both sides; when the
    amount is odd the extra row goes to the bottom and the extra column to
    the right. Images already at least as large along an axis are left
    unchanged along that axis. The input list and its arrays are not modified.

    Parameters
    ----------
    images_list : list of numpy.ndarray
        Images whose first two axes are (height, width).
    max_width, max_height : int
        Target width and height.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the padded images.
    """
    padded_images = []
    for image in images_list:
        image = np.asarray(image)
        extra_height = max(int(max_height) - image.shape[0], 0)
        extra_width = max(int(max_width) - image.shape[1], 0)

        pad_top = extra_height // 2
        pad_left = extra_width // 2
        pad_width = [(pad_top, extra_height - pad_top),
                     (pad_left, extra_width - pad_left)]
        # Axes beyond height and width (e.g. channels) are not padded.
        pad_width += [(0, 0)] * (image.ndim - 2)

        # Collect the padded result; rebinding the loop variable alone would
        # discard it and return the unpadded input.
        padded_images.append(np.pad(image, pad_width, mode='constant', constant_values=0))

    return np.array(padded_images)

In [None]:
train_list, max_width, max_height = preprocess_images(X_train_1)
test_list, _, _ = preprocess_images(X_test_1, max_width, max_height)
print(max_width, max_height)

train_np = fill_with_zeros(train_list, max_width, max_height)
test_np = fill_with_zeros(test_list, max_width, max_height)

In [None]:
width = max_width
height = max_height
print(width, height)
plt.imshow(train_list[1])
plt.gray()
plt.show()

In [None]:
Y_train1 = to_categorical(Y_train_1, num_classes)
Y_test1 = to_categorical(Y_test_1, num_classes)
Y_train1.sum(axis=0), Y_test1.sum(axis=0)

In [None]:
train_np.shape

In [None]:
input_shape = (img_size[0], img_size[1], 1)
X_train1 = train_np.reshape(train_np.shape[0], input_shape[0], input_shape[1], input_shape[2])
X_test1 = test_np.reshape(test_np.shape[0], input_shape[0], input_shape[1], input_shape[2])

In [None]:
print('x_train shape:', X_train1.shape)
print(X_train1.shape[0], 'train samples')
print(X_test1.shape[0], 'test samples')

In [None]:
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    """Serve (x, y) mini-batches by slicing two parallel, indexable collections."""

    def __init__(self, x_set, y_set, batch_size):
        # Keep references to the samples, their targets and the batch length.
        self.x = x_set
        self.y = y_set
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches; the last one may hold fewer than batch_size items.
        batch_count = np.ceil(len(self.x) / float(self.batch_size))
        return int(batch_count)

    def __getitem__(self, idx):
        # Batch ``idx`` covers positions [idx * batch_size, (idx + 1) * batch_size).
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.x[window], self.y[window]

train_gen = DataGenerator(X_train1, Y_train1, 256)
test_gen = DataGenerator(X_test1, Y_test1, 256)

In [None]:
# from keras.models import Sequential
# from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Activation
# from tensorflow.keras import optimizers

# model = Sequential()
# model.add(Conv2D(128, kernel_size=(3,3), input_shape=input_shape, padding="same"))
# model.add(MaxPooling2D(pool_size=(3, 1)))

# model.add(Conv2D(64, (3, 3), padding="same"))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(3, 1)))

# model.add(Conv2D(32, (3, 3), padding="same"))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(3, 4)))

# model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
# model.add(Dense(128, activation=tf.nn.relu))
# model.add(Dropout(0.5))
# model.add(Dense(num_classes,activation=tf.nn.softmax))

# Import from tensorflow.keras, like the rest of the notebook, instead of the
# standalone keras package.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Activation
from tensorflow.keras import optimizers

# Three convolution + pooling stages followed by a small dense classifier.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3,3), padding="same", input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 1)))

model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))

model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
# Activations are given by name: the notebook never imports ``tf`` itself, so
# ``tf.nn.relu`` / ``tf.nn.softmax`` would raise a NameError.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

In [None]:
model.summary()

In [None]:
counter = Counter(Y_train1.argmax(axis=1))
max_val = float(max(counter.values()))       
class_weights = {class_id : max_val/num_images for class_id, num_images in counter.items()}

num_classes = Y_train1.shape[1] ######################################################################################################

In [None]:
# Compile ``model`` with an Adadelta optimizer (learning rate 0.1, rho 0.95).
# NOTE(review): the model defined above ends in a softmax over ``num_classes``
# and the targets are one-hot encoded; confirm that 'binary_crossentropy'
# (rather than 'categorical_crossentropy') is the intended loss.
adadelta = optimizers.Adadelta(learning_rate=0.1, rho=0.95)
model.compile(optimizer=adadelta , loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# history = model.fit(train_gen, epochs = 30, shuffle=True, validation_data=test_gen, class_weight=class_weights)

In [None]:
# history_1 = model.fit(train_gen, epochs = 50, shuffle=True, validation_data=test_gen, class_weight=class_weights)

In [None]:
# Evaluate the model on the train data using `evaluate`
print("Evaluate on train data")
results_train = model1.evaluate(X_train1, Y_train1, batch_size=256)
print("train loss, train acc:", results_train)

# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print("Generate predictions for 3 samples")
predictions_train = model1.predict(X_test1[:3])
print("predictions shape:", predictions_train.shape)

print()

# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model1.evaluate(X_test1, Y_test1, batch_size=256)
print("test loss, test acc:", results)

# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print("Generate predictions for 3 samples")
predictions = model1.predict(X_test1[:3])
print("predictions shape:", predictions.shape)

In [None]:
from sklearn.metrics import classification_report
import numpy as np

y_train1 = np.argmax(Y_train1, axis=1) # Convert one-hot to index
y_pred_train1 = model1.predict(X_train1)
print(classification_report(Y_train1, to_categorical(np.argmax(y_pred_train1, axis=1), num_classes=num_classes)))

y_test1 = np.argmax(Y_test1, axis=1) # Convert one-hot to index
y_pred_test1 = model1.predict(X_test1)
print(classification_report(Y_test1, to_categorical(np.argmax(y_pred_test1, axis=1), num_classes=num_classes)))

In [None]:
test_data[test_data['Cl'] == 0]

In [None]:
# i = Input(input_shape)

# #32----------------------------------------------------------------------------------
# t11 = Conv2D(ks,kshape,kernel_regularizer = reg,padding = "same",activation = act)(i)
# t11 = BatchNormalization()(t11)
# t1i  = concatenate((i,t11))

# t12 = Conv2D(ks*2,kshape,kernel_regularizer = reg,padding = "same",activation = act)(t1i)
# t12 = BatchNormalization()(t12)
# t2i = concatenate((i,t11,t12))

# t13 = Conv2D(ks*4,kshape,kernel_regularizer = reg,padding = "same",activation = act)(t2i)
# t13 = BatchNormalization()(t13)
# t13 = MaxPooling2D(2,2)(t13)
# t13 = Dropout(drop_size*3)(t13)
# #16----------------------------------------------------------------------------------
# t21 = Conv2D(ks*2,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t13)
# t21 = BatchNormalization()(t21)
# t2i2  = concatenate((t13,t21))
# t22 = Conv2D(ks*4,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t2i2)
# t22 = BatchNormalization()(t22)
# t2i3 = concatenate((t13,t21,t22))
# t23 = Conv2D(ks*8,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t2i3)
# t23 = BatchNormalization()(t23)
# t23 = MaxPooling2D(2,2)(t23)
# t23 = Dropout(drop_size*3)(t23)
# #8--------------------------------------------------------------------------------------
# t31 = Conv2D(ks*2,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t23)
# t31 = BatchNormalization()(t31)
# t3i2  = concatenate((t23,t31))
# t32 = Conv2D(ks*4,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t3i2)
# t32 = BatchNormalization()(t32)
# t3i3 = concatenate((t23,t31,t32))
# t33 = Conv2D(ks*8,(3,3),kernel_regularizer = reg,padding = "same",activation = act)(t3i3)
# t33 = BatchNormalization()(t33)
# t33 = MaxPooling2D(2,2)(t33)
# t33 = Dropout(drop_size*3)(t33)
# #4------------------------------------------------------------------------------------
# # output = concatenate((t12,t))
# # output = BatchNormalization()(output)
# # output = GlobalAveragePooling2D()(t6)
# # output = GlobalMaxPooling2D()(t6)
# output = Flatten()(t33)
# # output = Dropout(drop_size*4)
# output = Dense(16*ks,kernel_regularizer = reg,activation = act)(output)
# output = BatchNormalization()(output)
# output = Dropout(3*drop_size)(output)
# output = Dense(16*ks,kernel_regularizer = reg,activation = act)(output)
# output = BatchNormalization()(output)
# output = Dropout(3*drop_size)(output)
# output = Dense(16*ks,kernel_regularizer = reg,activation = act)(output)
# output = BatchNormalization()(output)
# output = Dropout(3*drop_size)(output)

# output = Dense(num_classes,activation='softmax')(output)
# model2 = Model(i,output)

# model2.compile(optimizer ="adam",
#                  loss = "categorical_crossentropy",
#                   metrics = ["accuracy"])

In [None]:
# model2.load_weights(checkpoint_filepath)

In [None]:
# checkpoint_filepath_2 = './tmp_2/checkpoint'
# model_checkpoint_callback_2 = tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_filepath,
#     save_weights_only=True,
#     monitor='val_accuracy',
#     mode='max',
#     save_best_only=True)

In [None]:
# with tf.device('GPU:0'):
#     history_2 = model2.fit(X_train1, Y_train1, epochs = 150, batch_size = 256, shuffle=True, validation_data=(X_test1, Y_test1), class_weight=class_weights, workers=-1, callbacks=[model_checkpoint_callback_2])

In [None]:
# # Evaluate the model on the train data using `evaluate`
# print("Evaluate on train data")
# results_train = model2.evaluate(X_train1, Y_train1, batch_size=256)
# print("train loss, train acc:", results_train)

# # Generate predictions (probabilities -- the output of the last layer)
# # on new data using `predict`
# print("Generate predictions for 3 samples")
# predictions_train = model2.predict(X_test1[:3])
# print("predictions shape:", predictions_train.shape)

# print()

# # Evaluate the model on the test data using `evaluate`
# print("Evaluate on test data")
# results = model2.evaluate(X_test1, Y_test1, batch_size=256)
# print("test loss, test acc:", results)

# # Generate predictions (probabilities -- the output of the last layer)
# # on new data using `predict`
# print("Generate predictions for 3 samples")
# predictions = model2.predict(X_test1[:3])
# print("predictions shape:", predictions.shape)

In [None]:
# from sklearn.metrics import classification_report
# import numpy as np

# y_train1 = np.argmax(Y_train1, axis=1) # Convert one-hot to index
# y_pred_train1 = model2.predict(X_train1)
# print(classification_report(Y_train1, to_categorical(np.argmax(y_pred_train1, axis=1), num_classes=num_classes)))

# y_test1 = np.argmax(Y_test1, axis=1) # Convert one-hot to index
# y_pred_test1 = model2.predict(X_test1)
# print(classification_report(Y_test1, to_categorical(np.argmax(y_pred_test1, axis=1), num_classes=num_classes)))

In [None]:
# y_pred_train1.shape, y_pred_test1.shape

In [None]:
X_train2 = X_train1.reshape(X_train1.shape[0], -1)
Y_train2 = Y_train1.argmax(axis=1)
X_test2, Y_test2 = X_test1.reshape(X_test1.shape[0], -1), Y_test1.argmax(axis=1)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(kernel='poly', degree=25,gamma='auto'))
clf.fit(X_train2, Y_train2)
clf.score(X_train2, Y_train2), clf.score(X_test2, Y_test2)

In [None]:
from sklearn.metrics import classification_report
import numpy as np

y_pred_train2 = clf.predict(X_train2)
print(classification_report(Y_train2, y_pred_train2))

y_pred_test2 = clf.predict(X_test2)
print(classification_report(Y_test2, y_pred_test2))

In [None]:
a = clf.steps[1][1]