<a href="https://colab.research.google.com/github/RushaliRajesh/DCEC/blob/main/ct_dcec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
from keras.layers import Conv2D, Conv2DTranspose, Dense, Flatten, Reshape
from keras.models import Sequential, Model
from keras.utils.vis_utils import plot_model
import numpy as np
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

nmi = normalized_mutual_info_score
ari = adjusted_rand_score


def acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    !pip install scikit-learn==0.22.2
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return float(sum([w[i, j] for i, j in ind])) * 1.0 / y_pred.size

def CAE(input_shape=(80, 80, 1), filters=[32, 64, 128, 2]):
    model = Sequential()
    if input_shape[0] % 8 == 0:
        pad3 = 'same'
    else:
        pad3 = 'valid'
    model.add(Conv2D(filters[0], 5, strides=2, padding='same', activation='relu', name='conv1', input_shape=input_shape))

    model.add(Conv2D(filters[1], 5, strides=2, padding='same', activation='relu', name='conv2'))

    model.add(Conv2D(filters[2], 3, strides=2, padding=pad3, activation='relu', name='conv3'))

    model.add(Flatten())
    model.add(Dense(units=filters[3], name='embedding'))
    model.add(Dense(units=filters[2]*int(input_shape[0]/8)*int(input_shape[0]/8), activation='relu'))

    model.add(Reshape((int(input_shape[0]/8), int(input_shape[0]/8), filters[2])))
    model.add(Conv2DTranspose(filters[1], 3, strides=2, padding=pad3, activation='relu', name='deconv3'))

    model.add(Conv2DTranspose(filters[0], 5, strides=2, padding='same', activation='relu', name='deconv2'))

    model.add(Conv2DTranspose(input_shape[2], 5, strides=2, padding='same', name='deconv1'))
    model.summary()
    return model

if __name__ == "__main__":
    from time import time

    # setting the hyper parameters
    import argparse

    class Args:
      dataset='ct'
      n_clusters=2
      batch_size=3
      epochs=30
      save_dir='results/temp'
    args=Args()
    print(args)

    import os
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)


    import numpy as np


    def load_ct():

        # the data, shuffled and split between train and test sets
        import cv2
        import os
        import numpy as np
        import random

        data = []

        path_noncov = "/content/non-COVID"
        for img in os.listdir(path_noncov):
          image = cv2.imread(os.path.join(path_noncov,img), cv2.IMREAD_GRAYSCALE)
          image= cv2.resize(image,(80,80))
          data.append([image,0])
        print(len(data))

        path_cov = "/content/COVID"
        for img in os.listdir(path_cov):
          image = cv2.imread(os.path.join(path_cov,img), cv2.IMREAD_GRAYSCALE)
          image= cv2.resize(image,(80,80))
          data.append([image,1])
        print(len(data))

        random.shuffle(data)

        print("shuffled data ready")

        #from tensorflow.keras.utils import to_categorical

        x=[]
        y=[]
        for f,l in data:
            x.append(f)
            y.append(l)
        x = np.array(x).reshape(-1,80,80,1)
        y = np.array(y)
        # for f,l in noncov_arr:
        #     x_noncov.append(f)
        #     y_noncov.append(l)
        # x_noncov = np.array(x_noncov).reshape(-1,240,240,1)
        # y_noncov = to_categorical(y_noncov, num_classes=2)

        # x = np.concatenate((x_cov,x_noncov))
        # y = np.concatenate((y_cov,y_noncov))
        print(x.shape)
        print(y.shape)

        return x, y


    # # load dataset
    # from datasets import load_mnist, load_usps
    # if args.dataset == 'mnist':
    x, y = load_ct()
    #print(x)
    # elif args.dataset == 'usps':
    #     x, y = load_usps('data/usps')

    # define the model
    model = CAE(input_shape=x.shape[1:], filters=[32, 64, 128, 2])
    plot_model(model, to_file=args.save_dir + '/%s-pretrain-model.png' % args.dataset, show_shapes=True)
    model.summary()

    # compile the model and callbacks
    optimizer = 'adam'
    model.compile(optimizer=optimizer, loss='mse')
    
    # begin training
    t0 = time()
    model.fit(x, x, batch_size=args.batch_size, epochs=args.epochs)
    print('Training time: ', time() - t0)
    #model.save(args.save_dir + '/%s-pretrain-model-%d.h5' % (args.dataset, args.epochs))

    # extract features
    feature_model = Model(inputs=model.input, outputs=model.get_layer(name='embedding').output)
    features = feature_model.predict(x)
    print('feature shape=', features.shape)

    # use features for clustering
    from sklearn.cluster import KMeans
    km = KMeans(n_clusters=args.n_clusters)

    features = np.reshape(features, newshape=(features.shape[0], -1))
    pred = km.fit_predict(features)

    from sklearn.metrics.cluster import normalized_mutual_info_score   
    from sklearn.metrics import accuracy_score 
    from sklearn.metrics.cluster import adjusted_rand_score
    #from . import metrics
    print('acc=', acc(y, pred), 'nmi=', nmi(y, pred), 'ari=', ari(y, pred))

<__main__.Args object at 0x7f9ef2df2950>
1229
2481
shuffled data ready
(2481, 80, 80, 1)
(2481,)
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (None, 40, 40, 32)        832       
                                                                 
 conv2 (Conv2D)              (None, 20, 20, 64)        51264     
                                                                 
 conv3 (Conv2D)              (None, 10, 10, 128)       73856     
                                                                 
 flatten_10 (Flatten)        (None, 12800)             0         
                                                                 
 embedding (Dense)           (None, 2)                 25602     
                                                                 
 dense_10 (Dense)            (None, 12800)             38400     
                      



In [47]:
y1=np.array(y).ravel()
y1.shape,pred.shape,y.shape,pred

((4962,), (2481,), (2481, 2), array([1, 1, 0, ..., 0, 0, 0], dtype=int32))

In [None]:
from time import time
import numpy as np
import keras.backend as K
from tensorflow.keras.layers import Layer, InputSpec
from keras.models import Model
from keras.utils.vis_utils import plot_model
from sklearn.cluster import KMeans
from keras.models import Sequential
from keras.layers import Conv2D, Conv2DTranspose, Dense, Flatten, Reshape

#from ConvAE import CAE


class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.
    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ student t-distribution, as same as used in t-SNE algorithm.
                 q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class DCEC(object):
    def __init__(self,
                 input_shape,
                 filters=[32, 64, 128, 10],
                 n_clusters=10,
                 alpha=1.0):

        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name='embedding').output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])

    def pretrain(self, x, batch_size=256, epochs=20, optimizer='adam', save_dir='results/temp'):
        print('...Pretraining...')
        self.cae.compile(optimizer=optimizer, loss='mse')
        #from keras.callbacks import CSVLogger
        #csv_logger = CSVLogger(args.save_dir + '/pretrain_log.csv')

        # begin training
        t0 = time()
        self.cae.fit(x, x, batch_size=batch_size, epochs=epochs)
        print('Pretraining time: ', time() - t0)
        self.cae.save(save_dir + '/pretrain_cae_model.h5')
        print('Pretrained weights are saved to %s/pretrain_cae_model.h5' % save_dir)
        self.pretrained = True

    def load_weights(self, weights_path):
        self.model.load_weights(weights_path)

    def extract_feature(self, x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict(self, x):
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    def compile(self, loss=['kld', 'mse'], loss_weights=[1, 1], optimizer='adam'):
        self.model.compile(loss=loss, loss_weights=loss_weights, optimizer=optimizer)

    def fit(self, x, y=None, batch_size=256, maxiter=2e4, tol=1e-3,
            update_interval=140, cae_weights=None, save_dir='./results/temp'):

        print('Update interval', update_interval)
        save_interval = x.shape[0] / batch_size * 5
        print('Save interval', save_interval)

        # Step 1: pretrain if necessary
        t0 = time()
        if not self.pretrained and cae_weights is None:
            print('...pretraining CAE using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=200')
            self.pretrain(x, batch_size, save_dir=save_dir)
            self.pretrained = True
        elif cae_weights is not None:
            self.cae.load_weights(cae_weights)
            print('cae_weights is loaded successfully.')

        # Step 2: initialize cluster centers using k-means
        t1 = time()
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

        # Step 3: deep clustering
        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile = open(save_dir + '/dcec_log.csv', 'w')
        logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()

        t2 = time()
        loss = [0, 0, 0]
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    acc1 = np.round(acc(y, self.y_pred), 5)
                    nmi1 = np.round(nmi(y, self.y_pred), 5)
                    ari1 = np.round(ari(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite, acc=acc1, nmi=nmi1, ari=ari1, L=loss[0], Lc=loss[1], Lr=loss[2])
                    logwriter.writerow(logdict)
                    print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss)

                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                y_pred_last = np.copy(self.y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    logfile.close()
                    break

            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(x=x[index * batch_size::],
                                                 y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size],
                                                 y=[p[index * batch_size:(index + 1) * batch_size],
                                                    x[index * batch_size:(index + 1) * batch_size]])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                print('saving model to:', save_dir + '/dcec_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/dcec_model_' + str(ite) + '.h5')

            ite += 1
        # save the trained model
        logfile.close()
        print('saving model to:', save_dir + '/dcec_model_final.h5')
        self.model.save_weights(save_dir + '/dcec_model_final.h5')
        t3 = time()
        print('Pretrain time:  ', t1 - t0)
        print('Clustering time:', t3 - t1)
        print('Total time:     ', t3 - t0)


if __name__ == "__main__":
    # setting the hyper parameters
    import argparse
    class Args:
      dataset='ct'
      n_clusters=2
      batch_size=3
      maxiter=2e4
      gamma=0.1
      update_interval=140
      tol=0.001
      cae_weights=None
      save_dir='results/temp'
    
    args = Args()
    print(args)

    import os
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # load dataset
    #from datasets import load_mnist, load_usps
    if args.dataset == 'mnist':
        x, y = load_ct()
    # elif args.dataset == 'usps':
    #     x, y = load_usps('data/usps')
    # elif args.dataset == 'mnist-test':
    #     x, y = load_mnist()
        x, y = x[1481:], y[1481:]

    # prepare the DCEC model
    dcec = DCEC(input_shape=x.shape[1:], filters=[32, 64, 128, 10], n_clusters=args.n_clusters)
    plot_model(dcec.model, to_file=args.save_dir + '/dcec_model.png', show_shapes=True)
    dcec.model.summary()

    # begin clustering.
    optimizer = 'adam'
    dcec.compile(loss=['kld', 'mse'], loss_weights=[args.gamma, 1], optimizer=optimizer)
    dcec.fit(x, y=y, tol=args.tol, maxiter=args.maxiter,
             update_interval=args.update_interval,
             save_dir=args.save_dir,
             cae_weights=args.cae_weights)
    y_pred = dcec.y_pred
    print('acc = %.4f, nmi = %.4f, ari = %.4f' % (acc(y, y_pred), nmi(y, y_pred), ari(y, y_pred)))

<__main__.Args object at 0x7f9ef121ce50>
Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (None, 40, 40, 32)        832       
                                                                 
 conv2 (Conv2D)              (None, 20, 20, 64)        51264     
                                                                 
 conv3 (Conv2D)              (None, 10, 10, 128)       73856     
                                                                 
 flatten_11 (Flatten)        (None, 12800)             0         
                                                                 
 embedding (Dense)           (None, 10)                128010    
                                                                 
 dense_11 (Dense)            (None, 12800)             140800    
                                                                 
 reshape_11 



Iter 0 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [0 0 0]
saving model to: results/temp/dcec_model_0.h5




Iter 140 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [1.90234326e+03 2.18040000e-01 1.90232129e+03]




Iter 280 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [1.39466077e+03 1.03700000e-01 1.39465039e+03]




Iter 420 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [1.15491101e+03 1.93000000e-02 1.15490906e+03]




Iter 560 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [1.02571423e+03 1.66800000e-02 1.02571252e+03]




Iter 700 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [1.01375232e+03 1.35700000e-02 1.01375098e+03]




Iter 840 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [9.3959558e+02 1.1670000e-02 9.3959448e+02]




Iter 980 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [9.0055322e+02 1.3550000e-02 9.0055188e+02]




Iter 1120 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [9.5728436e+02 1.1330000e-02 9.5728308e+02]




Iter 1260 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [9.0286902e+02 1.1130000e-02 9.0286792e+02]




Iter 1400 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [8.6667230e+02 8.2100000e-03 8.6667151e+02]




Iter 1540 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [8.3644873e+02 8.8600000e-03 8.3644781e+02]




Iter 1680 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.9543414e+02 8.2100000e-03 7.9543335e+02]




Iter 1820 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.8777972e+02 7.2300000e-03 7.8777893e+02]




Iter 1960 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.7008862e+02 7.0700000e-03 7.7008795e+02]




Iter 2100 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [8.2832935e+02 7.3700000e-03 8.2832867e+02]




Iter 2240 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [8.0997906e+02 7.0800000e-03 8.0997839e+02]




Iter 2380 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.5422552e+02 8.9700000e-03 7.5422455e+02]




Iter 2520 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.3206360e+02 7.5800000e-03 7.3206287e+02]




Iter 2660 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.4413330e+02 7.0300000e-03 7.4413257e+02]




Iter 2800 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.0962152e+02 6.1300000e-03 7.0962091e+02]




Iter 2940 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.0878442e+02 6.5500000e-03 7.0878375e+02]




Iter 3080 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.9601233e+02 5.8700000e-03 6.9601172e+02]




Iter 3220 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.1152423e+02 5.8400000e-03 7.1152362e+02]




Iter 3360 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.9156097e+02 5.4100000e-03 6.9156042e+02]




Iter 3500 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.4218396e+02 5.3600000e-03 7.4218341e+02]




Iter 3640 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.1872357e+02 5.5300000e-03 7.1872302e+02]




Iter 3780 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.6357654e+02 5.2700000e-03 6.6357599e+02]




Iter 3920 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.4889929e+02 5.4200000e-03 7.4889874e+02]




Iter 4060 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.9153705e+02 5.2100000e-03 6.9153650e+02]




Iter 4200 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [7.3239380e+02 5.3200000e-03 7.3239325e+02]




Iter 4340 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.6470496e+02 5.0800000e-03 6.6470447e+02]




Iter 4480 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.7481049e+02 4.7500000e-03 6.7481000e+02]




Iter 4620 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.4771051e+02 4.5500000e-03 6.4771008e+02]




Iter 4760 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.4321112e+02 4.8200000e-03 6.4321063e+02]




Iter 4900 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.5973230e+02 4.4700000e-03 6.5973187e+02]




Iter 5040 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.3814441e+02 4.5700000e-03 6.3814398e+02]




Iter 5180 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.3719452e+02 4.8100000e-03 6.3719403e+02]




Iter 5320 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.1337469e+02 4.4800000e-03 6.1337427e+02]




Iter 5460 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.3962946e+02 4.3500000e-03 6.3962903e+02]




Iter 5600 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.6914520e+02 4.3300000e-03 6.6914478e+02]




Iter 5740 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.5042169e+02 4.2800000e-03 6.5042126e+02]




Iter 5880 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [5.9762689e+02 4.2200000e-03 5.9762646e+02]




Iter 6020 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.0966235e+02 4.0000000e-03 6.0966193e+02]




Iter 6160 : Acc <function acc at 0x7f9ef2d16dd0> , nmi <function normalized_mutual_info_score at 0x7f9f04469320> , ari <function adjusted_rand_score at 0x7f9f0471fef0> ; loss= [6.2127393e+02 4.2600000e-03 6.2127350e+02]


In [None]:
from zipfile import ZipFile
file_name = "/content/ct"

with ZipFile(file_name, 'r') as zip:
  zip.extractall()
  print('Done')

BadZipFile: ignored

In [1]:
 ! pip install -q kaggle

In [2]:
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

In [3]:
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
! kaggle datasets list

ref                                                         title                                              size  lastUpdated          downloadCount  
----------------------------------------------------------  ------------------------------------------------  -----  -------------------  -------------  
gpreda/reddit-vaccine-myths                                 Reddit Vaccine Myths                              237KB  2021-11-21 16:54:23          16441  
crowww/a-large-scale-fish-dataset                           A Large Scale Fish Dataset                          3GB  2021-04-28 17:03:01           9923  
imsparsh/musicnet-dataset                                   MusicNet Dataset                                   22GB  2021-02-18 14:12:19           4567  
dhruvildave/wikibooks-dataset                               Wikibooks Dataset                                   2GB  2021-10-22 10:48:21           3628  
promptcloud/careerbuilder-job-listing-2020                  Careerbuilder Jo

In [6]:
!kaggle datasets download -d plameneduardo/sarscov2-ctscan-dataset

Downloading sarscov2-ctscan-dataset.zip to /content
 99% 228M/230M [00:02<00:00, 114MB/s] 
100% 230M/230M [00:02<00:00, 93.1MB/s]


In [7]:
! unzip /content/sarscov2-ctscan-dataset.zip

Archive:  /content/sarscov2-ctscan-dataset.zip
  inflating: COVID/Covid (1).png     
  inflating: COVID/Covid (10).png    
  inflating: COVID/Covid (100).png   
  inflating: COVID/Covid (1000).png  
  inflating: COVID/Covid (1001).png  
  inflating: COVID/Covid (1002).png  
  inflating: COVID/Covid (1003).png  
  inflating: COVID/Covid (1004).png  
  inflating: COVID/Covid (1005).png  
  inflating: COVID/Covid (1006).png  
  inflating: COVID/Covid (1007).png  
  inflating: COVID/Covid (1008).png  
  inflating: COVID/Covid (1009).png  
  inflating: COVID/Covid (101).png   
  inflating: COVID/Covid (1010).png  
  inflating: COVID/Covid (1011).png  
  inflating: COVID/Covid (1012).png  
  inflating: COVID/Covid (1013).png  
  inflating: COVID/Covid (1014).png  
  inflating: COVID/Covid (1015).png  
  inflating: COVID/Covid (1016).png  
  inflating: COVID/Covid (1017).png  
  inflating: COVID/Covid (1018).png  
  inflating: COVID/Covid (1019).png  
  inflating: COVID/Covid (102).png   
  i

In [22]:
import cv2
import os
import numpy as np
import random

data = []

path_noncov = "/content/non-COVID"
for img in os.listdir(path_noncov):
  image = cv2.imread(os.path.join(path_noncov,img), cv2.IMREAD_GRAYSCALE)
  image= cv2.resize(image,(80,80))
  data.append([image,0])
print(len(data))

path_cov = "/content/COVID"
for img in os.listdir(path_cov):
  image = cv2.imread(os.path.join(path_cov,img), cv2.IMREAD_GRAYSCALE)
  image= cv2.resize(image,(80,80))
  data.append([image,1])
print(len(data))

random.shuffle(data)

print("shuffled data ready")

from tensorflow.keras.utils import to_categorical

x=[]
y=[]
for f,l in data:
    x.append(f)
    y.append(l)
x = np.array(x).reshape(-1,80,80,1)
y = to_categorical(y, num_classes=2)
# for f,l in noncov_arr:
#     x_noncov.append(f)
#     y_noncov.append(l)
# x_noncov = np.array(x_noncov).reshape(-1,240,240,1)
# y_noncov = to_categorical(y_noncov, num_classes=2)

# x = np.concatenate((x_cov,x_noncov))
# y = np.concatenate((y_cov,y_noncov))
print(x.shape)
print(y.shape)


1229
2481
ok
(2481, 80, 80, 1)
(2481, 2)


In [26]:
cov_arr

[[array([[204, 204, 204, ..., 205, 205, 205],
         [204, 204, 204, ..., 206, 206, 206],
         [204, 204, 204, ..., 207, 207, 207],
         ...,
         [207, 207, 207, ..., 209, 209, 209],
         [207, 207, 207, ..., 209, 209, 209],
         [207, 207, 207, ..., 209, 209, 209]], dtype=uint8), 1],
 [array([[ 68,  68,  68, ...,  68,  68,  68],
         [ 68,  68,  68, ...,  68,  68,  68],
         [ 68,  68,  68, ...,  68,  68,  68],
         ...,
         [ 70,  88, 154, ..., 100, 108,  92],
         [ 69,  70,  83, ...,  91,  76,  71],
         [ 72,  69,  69, ...,  69,  68,  69]], dtype=uint8), 1],
 [array([[199, 183, 191, ..., 180, 197, 214],
         [188, 194, 188, ..., 200, 193, 194],
         [187, 184, 185, ..., 186, 186, 188],
         ...,
         [208, 215, 229, ..., 219, 219, 221],
         [209, 216, 218, ..., 224, 224, 224],
         [226, 220, 224, ..., 220, 220, 223]], dtype=uint8), 1],
 [array([[196, 206, 202, ..., 203, 205, 193],
         [195, 194, 193, ..

In [16]:
import cv2
img = cv2.imread("/content/COVID/Covid (88).png")
img.shape

(266, 348, 3)