In [None]:
!pip install kaggle==1.5.12



In [None]:
! mkdir ~/.kaggle

In [None]:
 from google.colab import files
files.upload()
! cp kaggle.json ~/.kaggle/

Saving kaggle.json to kaggle.json


In [None]:
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
import kaggle

In [None]:
! kaggle -v


Kaggle API 1.5.4


In [None]:
!kaggle competitions download -c human-protein-atlas-image-classification

Downloading 0000a892-bacf-11e8-b2b8-ac1f6b6435d0_red.png to /content
  0% 0.00/87.9k [00:00<?, ?B/s]
100% 87.9k/87.9k [00:00<00:00, 34.6MB/s]
429 - Too Many Requests


In [3]:
import os, sys, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from imgaug import augmenters as iaa
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [4]:
INPUT_SHAPE = (299,299,3)
BATCH_SIZE = 10

### Load dataset info

In [5]:
path_to_train = 'train/'
data = pd.read_csv('train.csv')

train_dataset_info = []
for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
    train_dataset_info.append({
        'path':os.path.join(path_to_train, name),
        'labels':np.array([int(label) for label in labels])})
train_dataset_info = np.array(train_dataset_info)

FileNotFoundError: [Errno 2] File train.csv does not exist: 'train.csv'

In [6]:
from sklearn.model_selection import train_test_split
train_ids, test_ids, train_targets, test_target = train_test_split(
    data['Id'], data['Target'], test_size=0.2, random_state=42)

NameError: name 'data' is not defined

### Create datagenerator

In [17]:
class data_generator:
    
    def create_train(dataset_info, batch_size, shape, augument=True):
        assert shape[2] == 3
        while True:
            random_indexes = np.random.choice(len(dataset_info), batch_size)
            batch_images = np.empty((batch_size, shape[0], shape[1], shape[2]))
            batch_labels = np.zeros((batch_size, 28))
            for i, idx in enumerate(random_indexes):
                image = data_generator.load_image(
                    dataset_info[idx]['path'], shape)   
                if augument:
                    image = data_generator.augment(image)
                batch_images[i] = image
                batch_labels[i][dataset_info[idx]['labels']] = 1
            yield batch_images, batch_labels
            
    
    def load_image(path, shape):
        R = np.array(Image.open(path+'_red.png'))
        G = np.array(Image.open(path+'_green.png'))
        B = np.array(Image.open(path+'_blue.png'))
        Y = np.array(Image.open(path+'_yellow.png'))

        image = np.stack((
            R/2 + Y/2, 
            G/2 + Y/2, 
            B),-1)
        
        image = cv2.resize(image, (shape[0], shape[1]))
        image = np.divide(image, 255)
        return image  
                
            
    def augment(image):
        augment_img = iaa.Sequential([
            iaa.OneOf([
                iaa.Affine(rotate=0),
                iaa.Affine(rotate=90),
                iaa.Affine(rotate=180),
                iaa.Affine(rotate=270),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ])], random_order=True)
        
        image_aug = augment_img.augment_image(image)
        return image_aug


### Show data

In [7]:
# create train datagen
train_datagen = data_generator.create_train(
    train_dataset_info, 5, (299,299,3), augument=True)

NameError: name 'data_generator' is not defined

In [8]:
images, labels = next(train_datagen)

fig, ax = plt.subplots(1,5,figsize=(25,5))
for i in range(5):
    ax[i].imshow(images[i])
print('min: {0}, max: {1}'.format(images.min(), images.max()))

NameError: name 'train_datagen' is not defined

### Create model

In [2]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Input
from keras.layers import BatchNormalization
from keras.layers import Conv2D
from keras.models import Model
from keras.applications import InceptionResNetV2
from keras.callbacks import ModelCheckpoint
from keras.callbacks import LambdaCallback
from keras.callbacks import Callback
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import tensorflow as tf
import keras

def create_model(input_shape, n_out):
    
    pretrain_model = InceptionResNetV2(
        include_top=False, 
        weights='imagenet', 
        input_shape=input_shape)    
    
    input_tensor = Input(shape=input_shape)
    bn = BatchNormalization()(input_tensor)
    x = pretrain_model(bn)
    x = Conv2D(128, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    
    return model

In [3]:
tf.__version__

'1.14.0'

In [5]:
keras.__version__

'2.2.5'

In [10]:
def f1(y_true, y_pred):
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    f1 = tf.where(tf.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

In [None]:
def show_history(history):
    fig, ax = plt.subplots(1, 3, figsize=(15,5))
    ax[0].set_title('loss')
    ax[0].plot(history.epoch, history.history["loss"], label="Train loss")
    ax[0].plot(history.epoch, history.history["val_loss"], label="Validation loss")
    ax[1].set_title('f1')
    ax[1].plot(history.epoch, history.history["f1"], label="Train f1")
    ax[1].plot(history.epoch, history.history["val_f1"], label="Validation f1")
    ax[2].set_title('acc')
    ax[2].plot(history.epoch, history.history["acc"], label="Train acc")
    ax[2].plot(history.epoch, history.history["val_acc"], label="Validation acc")
    ax[0].legend()
    ax[1].legend()
    ax[2].legend()

In [None]:
keras.backend.clear_session()

model = create_model(
    input_shape=(299,299,3), 
    n_out=28)

model.summary()

### Train model

In [None]:
checkpointer = ModelCheckpoint(
    'InceptionResNetV2.model',
    verbose=2, save_best_only=True)

train_generator = data_generator.create_train(
    train_dataset_info[train_ids.index], BATCH_SIZE, INPUT_SHAPE, augument=False)
validation_generator = data_generator.create_train(
    train_dataset_info[test_ids.index], 256, INPUT_SHAPE, augument=False)

model.layers[2].trainable = False

model.compile(
    loss='binary_crossentropy',  
    optimizer=Adam(1e-3),
    metrics=['acc', f1])

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    validation_data=next(validation_generator),
    epochs=15, 
    verbose=1,
    callbacks=[checkpointer])

In [None]:
show_history(history)

In [None]:
train_generator = data_generator.create_train(
    train_dataset_info[train_ids.index], BATCH_SIZE, INPUT_SHAPE, augument=True)
validation_generator = data_generator.create_train(
    train_dataset_info[test_ids.index], 256, INPUT_SHAPE, augument=False)

model.layers[2].trainable = True

model.compile(
    loss='binary_crossentropy',  
    optimizer=Adam(1e-4),
    metrics=['acc', f1])

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    validation_data=next(validation_generator),
    epochs=180, 
    verbose=1,
    callbacks=[checkpointer])

In [None]:
show_history(history)

### Create submit

In [11]:
model = load_model(
    'InceptionResNetV2.model', 
    custom_objects={'f1': f1})









Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [12]:
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 299, 299, 3)       0         
_________________________________________________________________
batch_normalization_204 (Bat (None, 299, 299, 3)       12        
_________________________________________________________________
inception_resnet_v2 (Model)  (None, 8, 8, 1536)        54336736  
_________________________________________________________________
conv2d_204 (Conv2D)          (None, 8, 8, 128)         196736    
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               4194

In [15]:
submit=[]
for i in os.listdir("test"):
    test=i.split("_")
    submit.append(test[0])

In [18]:
%%time
predicted = []
for name in tqdm(submit):
    path = os.path.join('test/', name)
    image = data_generator.load_image(path, INPUT_SHAPE)
    score_predict = model.predict(image[np.newaxis])[0]
    label_predict = np.arange(28)[score_predict>=0.2]
    str_predict_label = ' '.join(str(l) for l in label_predict)
    predicted.append(str_predict_label)

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:15<00:00,  3.77s/it]

Wall time: 15.1 s





In [19]:
predicted

['2', '2', '2', '2']

In [35]:
import os
p="C:/Users/ehtisham ahmed/python/human protein atlas classification/test/00008af0-bad0-11e8-b2b8-ac1f6b6435d0_blue.png"
path='C:'
print(p.split('/'))
for i in p.split('/')[1:-1]:
    print(i)
    path=path+"\\"+i
root=path
imgpath=os.listdir(root)

['C:', 'Users', 'ehtisham ahmed', 'python', 'human protein atlas classification', 'test', '00008af0-bad0-11e8-b2b8-ac1f6b6435d0_blue.png']
Users
ehtisham ahmed
python
human protein atlas classification
test
C:\Users\ehtisham ahmed\python\human protein atlas classification\test


['00008af0-bad0-11e8-b2b8-ac1f6b6435d0_blue.png',
 '00008af0-bad0-11e8-b2b8-ac1f6b6435d0_green.png',
 '00008af0-bad0-11e8-b2b8-ac1f6b6435d0_red.png',
 '00008af0-bad0-11e8-b2b8-ac1f6b6435d0_yellow.png']

In [32]:
print("hello")

hello


In [1]:
import json

In [14]:
with open("disease.json") as f:
    data=json.load(f)
print()
print(data[list(data.keys())[0]])


Emery-Dreifuss muscular dystrophy (EDMD), dilated cardiomyopathy, familial partial lipodystrophy


In [24]:
label=[
'Nucleoplasm',  
'Nuclear membrane',   
'Nucleoli'   ,
'Nucleoli fibrillar center'   ,
'Nuclear speckles'   ,
'Nuclear bodies'   ,
'Endoplasmic reticulum',   
'Golgi apparatus ',  
'Peroxisomes ',  
'Endosomes',   
'Lysosomes  ', 
'Intermediate filaments ',  
'Actin filaments ' , 
'Focal adhesion sites',   
'Microtubules' ,  
'Microtubule ends',   
'Cytokinetic bridge',   
'Mitotic spindle',   
'Microtubule organizing center ',  
'Centrosome ',  
'Lipid droplets',   
'Plasma membrane',   
'Cell junctions',   
'Mitochondria',   
'Aggresome',   
'Cytosol',   
'Cytoplasmic bodies',   
'Rods & rings',  
]

with open("disease.json") as f:
            data=json.load(f)
result=""
for i in [2,5,7]:
    result+=data[list(data.keys())[i]] + "\n" 
print(result)

Alzheimer's and
 Huntington'sÂ disease
Huntington'sÂ diseas
ZellwegerÂ syndromespectrum 
(PBD-ZSD) and rhizomelic 
chondrodysplasia punctata type 1Â 

