In [1]:
import os
import numpy as np
import tensorflow as tf 

os.environ['CUDA_VISIBLE_DEVICES'] = "0" #use GPU:0 only
gpus = tf.config.experimental.list_physical_devices(device_type='GPU') 
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu,enable=True) 

from tensorflow.python.client import device_lib
local_device_protos = device_lib.list_local_devices()
#print GPU Devices
[print(x) for x in local_device_protos if x.device_type == 'GPU']
print('Tensorflow Version:',tf.__version__)

name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10561793600
locality {
  bus_id: 1
  links {
  }
}
incarnation: 15033290806541527015
physical_device_desc: "device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:3b:00.0, compute capability: 7.5"

Tensorflow Version: 2.4.1


In [2]:
# def. some functions 

# freeze some layers of base model
def freeze_layer(base_model,layer_name = []):
    
    base_model.trainable=True
    set_trainable = False
    for layer in base_model.layers:
        if layer.name in layer_name:
            set_trainable = True
        if set_trainable:
            layer.trainable = True
        else:
            layer.trainable = False
    return base_model

def build_model(base_model,hidden_layers_num = [64],drop_out=0.5,lr=5e-5):
    '''
    Pars:
        hidden_layers_num: type:list
        drop_out: rate of drop out layer
        lr: learning rate
    '''
    
    new_model = tf.keras.models.Sequential()
    new_model.add(base_model)
    new_model.add(tf.keras.layers.Flatten())
    new_model.add(tf.keras.layers.Dropout(drop_out))
    for num_ in hidden_layers_num:
        new_model.add(tf.keras.layers.Dense(num_,activation='relu'))
    new_model.add(tf.keras.layers.Dense(3,activation='softmax'))
    new_model.compile(loss = 'sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=lr), metrics=['acc'])
    return new_model

In [3]:
# load original data to get test data
data_la = np.load('/data1/qzhang/small_dataset_pnas/original/original_dataset.npz')['la']
labels = np.load('/data1/qzhang/small_dataset_pnas/original/original_dataset.npz')['labels']

# oever view
print('Original la shape:',data_la.shape)
print('labels:',np.unique(labels))

# preprocess labels
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
labels_cat = encoder.fit_transform(labels)
print(encoder.classes_)
print(np.unique(labels_cat))

# preprocess la data
from sklearn.preprocessing import minmax_scale
data_la =  minmax_scale(data_la,axis=1)
print('Max per sample:',data_la[0].max())
print('Min per sample:',data_la[0].min())


# split dataset
from sklearn.model_selection import train_test_split
train_data,test_data,train_label,test_label = train_test_split(data_la.reshape(-1,3000,1),labels_cat,
                                                               stratify = labels_cat,
                                                               test_size=0.2,
                                                               random_state=42)

print('Test data:',test_data.shape)



Original la shape: (2700, 3000)
labels: ['a-helix' 'b-sheet' 'other-SS']
['a-helix' 'b-sheet' 'other-SS']
[0 1 2]
Max per sample: 1.0000000000000002
Min per sample: 0.0
Test data: (540, 3000, 1)


In [9]:
# load tranfer learning data
original_transfer_la = np.load('/data1/qzhang/small_dataset_pnas/original/original_transfer_dataset.npz')['la']
original_transfer_labels = np.load('/data1/qzhang/small_dataset_pnas/original/original_transfer_dataset.npz')['labels']

homo_transfer_la = np.load('/data1/qzhang/small_dataset_pnas/homologous/homologous_transfer_dataset.npz')['la']
homo_transfer_labels = np.load('/data1/qzhang/small_dataset_pnas/homologous/homologous_transfer_dataset.npz')['labels']


nonhomo_transfer_la = np.load('/data1/qzhang/small_dataset_pnas/nonhomologous/nonhomologous_transfer_dataset.npz')['la']
nonhomo_transfer_labels = np.load('/data1/qzhang/small_dataset_pnas/nonhomologous/nonhomologous_transfer_dataset.npz')['labels']

transfer_data = np.concatenate((original_transfer_la,homo_transfer_la,nonhomo_transfer_la))
transfer_label = np.concatenate((original_transfer_labels,homo_transfer_labels,nonhomo_transfer_labels))

# process labels
transfer_label_cat = encoder.fit_transform(transfer_label)

# preprocess data
from sklearn.preprocessing import minmax_scale
transfer_data = minmax_scale(transfer_data,axis=1).reshape(-1,3000,1)

print('Transfer data size: %d, which includes \n original:%d, homo:%d, non-homo:%d'%(len(transfer_data),len(original_transfer_la),
                                                                           len(homo_transfer_la),
                                                                           len(nonhomo_transfer_la)))

# split transfer data into train_data  and val data
train_x,val_x,train_y,val_y = train_test_split(transfer_data,transfer_label_cat,stratify=transfer_label_cat,test_size = 0.2,random_state=42)



Transfer data size: 600, which includes 
 original:300, homo:150, non-homo:150


In [6]:
# load base model
raw_model = tf.keras.models.load_model('../01-PretrainedCNNModel/1DCNNDemoModel.h5')
# we just need top 6 layers
base_model = tf.keras.models.Sequential(raw_model.layers[:6])
base_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 3000, 64)          704       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 300, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 300, 64)           41024     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 30, 64)            0         
_________________________________________________________________
dropout (Dropout)            (None, 30, 64)            0         
_________________________________________________________________
flatten (Flatten)            (None, 1920)              0         
Total params: 41,728
Trainable params: 41,728
Non-trainable params: 0
__________________________________________________

In [8]:
#  freeze layer
base_model = freeze_layer(base_model=base_model)
# build a fresh CNN-FCNN model
model = build_model(base_model=base_model,hidden_layers_num = [64,32])
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 1920)              41728     
_________________________________________________________________
flatten_1 (Flatten)          (None, 1920)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1920)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                122944    
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 99        
Total params: 166,851
Trainable params: 125,123
Non-trainable params: 41,728
___________________________________________

In [10]:
# pre train 5 loops
history = model.fit(train_x,train_y,validation_data=(val_x,val_y),epochs=5,batch_size=8)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
# unfreeze layers
for layer in model.layers[0].layers:
    layer.trainable  = True

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 1920)              41728     
_________________________________________________________________
flatten_1 (Flatten)          (None, 1920)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1920)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                122944    
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 99        
Total params: 166,851
Trainable params: 166,851
Non-trainable params: 0
________________________________________________

In [24]:
history = model.fit(train_x,train_y,validation_data=(val_x,val_y),epochs=30,batch_size=8)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [27]:
model.save('1DCNNTransferLearningModel.h5')

# Test transfer learning model

##  test original  test data

In [25]:
model.evaluate(test_data,test_label)



[0.6581445336341858, 0.7574074268341064]

In [26]:
# raw model
raw_model.evaluate(test_data,test_label)



[0.4704816937446594, 0.8055555820465088]

# You can also test homo or non-homo test data
 Try it!