In [1]:
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
import os
from tensorflow import keras
import pathlib
print(tf.config.list_physical_devices('CPU'))
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D,AveragePooling2D
from keras.layers import BatchNormalization
from keras.callbacks import LearningRateScheduler,ReduceLROnPlateau
from keras.optimizers import Adam # I believe this is better optimizer for our case

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


# Загрузка датасета и связанных гиперпараметров

In [14]:
# Root directory of the labelled image dataset; it is passed to
# image_dataset_from_directory when the training/validation subsets are built.
data_dir = pathlib.Path('datasets/images_center_10')

In [2]:
# Number of images per batch produced by the tf.data pipelines.
batch_size = 32
# Images are resized to (img_height, img_width) on load; the same values
# define the spatial input size of every model in this notebook.
img_height = 20
img_width = 20
# Width of the final Dense layer of each classifier.
num_classes = 2

In [15]:
with tf.device('/device:CPU:0'):
    # Training subset. The split fraction and seed are the same as in the
    # validation cell so that both cells partition the files identically.
    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        batch_size=batch_size,
        image_size=(img_height, img_width),
        subset="training",
        validation_split=0.2,
        seed=123,
    )

Found 5804 files belonging to 2 classes.
Using 4644 files for training.


In [16]:
with tf.device('/device:CPU:0'):
    # Validation subset. The split fraction and seed are the same as in the
    # training cell so that both cells partition the files identically.
    val_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        batch_size=batch_size,
        image_size=(img_height, img_width),
        subset="validation",
        validation_split=0.2,
        seed=123,
    )

Found 5804 files belonging to 2 classes.
Using 1160 files for validation.


In [17]:
# Class labels as reported by the dataset object; the index of a name in this
# list is the integer label used for that class.
# NOTE(review): this is read before the cache()/prefetch() cell below rebinds
# train_ds; presumably the transformed dataset no longer exposes class_names — confirm.
class_names = train_ds.class_names
print(class_names)

['negative', 'positive']


In [18]:
with tf.device('/device:CPU:0'):
    AUTOTUNE = tf.data.AUTOTUNE

    # The datasets were created with batch_size set, so shuffle() here works
    # on whole batches with a buffer of 1000 elements.
    # Note: this cell rebinds train_ds/val_ds, so re-running it stacks another
    # cache/shuffle/prefetch stage on top of the previous one.
    train_ds = (
        train_ds
        .cache()
        .shuffle(1000)
        .prefetch(buffer_size=AUTOTUNE)
    )
    val_ds = (
        val_ds
        .cache()
        .prefetch(buffer_size=AUTOTUNE)
    )

# Определение моделей и методов для работы с ними

In [3]:
def create_model_cnn(input_shape=(img_height, img_width), num_classes=num_classes):
  """Build and compile a small convolutional classifier.

  Architecture: pixel rescaling to [0, 1], three Conv2D + MaxPooling2D stages
  (16, 32 and 64 filters), dropout, a 128-unit dense layer and a
  `num_classes`-unit output layer without activation (raw logits).

  Args:
    input_shape: `(height, width)` or `(height, width, channels)`. A
      two-element shape is treated as a 3-channel image, which matches the
      previous hard-coded behaviour.
    num_classes: number of output units.

  Returns:
    A compiled `Sequential` model (Adam optimizer, sparse categorical
    cross-entropy on logits, accuracy metric).
  """
  # Previously the argument was ignored and the global image size was always
  # used; honour it now while staying compatible with 2-element shapes.
  input_shape = tuple(input_shape)
  if len(input_shape) == 2:
    input_shape = input_shape + (3,)

  model = Sequential([
    keras.layers.Rescaling(1./255, input_shape=input_shape, name='rescaling_1_1'),
    keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    # No activation: the loss below is configured with from_logits=True.
    keras.layers.Dense(num_classes)
  ])
  model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
  return model

In [4]:
def create_model_cnn_2(input_shape=(img_height, img_width), num_classes=num_classes):
    """Build and compile a deeper CNN with batch normalization and dropout.

    Architecture: pixel rescaling to [0, 1], two groups of three Conv2D +
    BatchNormalization layers (32 then 64 filters) each followed by
    Dropout(0.4), a final 128-filter Conv2D + BatchNormalization, Flatten and
    a `num_classes`-unit output layer without activation (raw logits).

    Args:
        input_shape: `(height, width)` or `(height, width, channels)`. A
            two-element shape is treated as a 3-channel image, which matches
            the previous hard-coded behaviour.
        num_classes: number of output units.

    Returns:
        A compiled `Sequential` model (Adam optimizer, sparse categorical
        cross-entropy on logits, accuracy metric).
    """
    # Previously the argument was ignored by the first layer (and passed to a
    # non-first Conv2D, where it has no effect); honour it on the input layer.
    input_shape = tuple(input_shape)
    if len(input_shape) == 2:
        input_shape = input_shape + (3,)

    model = Sequential()
    model.add(keras.layers.Rescaling(1./255, input_shape=input_shape, name='rescaling_2_1'))
    model.add(Conv2D(32, kernel_size = (3,3), activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, kernel_size = (3,3), activation='relu'))
    model.add(BatchNormalization())
    # Strided convolution halves the spatial size instead of a pooling layer.
    model.add(Conv2D(32, kernel_size = (5,5), strides=2, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(64, kernel_size = (3,3), activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size = (3,3), activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size = (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Conv2D(128, kernel_size = 3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Flatten())
    # The head must emit raw logits: the loss is built with from_logits=True,
    # so the former sigmoid activation made the loss operate on squashed
    # values. Weights trained with the old sigmoid head should be retrained.
    model.add(Dense(num_classes))
    model.compile(optimizer = 'adam' , loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])
    return model

In [5]:
def create_LeNet5(input_shape=(img_height, img_width), num_classes=num_classes):
    """Build and compile a LeNet-5-style classifier.

    Architecture: pixel rescaling to [0, 1], Conv2D(6) + AveragePooling2D,
    Conv2D(16) + AveragePooling2D, Conv2D(120), Flatten, Dense(84) and a
    `num_classes`-unit output layer without activation (raw logits).

    Args:
        input_shape: `(height, width)` or `(height, width, channels)`. A
            two-element shape is treated as a 3-channel image, which matches
            the previous hard-coded behaviour.
        num_classes: number of output units.

    Returns:
        A compiled `Sequential` model (Adam optimizer, sparse categorical
        cross-entropy on logits, accuracy metric).
    """
    # Previously the argument was ignored by the first layer (and passed to a
    # non-first Conv2D, where it has no effect); honour it on the input layer.
    input_shape = tuple(input_shape)
    if len(input_shape) == 2:
        input_shape = input_shape + (3,)

    model = Sequential()
    model.add(keras.layers.Rescaling(1./255, input_shape=input_shape, name='rescaling_3_1'))
    model.add(Conv2D(6, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding="same"))
    model.add(AveragePooling2D(pool_size=(2, 2), strides=(1, 1), padding='valid'))
    model.add(Conv2D(16, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding='valid'))
    model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    model.add(Conv2D(120, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding='valid'))
    model.add(Flatten())
    model.add(Dense(84, activation='relu'))
    # The head must emit raw logits: the loss is built with from_logits=True,
    # so the former sigmoid activation made the loss operate on squashed
    # values. Weights trained with the old sigmoid head should be retrained.
    model.add(Dense(num_classes))
    model.compile(optimizer =  'adam' , loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"])
    return model

In [19]:
def train_model(model, epochs, train_data=None, val_data=None):
    """Fit `model` on the CPU and return the training history.

    Args:
        model: a compiled Keras model.
        epochs: number of epochs passed to `model.fit`.
        train_data: training dataset; when omitted, the notebook-level
            `train_ds` is used (the original behaviour).
        val_data: validation dataset; when omitted, the notebook-level
            `val_ds` is used (the original behaviour).

    Returns:
        The object returned by `model.fit`.
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working, while making the data dependency explicit and overridable.
    if train_data is None:
        train_data = train_ds
    if val_data is None:
        val_data = val_ds

    with tf.device('/device:CPU:0'):
        return model.fit(
            train_data,
            validation_data=val_data,
            epochs=epochs)

# Создание моделей

In [23]:
# Instantiate one freshly initialised, compiled model per architecture,
# using the default input size and class count.
model_cnn = create_model_cnn()
model_cnn_2 = create_model_cnn_2()
model_LeNet = create_LeNet5()

# Обучение

In [24]:
# Train the simple CNN for 30 epochs; the returned history is kept for later inspection.
history_cnn = train_model(model_cnn, 30)

In [25]:
# Train the batch-normalized CNN for 30 epochs; the returned history is kept for later inspection.
history_cnn_2 = train_model(model_cnn_2, 30)

In [26]:
# Train the LeNet-5-style model for 30 epochs; the returned history is kept for later inspection.
history_LeNet = train_model(model_LeNet, 30)

In [16]:
# Persist only the weights of each model (not the architecture) under a
# per-model checkpoint prefix.
model_cnn.save_weights('model_cnn_weights/checkpoint')
model_cnn_2.save_weights('model_cnn_2_weights/checkpoint')
model_LeNet.save_weights('model_LeNet_weights/checkpoint')

# Загрузка сохранённых весов моделей

In [20]:
# Restore previously saved weights into the already-constructed models; the
# model objects must exist (see the model-creation cell) before this runs.
# Only the status object of the last call is shown as the cell output.
model_cnn.load_weights('model_cnn_weights/checkpoint')
model_cnn_2.load_weights('model_cnn_2_weights/checkpoint')
model_LeNet.load_weights('model_LeNet_weights/checkpoint')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x201fbd85390>

# Создание ансамбля

In [21]:
from keras import Model, Input
from keras.layers import Average
from typing import List
from tensorflow import Tensor
from keras.utils.version_utils import training

# Shared symbolic input that every ensemble member is applied to.
model_input = Input(shape=(img_height, img_width, 3))
def ensemble(models: List [training.Model]) -> training.Model:
    """Combine several models into one model that averages their outputs.

    Args:
        models: models that accept a `(img_height, img_width, 3)` image batch
            and produce outputs of identical shape.

    Returns:
        A Keras `Model` named 'ensemble' mapping `model_input` to the
        element-wise mean of the members' outputs.

    Raises:
        ValueError: if `models` is empty.
    """
    if not models:
        raise ValueError("ensemble() requires at least one model")

    # Each member was built with its own input layer, so `model.outputs[0]` is
    # not connected to `model_input` and Model(model_input, ...) would fail
    # with a disconnected graph. Calling the model on the shared input reuses
    # its trained weights while wiring it into a single graph.
    outputs = [model(model_input) for model in models]
    y = Average()(outputs)

    model = Model(model_input , y, name='ensemble')

    return model

In [27]:
# Build the averaging ensemble from the three individual classifiers.
models = [model_cnn, model_cnn_2, model_LeNet]
ensemble_model = ensemble(models)

In [26]:
# Inspection only: display the symbolic output tensor of the first model to
# check its shape and the layer that produces it.
model_cnn.outputs[0]

<KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'dense_4')>

# Использование модели

In [None]:
import cooler

# NOTE(review): `chr` shadows the built-in chr(); the name is kept so that any
# other cell relying on it keeps working.
chr = 'X'
resolution = 10000
c = cooler.Cooler(f'data/ARAb_vs_Coluzzii/ARAB_vs_Coluzzii_4DN.mcool::/resolutions/{resolution}')
# Raw (unbalanced) contact matrix for the selected chromosome.
matrix = c.matrix(balance=False).fetch(chr)

# Log-scale the counts; the +1 keeps zero counts finite.
src_matrix = np.log10(matrix+1)
# Global value range, so every exported tile shares the same gray scale.
max_value = np.nanmax(src_matrix)
min_value = np.nanmin(src_matrix)

# Create the output directory up front: plt.imsave fails if it is missing.
tiles_dir = f'datasets/ac_test_images_center_{resolution//1000}/unknown'
os.makedirs(tiles_dir, exist_ok=True)

# Half the side of the square window cut around each diagonal bin
# (10 bins on each side gives a 20x20 tile).
half_window = 10
for i in range(half_window, src_matrix.shape[0]-half_window):
        point = (i, i)
        point_area = src_matrix[point[1]-half_window:point[1]+half_window, point[0]-half_window:point[0]+half_window]
        # File name encodes chromosome and the genomic coordinate of the bin (twice).
        plt.imsave(f'{tiles_dir}/{chr}_{i*resolution}_{i*resolution}.png',  point_area, cmap='gray', vmax=max_value, vmin=min_value)

In [30]:
# Classify every exported tile with the three models and write the tiles
# predicted as class index 1 to a CSV file (columns: chr,start,end).
test_images_dir = 'datasets/ac_test_images_center_10/unknown'
with open('data/ac_detected_rearrangements_ensemble_10.csv', mode='w') as output:
  output.write("chr,start,end\n")
  # Sorted so the CSV row order does not depend on the arbitrary order
  # returned by os.listdir.
  for path in sorted(os.listdir(test_images_dir)):
      # Skip anything that is not a tile image (e.g. checkpoint folders),
      # which would otherwise make load_img fail.
      if not path.lower().endswith('.png'):
          continue
      img_path = os.path.join(test_images_dir, path)
      with tf.device('/device:CPU:0'):
        img = tf.keras.utils.load_img(
            img_path, target_size=(img_height, img_width)
        )
        img_array = tf.keras.utils.img_to_array(img)
        img_array = tf.expand_dims(img_array, 0) # Create a batch

        # NOTE(review): averaging before softmax assumes all three models
        # emit the same kind of output (raw logits) — confirm against the
        # model definitions.
        predictions_cnn = np.array(model_cnn(img_array))
        predictions_cnn_2 = np.array(model_cnn_2(img_array))
        predictions_LeNet = np.array(model_LeNet(img_array))
        predictions = (predictions_cnn + predictions_cnn_2 + predictions_LeNet) / 3

        score = tf.nn.softmax(predictions[0])

        if np.argmax(score) == 1:
          # File names follow "<chr>_<start>_<end>.png"; take the chromosome
          # from the name instead of hard-coding "X". rsplit keeps chromosome
          # names that themselves contain underscores intact.
          chrom, start, end = os.path.splitext(path)[0].rsplit('_', 2)
          output.write(f"{chrom},{start},{end}\n")