# Prediction of Active Enhancers with FFNN

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from itertools import product
from typing import List
from tqdm.auto import tqdm

## Models

### Decision Trees

In [5]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree baseline: Gini splitting criterion, depth capped at 50,
# "balanced" class weighting and a fixed seed so repeated runs build the same tree.
decision_tree = DecisionTreeClassifier(
    class_weight="balanced",
    random_state=42,
    max_depth=50,
    criterion="gini",
)

### Random Forests

In [14]:
from sklearn.ensemble import RandomForestClassifier

# Ensemble of 500 trees sharing the per-tree settings of the single decision
# tree above (Gini criterion, depth capped at 50, "balanced" class weighting,
# fixed seed). `verbose=True` makes scikit-learn log progress while fitting.
random_forest = RandomForestClassifier(
    verbose=True,
    class_weight="balanced",
    random_state=42,
    max_depth=50,
    criterion="gini",
    n_estimators=500,
)

### Extra Trees

In [13]:
from sklearn.ensemble import ExtraTreesClassifier

# Extra-Trees ensemble configured exactly like the random forest: 500 trees,
# Gini criterion, depth capped at 50, "balanced" class weighting, fixed seed,
# and progress logging enabled through `verbose=True`.
extra_tree = ExtraTreesClassifier(
    verbose=True,
    class_weight="balanced",
    random_state=42,
    max_depth=50,
    criterion="gini",
    n_estimators=500,
)

### Support Vector Machine (SVM)

In [12]:
from sklearn.svm import LinearSVC

# Linear SVM with "balanced" class weighting and a fixed seed; every other
# hyper-parameter is left at the LinearSVC default. `verbose=True` turns on
# the solver's logging.
svm = LinearSVC(
    verbose=True,
    class_weight="balanced",
    random_state=42,
)

### Perceptron

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Perceptron: one dense unit with a sigmoid activation, i.e. the input is
# mapped straight to a single value in (0, 1).
perceptron = Sequential()
perceptron.add(Dense(1, activation="sigmoid"))

# Trained with plain SGD on the binary cross-entropy loss.
perceptron.compile(
    loss="binary_crossentropy",
    optimizer="SGD",
)

### Multi-Layer Perceptron (MLP)

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Multi-layer perceptron: three ReLU hidden layers of decreasing width
# (128 -> 64 -> 32) followed by a single sigmoid output unit.
mlp = Sequential()
mlp.add(Dense(128, activation="relu"))
mlp.add(Dense(64, activation="relu"))
mlp.add(Dense(32, activation="relu"))
mlp.add(Dense(1, activation="sigmoid"))

# Same training configuration as the perceptron: SGD + binary cross-entropy.
mlp.compile(
    loss="binary_crossentropy",
    optimizer="SGD",
)

### Feed-Forward Neural Network (FFNN)

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Dropout

# Deeper feed-forward network. Layers are appended in the order they are applied.
ffnn = Sequential()
ffnn.add(Dense(256, activation="relu"))

# The 128-unit layer has no activation of its own: its output is
# batch-normalised first and the ReLU is applied afterwards.
ffnn.add(Dense(128))
ffnn.add(BatchNormalization())
ffnn.add(Activation("relu"))

# 64-unit layer followed by dropout with rate 0.3.
ffnn.add(Dense(64, activation="relu"))
ffnn.add(Dropout(0.3))

# Narrowing tail ending in a single sigmoid output unit.
ffnn.add(Dense(32, activation="relu"))
ffnn.add(Dense(16, activation="relu"))
ffnn.add(Dense(1, activation="sigmoid"))

# Same training configuration as the other Keras models.
ffnn.compile(
    loss="binary_crossentropy",
    optimizer="SGD",
)

## Meta-models

### Grid search

### Random search

### Tree-structured Parzen Estimators (TPE)

### Bayesian optimization with Gaussian processes

## Kernel space

### Gaussian kernels

### Nystroem Kernels (approximated Gaussian kernels)

## The experimental setup

## Retrieving the data

In [3]:
from epigenomic_dataset import load_epigenomes
from sklearn.impute import KNNImputer
from sklearn.preprocessing import RobustScaler

# The considered window size (forwarded to `load_epigenomes` below)
window_size = 200

# Retrieving the input data for the FANTOM enhancer regions of the GM12878
# cell line. X and y are presumably the feature matrix and the matching
# labels -- confirm against the `epigenomic_dataset` documentation.
X, y = load_epigenomes(
    cell_line = "GM12878",
    dataset = "fantom",
    regions = "enhancers",
    window_size = window_size
)
# Drop the second level of the column index so columns carry a single label.
X = X.droplevel(1, axis=1)

# Imputation of NaN values with k-nearest neighbours, using one tenth of the
# samples as neighbours. The sample count must be read from `X` (the frame
# loaded above); a lowercase `x` is not defined in this notebook and raises a
# NameError. `max(1, ...)` keeps `n_neighbors` valid for frames with fewer
# than ten rows, where the integer division would otherwise yield 0.
X[X.columns] = KNNImputer(n_neighbors=max(1, X.shape[0]//10)).fit_transform(X)

# Robust normalization of the values
X[X.columns] = RobustScaler().fit_transform(X)

HBox(children=(FloatProgress(value=0.0, description='Downloading to datasets/fantom/200/enhancers/GM12878.csv.…



HBox(children=(FloatProgress(value=0.0, description='Downloading to datasets/fantom/200/enhancers.bed.gz', lay…



## Preparing the holdouts

## Training the models

## Collecting the results

## Conclusions