In [11]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from scipy.stats import spearmanr
from sklearn.model_selection import train_test_split
from skorch import NeuralNetRegressor
from skorch.callbacks import Checkpoint
from skorch.callbacks import EarlyStopping
from skorch.callbacks import EpochScoring
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from torch.utils.data import Dataset
from torch.utils.data import Subset

from gene_expression_prediction.data_loader import BigWigReader
from gene_expression_prediction.data_loader import GeneReader
from gene_expression_prediction.data_processor import CellLine
from gene_expression_prediction.data_processor import FeatureNames
from gene_expression_prediction.data_processor import ProcessedFeatures
from gene_expression_prediction.data_processor import load_processed_features
from gene_expression_prediction.data_processor import process_cell_line
from gene_expression_prediction.data_processor import save_processed_features

## Work Package 1.1 - Modeling Choices & Data Pre-processing

In [18]:
# TODO:
# Load your feature (bed and/or bigwig and/or fasta) and target files (tsv) here.
# Decide which features to use for training. Feel free to process them however you need.

# NOTE:
# bed and bigwig files contain signals of all chromosomes (including sex chromosomes).
# Training and validation split based on chromosomes has been done for you.
# However, you can resplit the data in any way you want.

# ---------------------------INSERT CODE HERE---------------------------
data_path = "/workspaces/Gene-Expression-Prediction/data"
info_data = GeneReader(data_path)
bigwig_data = BigWigReader(data_path)

PROMOTER_WINDOW_SIZE = 5000
PROMOTER_BIN_SIZE = 100

DISTAL_WINDOW_SIZE = 50_000
DISTAL_WINDOW_SIZE_BIN_SIZE=1000
SAMPLE_N = None

# Takes around 30 minutes to run
# cell_line_x1 = process_cell_line(
#     cell_line=CellLine.X1,
#     gene_reader=info_data,
#     bigwig_reader=bigwig_data,
#     promoter_window_size=PROMOTER_WINDOW_SIZE,
#     distal_window_size=DISTAL_WINDOW_SIZE,
#     promoter_bin_size=PROMOTER_BIN_SIZE,
#     distal_bin_size=DISTAL_WINDOW_SIZE_BIN_SIZE,
#     sample_n=SAMPLE_N,
# )
# save_processed_features(
#     cell_line_x1, "/workspaces/Gene-Expression-Prediction/data/processed_data_x1"
# )
cell_line_x1 = load_processed_features(
    "/workspaces/Gene-Expression-Prediction/data/processed_data_x1"
)


# cell_line_x2 = process_cell_line(
#     cell_line=CellLine.X2,
#     gene_reader=info_data,
#     bigwig_reader=bigwig_data,
#     promoter_window_size=PROMOTER_WINDOW_SIZE,
#     distal_window_size=DISTAL_WINDOW_SIZE,
#     promoter_bin_size=PROMOTER_BIN_SIZE,
#     distal_bin_size=DISTAL_WINDOW_SIZE_BIN_SIZE,
#     sample_n=SAMPLE_N,
# )
# save_processed_features(
#     cell_line_x2, "/workspaces/Gene-Expression-Prediction/data/processed_data_x2"
# )
cell_line_x2 = load_processed_features(
    "/workspaces/Gene-Expression-Prediction/data/processed_data_x2"
)


# cell_line_x3 = process_cell_line(
#     cell_line=CellLine.X3,
#     gene_reader=info_data,
#     bigwig_reader=bigwig_data,
#     promoter_window_size=PROMOTER_WINDOW_SIZE,
#     distal_window_size=DISTAL_WINDOW_SIZE,
#     promoter_bin_size=PROMOTER_BIN_SIZE,
#     distal_bin_size=DISTAL_WINDOW_SIZE_BIN_SIZE,
#     sample_n=None,
# )
# save_processed_features(
#     cell_line_x3, "/workspaces/Gene-Expression-Prediction/data/processed_data_x3"
# )

cell_line_x3 = load_processed_features(
    "/workspaces/Gene-Expression-Prediction/data/processed_data_x3"
)
# ----------------------------------------------------------------------

All required data paths have been successfully validated.


In [19]:
def validate_combined_features(features: ProcessedFeatures):
    """Validates that all components of ProcessedFeatures are properly aligned."""
    print("\n" + "=" * 60)
    print("VALIDATING COMBINED FEATURES ALIGNMENT")
    print("=" * 60)

    n_genes = len(features.gene_annotations)
    gene_names = features.gene_annotations["gene_name"].values

    print(f"Number of genes: {n_genes}")
    print(f"Promoter tensor shape: {features.sequence_signal_tensor.shape}")
    print(f"Gene annotations shape: {features.gene_annotations.shape}")
    print(
        f"Target expression shape: {features.target_expression.shape if features.target_expression is not None else 'None'}"
    )

    if features.sequence_signal_tensor.shape[0] != n_genes:
        print("ERROR: Promoter tensor length doesn't match gene annotations!")
        return False

    if features.target_expression is not None:
        if len(features.target_expression) != n_genes:
            print("ERROR: Target expression length doesn't match gene annotations!")
            return False

        target_genes = features.target_expression.index.values
        if not np.array_equal(gene_names, target_genes):
            print(
                "ERROR: Target expression index doesn't match gene_annotations order!"
            )
            print(f"  First 5 in annotations: {gene_names[:5]}")
            print(f"  First 5 in targets: {target_genes[:5]}")
            return False

    print("All alignments validated successfully!")
    print("=" * 60 + "\n")
    return True


def combine_cell_lines(
    cell_line_x1: ProcessedFeatures, cell_line_x2: ProcessedFeatures
) -> ProcessedFeatures:
    """
    Combines two ProcessedFeatures objects ensuring proper alignment.
    """
    print("\nCombining cell line features...")

    combined_tensor = np.concatenate(
        [cell_line_x1.sequence_signal_tensor, cell_line_x2.sequence_signal_tensor],
        axis=0,
    )

    annotations_x1 = cell_line_x1.gene_annotations.copy()
    annotations_x2 = cell_line_x2.gene_annotations.copy()
    annotations_x1["gene_name"] = annotations_x1["gene_name"] + "_x1"
    annotations_x2["gene_name"] = annotations_x2["gene_name"] + "_x2"
    combined_annotations = pd.concat(
        [annotations_x1, annotations_x2], ignore_index=True
    )

    targets_x1 = cell_line_x1.target_expression.copy()
    targets_x2 = cell_line_x2.target_expression.copy()
    targets_x1.index = targets_x1.index + "_x1"
    targets_x2.index = targets_x2.index + "_x2"
    combined_targets = pd.concat([targets_x1, targets_x2])

    combined_targets = combined_targets.reindex(combined_annotations["gene_name"])

    combined_features = ProcessedFeatures(
        gene_annotations=combined_annotations,
        sequence_signal_tensor=combined_tensor,
        target_expression=combined_targets,
        n_upstream_bins=cell_line_x1.n_upstream_bins,
        n_promoter_bins=cell_line_x1.n_promoter_bins,
        n_downstream_bins=cell_line_x1.n_downstream_bins,
        n_total_bins=cell_line_x1.n_total_bins,
    )

    if not validate_combined_features(combined_features):
        raise ValueError("Combined features validation failed! Data is misaligned.")

    return combined_features


combined_features = combine_cell_lines(cell_line_x1, cell_line_x2)


Combining cell line features...

VALIDATING COMBINED FEATURES ALIGNMENT
Number of genes: 32568
Promoter tensor shape: (32568, 190, 7, 2)
Gene annotations shape: (32568, 7)
Target expression shape: (32568,)
All alignments validated successfully!



## Work Package 1.2 - Model Building

In [22]:
# TODO:
# Select the best model to predict gene expression from the obtained features in WP 1.1.
# pytorch_dataset.py
class GeneExpressionDataset(Dataset):
    """
    PyTorch Dataset for the gene expression prediction task.

    This class takes the processed promoter signal tensor (with 2 channels: mean, max)
    and target expression values, normalizes them, and prepares them for training.
    """

    def __init__(self, processed_features: ProcessedFeatures):
        if processed_features.target_expression is None:
            raise ValueError("Target expression values are required for this dataset.")

        pt = torch.from_numpy(processed_features.sequence_signal_tensor).float()
        
        # Original shape: (N, Bins, Features, Channels) -> (N, F, C, B)
        pt = pt.permute(0, 2, 3, 1).contiguous()
        N, F, C, B = pt.shape

        # Reshape to combine features and channels for Conv1d: (N, F * C, B)
        self.promoter_tensor = pt.view(N, F * C, B)

        # --- 2. Global Normalization per Channel (mean/max) ---
        # The channels are interleaved: 0,2,4... are 'mean'; 1,3,5... are 'max'.
        mean_channels = list(range(0, F * C, C))
        max_channels = list(range(1, F * C, C))

        mean_data = self.promoter_tensor[:, mean_channels, :]
        max_data = self.promoter_tensor[:, max_channels, :]

        # Calculate statistics for normalization
        self.promoter_mean_mean = mean_data.mean().item()
        self.promoter_mean_std = mean_data.std().item()
        self.promoter_max_mean = max_data.mean().item()
        self.promoter_max_std = max_data.std().item()

        print("\nPromoter normalization stats (global per channel):")
        print(f"  Mean channel: μ={self.promoter_mean_mean:.3f}, std={self.promoter_mean_std:.3f}")
        print(f"  Max channel:  μ={self.promoter_max_mean:.3f}, std={self.promoter_max_std:.3f}")

        # Apply Z-score normalization to each channel for each feature
        for i in range(F):
            if self.promoter_mean_std > 1e-8: # Avoid division by zero
                self.promoter_tensor[:, i * C + 0, :] = (
                    self.promoter_tensor[:, i * C + 0, :] - self.promoter_mean_mean
                ) / self.promoter_mean_std

            if self.promoter_max_std > 1e-8:
                self.promoter_tensor[:, i * C + 1, :] = (
                    self.promoter_tensor[:, i * C + 1, :] - self.promoter_max_mean
                ) / self.promoter_max_std
        
        target_values = processed_features.target_expression.fillna(0.0).values
        log_targets = np.log1p(target_values)

        self.target_log_mean = log_targets.mean()
        self.target_log_std = log_targets.std()
        
        normalized_targets = (log_targets - self.target_log_mean) / self.target_log_std
        self.targets = torch.from_numpy(normalized_targets).float()
        
        print("\nTarget normalization stats:")
        print(f"  Original range: [{target_values.min():.1f}, {target_values.max():.1f}]")
        print(f"  Log-transformed range: [{log_targets.min():.3f}, {log_targets.max():.3f}]")
        print(f"  Final normalized range: [{self.targets.min():.3f}, {self.targets.max():.3f}]")


    def denormalize_targets(self, normalized_predictions):
        """Converts normalized model predictions back to the original gene expression scale."""
        if torch.is_tensor(normalized_predictions):
            normalized_predictions = normalized_predictions.cpu().numpy()

        log_predictions = (normalized_predictions * self.target_log_std) + self.target_log_mean
        
        original_scale = np.expm1(log_predictions)
        
        return np.clip(original_scale, 0, None)

    def __len__(self) -> int:
        return len(self.targets)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        return self.promoter_tensor[idx], self.targets[idx]
    
    
class PromoterAttentionCNN(nn.Module):
    def __init__(self, n_bins: int, n_features: int, n_channels: int):
        super().__init__()
        in_channels = n_features * n_channels
        
        # Deeper CNN tower with residual blocks
        self.conv1 = nn.Conv1d(in_channels, 64, kernel_size=7, padding="same")
        self.norm1 = nn.GroupNorm(8, 64)
        self.dropout1 = nn.Dropout(0.2)
        
        # Residual block 1
        self.res1_conv1 = nn.Conv1d(64, 64, kernel_size=5, padding="same")
        self.res1_norm1 = nn.GroupNorm(8, 64)
        self.res1_conv2 = nn.Conv1d(64, 64, kernel_size=5, padding="same")
        self.res1_norm2 = nn.GroupNorm(8, 64)
        self.pool1 = nn.MaxPool1d(2)
        
        self.conv2 = nn.Conv1d(64, 128, kernel_size=5, padding="same")
        self.norm2 = nn.GroupNorm(8, 128)
        self.dropout2 = nn.Dropout(0.2)
        
        # Residual block 2
        self.res2_conv1 = nn.Conv1d(128, 128, kernel_size=3, padding="same")
        self.res2_norm1 = nn.GroupNorm(8, 128)
        self.res2_conv2 = nn.Conv1d(128, 128, kernel_size=3, padding="same")
        self.res2_norm2 = nn.GroupNorm(8, 128)
        self.pool2 = nn.MaxPool1d(2)
        
        # Additional conv layer
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, padding="same")
        self.norm3 = nn.GroupNorm(8, 256)
        self.dropout3 = nn.Dropout(0.2)
        
        self.gap = nn.AdaptiveAvgPool1d(32)
        
        # Multi-head attention with more heads
        self.self_attn = nn.MultiheadAttention(
            embed_dim=256, num_heads=8, dropout=0.1, batch_first=True  # 8 heads instead of 4
        )
        self.attn_norm = nn.LayerNorm(256)
        
        # Prediction head
        self.fc1 = nn.Linear(256 * 32, 512)
        self.fc_norm = nn.LayerNorm(512)
        self.fc_dropout = nn.Dropout(0.4)
        self.fc2 = nn.Linear(512, 128)
        self.fc_dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(128, 1)
    
    def _residual_block(self, x, conv1, norm1, conv2, norm2):
        """Residual block with skip connection."""
        identity = x
        out = F.relu(norm1(conv1(x)))
        out = norm2(conv2(out))
        out += identity  # Skip connection
        return F.relu(out)
    
    def forward(self, x):
        # Initial conv
        x = F.relu(self.norm1(self.conv1(x)))
        x = self.dropout1(x)
        
        # Residual block 1
        x = self._residual_block(x, self.res1_conv1, self.res1_norm1, 
                                 self.res1_conv2, self.res1_norm2)
        x = self.pool1(x)
        
        # Conv 2
        x = F.relu(self.norm2(self.conv2(x)))
        x = self.dropout2(x)
        
        # Residual block 2
        x = self._residual_block(x, self.res2_conv1, self.res2_norm1,
                                 self.res2_conv2, self.res2_norm2)
        x = self.pool2(x)
        
        # Conv 3
        x = F.relu(self.norm3(self.conv3(x)))
        x = self.dropout3(x)
        
        # GAP
        x = self.gap(x)  # (batch, 256, 32)
        
        # Self-attention
        x_t = x.transpose(1, 2)
        attn_out, _ = self.self_attn(x_t, x_t, x_t)
        x_t = self.attn_norm(x_t + attn_out)  # Residual
        
        # Flatten and predict
        h = torch.flatten(x_t, 1)
        h = F.relu(self.fc_norm(self.fc1(h)))
        h = self.fc_dropout(h)
        h = F.relu(self.fc2(h))
        h = self.fc_dropout2(h)
        return self.fc3(h).squeeze(1)
# ----------------------------------------------------------------------

In [23]:
def spearman_epoch_scorer(net, dataset_valid, y=None):
    """
    Custom skorch scorer to calculate Spearman correlation on the validation set.

    This function correctly handles skorch's behavior by ignoring the potentially
    incomplete 'y' parameter and reconstructing the full y_true array from the
    provided validation dataset.
    """
    y_pred = net.predict(dataset_valid).ravel()
    y_true = np.array([y_i.item() for _, y_i in dataset_valid]).ravel()

    correlation, _ = spearmanr(y_true, y_pred)

    # This is a necessary sanity check, not error hiding. spearmanr can return
    # NaN if all predictions are identical
    if np.isnan(correlation):
        return 0.0

    return float(correlation)


def validate_dataset(dataset):
    """Validates dataset for NaN, Inf, and prints value ranges."""
    print("\n=== Data Validation ===")

    has_nan = torch.isnan(dataset.promoter_tensor).any()
    has_inf = torch.isinf(dataset.promoter_tensor).any()
    print(f"Promoter - NaN: {has_nan}, Inf: {has_inf}")
    print(
        f"Promoter range: [{dataset.promoter_tensor.min():.3f}, {dataset.promoter_tensor.max():.3f}]"
    )

    has_nan = torch.isnan(dataset.targets).any()
    has_inf = torch.isinf(dataset.targets).any()
    print(f"Targets - NaN: {has_nan}, Inf: {has_inf}")
    print(f"Targets range: [{dataset.targets.min():.3f}, {dataset.targets.max():.3f}]")
    print(
        f"Targets with value 0.0: {(dataset.targets == 0.0).sum()} / {len(dataset.targets)}"
    )
    print("=====================\n")


def train_with_skorch(
    full_ds: Dataset,
    model: nn.Module,
    *,
    device: torch.device,
    batch_size: int = 64,
    max_epochs: int = 10_000,
    learning_rate: float = 1e-3,
    num_workers: int = 0,
    patience: int = 10,
    min_delta: float = 1e-4,
    checkpoint_dir: str = "checkpoints",
    monitor_name: str = "valid_spearman",
):
    N = len(full_ds)
    indices = list(range(N))
    train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42)

    train_ds = Subset(full_ds, train_idx)
    valid_ds = Subset(full_ds, val_idx)

    pin_memory = device.type == "cuda"
    callbacks = [
        EpochScoring(
            spearman_epoch_scorer,
            lower_is_better=False,
            name=monitor_name,
            use_caching=False,
        ),
        EarlyStopping(
            monitor=monitor_name,
            patience=patience,
            threshold=min_delta,
            lower_is_better=False,
        ),
        Checkpoint(
            dirname=checkpoint_dir,
            monitor=f"{monitor_name}_best",
            f_params="best_model.pt",
        ),
        LRScheduler(
            policy=torch.optim.lr_scheduler.ReduceLROnPlateau,
            mode="max",
            factor=0.5,
            patience=max(2, patience // 2),
            monitor=monitor_name,
        ),
    ]

    net = NeuralNetRegressor(
        model,
        criterion=nn.MSELoss,
        optimizer=optim.AdamW,
        optimizer__lr=learning_rate,
        optimizer__weight_decay=1e-4,
        max_epochs=max_epochs,
        batch_size=batch_size,
        device=device.type,
        train_split=predefined_split(valid_ds),
        callbacks=callbacks,
        iterator_train__num_workers=num_workers,
        iterator_valid__num_workers=num_workers,
        iterator_train__pin_memory=pin_memory,
        iterator_valid__pin_memory=pin_memory,
    )

    print(f"Combined dataset size: {N}")
    print(f"Training set size:   {len(train_idx)}")
    print(f"Validation set size: {len(val_idx)}")

    net.fit(train_ds, y=None)

    from pathlib import Path

    net.load_params(f_params=Path(checkpoint_dir) / "best_model.pt")
    best_torch_model = net.module_
    return net, best_torch_model

In [None]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

PROMOTER_WINDOW_SIZE = 5000
PROMOTER_BIN_SIZE = 100
N_BINS = (2 * PROMOTER_WINDOW_SIZE) // PROMOTER_BIN_SIZE
N_FEATURES = len(FeatureNames)
N_CHANNELS = 2 # mean, max

BATCH_SIZE = 256
LEARNING_RATE = 1e-4
NUM_WORKERS = 2
MAX_EPOCHS = 100
PATIENCE = 15
MIN_DELTA = 1e-4
MODEL_SAVE_PATH = "./best_promoter_model.pth"

print("Creating dataset...")
full_ds = GeneExpressionDataset(combined_features)
validate_dataset(full_ds)

model = PromoterAttentionCNN(
    n_bins=N_BINS,
    n_features=N_FEATURES,
    n_channels=N_CHANNELS,
)

# Run Training
print("Starting training...")
net, best_model = train_with_skorch(
    full_ds=full_ds,
    model=model,
    device=DEVICE,
    batch_size=BATCH_SIZE,
    max_epochs=MAX_EPOCHS,
    learning_rate=LEARNING_RATE,
    num_workers=NUM_WORKERS,
    patience=PATIENCE,
    min_delta=MIN_DELTA,
    checkpoint_dir="checkpoints_promoter_only",
)

torch.save(best_model.state_dict(), MODEL_SAVE_PATH)
print(f"\nSaved best model to: {MODEL_SAVE_PATH}")

Using device: cpu
Creating dataset...

Promoter normalization stats (global per channel):
  Mean channel: μ=1.015, std=5.113
  Max channel:  μ=2.123, std=8.314

Target normalization stats:
  Original range: [0.0, 19519.8]
  Log-transformed range: [0.000, 9.879]
  Final normalized range: [-0.653, 4.481]

=== Data Validation ===
Promoter - NaN: False, Inf: False
Promoter range: [-0.255, 94.493]
Targets - NaN: False, Inf: False
Targets range: [-0.653, 4.481]
Targets with value 0.0: 0 / 32568

Starting training...
Combined dataset size: 32568
Training set size:   26054
Validation set size: 6514
  epoch    train_loss    valid_loss    valid_spearman    cp      lr      dur
-------  ------------  ------------  ----------------  ----  ------  -------
      1        [36m0.5090[0m        [32m0.4296[0m            [35m0.7513[0m     +  0.0001  43.0261
      2        [36m0.4347[0m        0.4327            [35m0.7615[0m     +  0.0001  42.5475
      3        [36m0.4051[0m        [32m0.3786

## Work Package 1.3 - Prediction on Test Data (Evaluation Metric)

In [17]:
pred = None

test_genes = cell_line_x3.gene_annotations["gene_name"].values

_train_cols = list(combined_features.distal_peak_features.columns)
cell_line_x3.distal_peak_features = cell_line_x3.distal_peak_features.reindex(
    cell_line_x3.gene_annotations["gene_name"]
)[_train_cols].fillna(0.0)



# Get the training dataset to extract normalization parameters
full_train_ds = GeneExpressionDataset(combined_features)

# Create test dataset with proper normalization
_test_ds = _PredDS(cell_line_x3, full_train_ds)


class _SkorchModule(nn.Module):
    def __init__(self, core):
        super().__init__()
        self.core = core

    def forward(self, X):
        promoter_x, distal_x = X
        return self.core(promoter_x, distal_x)


# Create model
_core = HybridCNN(
    n_bins=N_BINS,
    n_features=N_FEATURES,
    n_channels=N_CHANNELS,
    n_distal_features=len(_train_cols),
)

_net = NeuralNetRegressor(
    _SkorchModule(_core),
    device=DEVICE.type,
    batch_size=BATCH_SIZE * 2,
)

_net.initialize()
_net.load_params(f_params="checkpoints/best_model.pt")

# Predict and clip negatives to zero
print("Making predictions...")
pred = _net.predict(_test_ds).ravel()
pred = np.clip(pred, 0, None)

print(f"Predictions shape: {pred.shape}")
print(f"Predictions range: [{pred.min():.3f}, {pred.max():.3f}]")

# Check if "pred" meets the specified constraints
assert isinstance(pred, np.ndarray), "Prediction array must be a numpy array"
assert np.issubdtype(pred.dtype, np.number), "Prediction array must be numeric"
assert pred.shape[0] == len(test_genes), (
    "Each gene should have a unique predicted expression"
)

AttributeError: 'ProcessedFeatures' object has no attribute 'distal_peak_features'

#### Store Predictions in the Required Format

In [None]:
# Store predictions in a ZIP.
# Upload this zip on the project website under "Your submission".
# Zip this notebook along with the conda environment (and README, optional) and upload this under "Your code".
save_dir = "/workspaces/Gene-Expression-Prediction/data/output"
file_name = "gex_predicted.csv"  # PLEASE DO NOT CHANGE THIS
zip_name = "Tokar_David_Project1.zip"
save_path = f"{save_dir}/{zip_name}"
compression_options = {"method": "zip", "archive_name": file_name}

submission_df = pd.DataFrame({"gene_name": test_genes, "gex_predicted": pred})

compression_options = {"method": "zip", "archive_name": file_name}

submission_df.to_csv(save_path, index=False, compression=compression_options)
print(f"File saved to: {save_path}")
print("\nPreview of the first 5 rows of the submission file:")
print(submission_df.head())

File saved to: /workspaces/Gene-Expression-Prediction/data/output/Tokar_David_Project1.zip

Preview of the first 5 rows of the submission file:
    gene_name  gex_predicted
0       CAPN9       0.000000
1        ILF2       0.674938
2  ST6GALNAC5       0.000000
3  MROH7-TTC4       0.000000
4        AGO4       0.000000
