ver0: 多 chunk modules 独立权重

## Dependency

In [1]:
import os; os.environ["CUDA_VISIBLE_DEVICES"] = "1" # 设置用GPU1
import gzip
import json
import logging
import shutil
from typing import Union
from argparse import Namespace

import numpy as np
import pandas as pd
import datatable as dt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad
from torch.utils.data import DataLoader, Dataset

from tqdm import tqdm
from joblib import Parallel, delayed
from sklearn.model_selection import train_test_split

from mamba_ssm import Mamba2
from mamba_ssm.modules.mamba2_simple import Mamba2Simple as Mamba2Block # 原Mamba2Block
from torch_optimizer import Lamb

## Data

In [2]:
SUPPORTED_FILE_FORMATS = {"vcf", "csv", "tsv"}
class DataReader:
    def __init__(self):
        self.target_is_gonna_be_phased = None
        self.target_set = None
        self.target_sample_value_index = 2
        self.ref_sample_value_index = 2
        self.target_file_extension = None
        self.allele_count = 2
        self.genotype_vals = None
        self.ref_is_phased = None
        self.reference_panel = None
        self.VARIANT_COUNT = 0
        self.is_phased = False
        self.MISSING_VALUE = None
        self.ref_is_hap = False
        self.target_is_hap = False
        self.ref_n_header_lines = []
        self.ref_n_data_header = ""
        self.target_n_header_lines = []
        self.target_n_data_header = ""
        self.ref_separator = None
        self.map_values_1_vec = np.vectorize(self.__map_hap_2_ind_parent_1)
        self.map_values_2_vec = np.vectorize(self.__map_hap_2_ind_parent_2)
        self.map_haps_to_vec = np.vectorize(self.__map_haps_2_ind)
        self.delimiter_dictionary = {"vcf": "\t", "csv": ",", "tsv": "\t", "infer": "\t"}
        self.ref_file_extension = "vcf"
        self.test_file_extension = "vcf"
        self.target_is_phased = True

    def __read_csv(self, file_path, is_vcf=False, is_reference=False, separator="\t", first_column_is_index=True,
                   comments="##") -> pd.DataFrame:
        """Read CSV/VCF files"""
        print("Reading the file...")
        data_header = None
        path_sep = "/" if "/" in file_path else os.path.sep
        line_counter = 0
        root, ext = os.path.splitext(file_path)
        with gzip.open(file_path, 'rt') if ext == '.gz' else open(file_path, 'rt') as f_in:
            while True:
                line = f_in.readline()
                if line.startswith(comments):
                    line_counter += 1
                    if is_reference:
                        self.ref_n_header_lines.append(line)
                    else:
                        self.target_n_header_lines.append(line)
                else:
                    data_header = line
                    break
        if data_header is None:
            raise IOError("The file only contains comments!")
        df = dt.fread(file=file_path, sep=separator, header=True, skip_to_line=line_counter + 1)
        df = df.to_pandas()
        if first_column_is_index:
            df.set_index(df.columns[0], inplace=True)
        return df

    def __find_file_extension(self, file_path, file_format, delimiter):
        separator = "\t"
        found_file_format = None
        if file_format not in ["infer"] + list(SUPPORTED_FILE_FORMATS):
            raise ValueError("File extension must be one of {'vcf', 'csv', 'tsv', 'infer'}.")
        if file_format == 'infer':
            file_name_tokenized = file_path.split(".")
            for possible_extension in file_name_tokenized[::-1]:
                if possible_extension in SUPPORTED_FILE_FORMATS:
                    found_file_format = possible_extension
                    separator = self.delimiter_dictionary[possible_extension] if delimiter is None else delimiter
                    break
            if found_file_format is None:
                logging.warning("Could not infer the file type. Using tsv as the last resort.")
                found_file_format = "tsv"
        else:
            found_file_format = file_format
            separator = self.delimiter_dictionary[file_format] if delimiter is None else delimiter
        return found_file_format, separator

    def assign_training_set(self, file_path: str, target_is_gonna_be_phased_or_haps: bool,
                            variants_as_columns: bool = False, delimiter=None, file_format="infer",
                            first_column_is_index=True, comments="##") -> None:
        self.target_is_gonna_be_phased = target_is_gonna_be_phased_or_haps
        self.ref_file_extension, self.ref_separator = self.__find_file_extension(file_path, file_format, delimiter)

        self.reference_panel = self.__read_csv(file_path, is_reference=True, is_vcf=False, separator=self.ref_separator,
                                               first_column_is_index=first_column_is_index,
                                               comments=comments) if self.ref_file_extension != 'vcf' else self.__read_csv(
            file_path, is_reference=True, is_vcf=True, separator='\t', first_column_is_index=False, comments="##")

        if self.ref_file_extension != "vcf":
            if variants_as_columns:
                self.reference_panel = self.reference_panel.transpose()
            self.reference_panel.reset_index(drop=False, inplace=True)
            self.reference_panel.rename(columns={self.reference_panel.columns[0]: "ID"}, inplace=True)
        else:
            self.ref_sample_value_index += 8

        self.ref_is_hap = not ("|" in self.reference_panel.iloc[0, self.ref_sample_value_index - 1] or "/" in
                               self.reference_panel.iloc[0, self.ref_sample_value_index - 1])
        self.ref_is_phased = "|" in self.reference_panel.iloc[0, self.ref_sample_value_index - 1]

        if self.ref_is_hap and not target_is_gonna_be_phased_or_haps:
            raise ValueError(
                "Reference contains haploids while target will be unphased diploids. Model cannot predict target.")

        if not (self.ref_is_phased or self.ref_is_hap) and target_is_gonna_be_phased_or_haps:
            raise ValueError(
                "Reference contains unphased diploids while target will be phased/haploid. Model cannot predict target.")

        self.VARIANT_COUNT = self.reference_panel.shape[0]
        print(
            f"{self.reference_panel.shape[1] - (self.ref_sample_value_index - 1)} {'haploid' if self.ref_is_hap else 'diploid'} samples with {self.VARIANT_COUNT} variants found!")

        self.is_phased = target_is_gonna_be_phased_or_haps and (self.ref_is_phased or self.ref_is_hap)

        original_allele_sep = "|" if self.ref_is_phased or self.ref_is_hap else "/"
        final_allele_sep = "|" if self.is_phased else "/"

        def get_diploid_alleles(genotype_vals):
            allele_set = set()
            for genotype_val in genotype_vals:
                if genotype_val not in [".", ".|.", "./."]:
                    if final_allele_sep in genotype_val:
                        v1, v2 = genotype_val.split(final_allele_sep)
                        allele_set.update([v1, v2])
                    else:
                        allele_set.add(genotype_val)  # For haploids
            return np.array(list(allele_set))

        genotype_vals = pd.unique(self.reference_panel.iloc[:, self.ref_sample_value_index - 1:].values.ravel('K'))
        print(f"DEBUG: Unique genotypes in dataset: {genotype_vals[:10]}...")  # Show first 10

        if self.ref_is_phased and not target_is_gonna_be_phased_or_haps:
            phased_to_unphased_dict = {}
            for i in range(genotype_vals.shape[0]):
                key = genotype_vals[i]
                if "|" in key and key not in [".", ".|."]:
                    v1, v2 = [int(s) for s in genotype_vals[i].split(original_allele_sep)]
                    genotype_vals[i] = f"{min(v1, v2)}/{max(v1, v2)}"
                    phased_to_unphased_dict[key] = genotype_vals[i]
            if phased_to_unphased_dict:
                self.reference_panel.iloc[:, self.ref_sample_value_index - 1:].replace(phased_to_unphased_dict,
                                                                                       inplace=True)

        self.genotype_vals = np.unique(genotype_vals)
        self.alleles = get_diploid_alleles(self.genotype_vals) if not self.ref_is_hap else self.genotype_vals
        self.allele_count = len(self.alleles)
        self.MISSING_VALUE = self.allele_count if self.is_phased else len(self.genotype_vals)

        print(f"DEBUG: self.genotype_vals: {self.genotype_vals}")
        print(f"DEBUG: self.alleles: {self.alleles}")
        print(f"DEBUG: is_phased: {self.is_phased}")

        if self.is_phased:
            self.hap_map = {str(v): i for i, v in enumerate(list(sorted(self.alleles)))}
            self.hap_map.update({".": self.MISSING_VALUE})
            self.r_hap_map = {i: k for k, i in self.hap_map.items()}
            self.map_preds_2_allele = np.vectorize(lambda x: self.r_hap_map[x])
            print(f"DEBUG: hap_map: {self.hap_map}")
        else:
            unphased_missing_genotype = "./."
            self.replacement_dict = {g: i for i, g in enumerate(list(sorted(self.genotype_vals)))}
            self.replacement_dict[unphased_missing_genotype] = self.MISSING_VALUE
            self.reverse_replacement_dict = {v: k for k, v in self.replacement_dict.items()}
            print(f"DEBUG: replacement_dict: {self.replacement_dict}")

        self.SEQ_DEPTH = self.allele_count + 1 if self.is_phased else len(self.genotype_vals) + 1
        print(f"DEBUG: self.SEQ_DEPTH: {self.SEQ_DEPTH}")

    def assign_test_set(self, file_path, variants_as_columns=False, delimiter=None,
                        file_format="infer", first_column_is_index=True, comments="##") -> None:
        """Assign test set for imputation"""
        if self.reference_panel is None:
            raise RuntimeError("First you need to use 'DataReader.assign_training_set(...) to assign a training set.'")

        self.target_file_extension, separator = self.__find_file_extension(file_path, file_format, delimiter)

        test_df = self.__read_csv(file_path, is_reference=False, is_vcf=False, separator=separator,
                                  first_column_is_index=first_column_is_index,
                                  comments=comments) if self.target_file_extension != 'vcf' else self.__read_csv(
            file_path, is_reference=False, is_vcf=True, separator='\t', first_column_is_index=False, comments="##")

        if self.target_file_extension != "vcf":
            if variants_as_columns:
                test_df = test_df.transpose()
            test_df.reset_index(drop=False, inplace=True)
            test_df.rename(columns={test_df.columns[0]: "ID"}, inplace=True)
        else:
            self.target_sample_value_index += 8

        self.target_is_hap = not ("|" in test_df.iloc[0, self.target_sample_value_index - 1] or "/" in
                                  test_df.iloc[0, self.target_sample_value_index - 1])
        is_phased = "|" in test_df.iloc[0, self.target_sample_value_index - 1]
        test_var_count = test_df.shape[0]
        print(f"{test_var_count} {'haplotype' if self.target_is_hap else 'diplotype'} variants found!")

        # Validate compatibility
        if (self.target_is_hap or is_phased) and not (self.ref_is_phased or self.ref_is_hap):
            raise RuntimeError("The training set contains unphased data. The target must be unphased as well.")
        if self.ref_is_hap and not (self.target_is_hap or is_phased):
            raise RuntimeError("The training set contains haploids. Target set should be phased or haploids.")

        # Merge with reference panel to align variants
        self.target_set = test_df.merge(right=self.reference_panel[["ID"]], on='ID', how='right')
        if self.target_file_extension == "vcf" == self.ref_file_extension:
            self.target_set[self.reference_panel.columns[:9]] = self.reference_panel[self.reference_panel.columns[:9]]

        self.target_set = self.target_set.astype('str')
        missing_value = "." if self.target_is_hap else ".|." if self.is_phased else "./."
        self.target_set.fillna(missing_value, inplace=True)
        self.target_set.replace("nan", missing_value, inplace=True)
        print("Target set assignment done!")

    def __map_hap_2_ind_parent_1(self, x) -> int:
        return self.hap_map[x.split('|')[0]]

    def __map_hap_2_ind_parent_2(self, x) -> int:
        return self.hap_map[x.split('|')[1]]

    def __map_haps_2_ind(self, x) -> int:
        return self.hap_map[x]

    def get_ref_set(self, starting_var_index=0, ending_var_index=0) -> np.ndarray:
        if 0 <= starting_var_index < ending_var_index:
            data = self.reference_panel.iloc[starting_var_index:ending_var_index, self.ref_sample_value_index - 1:]
        else:
            data = self.reference_panel.iloc[:, self.ref_sample_value_index - 1:]

        if self.is_phased:
            is_haps = "|" not in data.iloc[0, 0]
            if not is_haps:
                # diploids to hap vecs
                _x = np.empty((data.shape[1] * 2, data.shape[0]), dtype=np.int32)
                _x[0::2] = self.map_values_1_vec(data.values.T)
                _x[1::2] = self.map_values_2_vec(data.values.T)
                return _x
            else:
                return self.map_haps_to_vec(data.values.T)
        else:
            return data.replace(self.replacement_dict).values.T.astype(np.int32)

    def get_target_set(self, starting_var_index=0, ending_var_index=0) -> np.ndarray:
        """Get target data for imputation"""
        if 0 <= starting_var_index < ending_var_index:
            data = self.target_set.iloc[starting_var_index:ending_var_index, self.target_sample_value_index - 1:]
        else:
            data = self.target_set.iloc[:, self.target_sample_value_index - 1:]

        if self.is_phased:
            is_haps = "|" not in data.iloc[0, 0]
            if not is_haps:
                # diploids to hap vecs
                _x = np.empty((data.shape[1] * 2, data.shape[0]), dtype=np.int32)
                _x[0::2] = self.map_values_1_vec(data.values.T)
                _x[1::2] = self.map_values_2_vec(data.values.T)
                return _x
            else:
                return self.map_haps_to_vec(data.values.T)
        else:
            return data.replace(self.replacement_dict).values.T.astype(np.int32)

    def __convert_unphased_probs_to_genotypes(self, allele_probs) -> np.ndarray:
        """Convert unphased probabilities to genotypes"""
        n_samples, n_variants, n_alleles = allele_probs.shape
        genotypes = np.zeros((n_samples, n_variants), dtype=object)

        for i in tqdm(range(n_samples)):
            for j in range(n_variants):
                unphased_probs = allele_probs[i, j]
                variant_genotypes = np.vectorize(self.reverse_replacement_dict.get)(
                    np.argmax(unphased_probs, axis=-1)).flatten()
                genotypes[i, j] = variant_genotypes
        return genotypes

    def __convert_hap_probs_to_diploid_genotypes(self, allele_probs) -> np.ndarray:
        """Convert haplotype probabilities to diploid genotypes"""
        n_haploids, n_variants, n_alleles = allele_probs.shape

        if n_haploids % 2 != 0:
            raise ValueError("Number of haploids should be even.")

        n_samples = n_haploids // 2
        genotypes = np.empty((n_samples, n_variants), dtype=object)
        haploids_as_diploids = allele_probs.reshape((n_samples, 2, n_variants, -1))
        variant_genotypes = self.map_preds_2_allele(np.argmax(haploids_as_diploids, axis=-1))

        def process_variant_in_sample(haps_for_sample_at_variant, variant_genotypes_for_sample_at_variant):
            if n_alleles > 2:
                return '|'.join(variant_genotypes_for_sample_at_variant)
            else:
                # Output GP (genotype probabilities)
                phased_probs = np.outer(haps_for_sample_at_variant[0], haps_for_sample_at_variant[1]).flatten()
                unphased_probs = np.array([phased_probs[0], phased_probs[1] + phased_probs[2], phased_probs[-1]])
                unphased_probs_str = ",".join([f"{v:.6f}" for v in unphased_probs])
                alt_dosage = np.dot(unphased_probs, [0, 1, 2])
                return '|'.join(variant_genotypes_for_sample_at_variant) + f":{unphased_probs_str}:{alt_dosage:.3f}"

        def process_sample(i):
            return np.array([
                process_variant_in_sample(haploids_as_diploids[i, :, j, :], variant_genotypes[i, :, j])
                for j in range(n_variants)
            ])

        # Parallel processing
        genotypes = Parallel(n_jobs=-1)(delayed(process_sample)(i) for i in tqdm(range(n_samples)))
        return np.array(genotypes)

    def __convert_hap_probs_to_hap_genotypes(self, allele_probs) -> np.ndarray:
        """Convert hap probabilities to hap genotypes"""
        return np.argmax(allele_probs, axis=1).astype(str)

    def __get_headers_for_output(self, contain_probs, chr=22):
        """Get VCF headers for output file"""
        headers = [
            "##fileformat=VCFv4.2",
            '''##source=BiMamba v1.0.0''',
            '''##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated Alternate Allele Frequency">''',
            '''##INFO=<ID=MAF,Number=1,Type=Float,Description="Estimated Minor Allele Frequency">''',
            '''##INFO=<ID=AVG_CS,Number=1,Type=Float,Description="Average Call Score">''',
            '''##INFO=<ID=IMPUTED,Number=0,Type=Flag,Description="Marker was imputed">''',
            '''##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">''',
        ]
        probs_headers = [
            '''##FORMAT=<ID=DS,Number=A,Type=Float,Description="Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]">''',
            '''##FORMAT=<ID=GP,Number=G,Type=Float,Description="Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1">'''
        ]
        if contain_probs:
            headers.extend(probs_headers)
        return headers

    def __convert_genotypes_to_vcf(self, genotypes, pred_format="GT:GP:DS"):
        """Convert genotypes to VCF format"""
        new_vcf = self.target_set.copy()
        new_vcf[new_vcf.columns[self.target_sample_value_index - 1:]] = genotypes
        new_vcf["FORMAT"] = pred_format
        new_vcf["QUAL"] = "."
        new_vcf["FILTER"] = "."
        new_vcf["INFO"] = "IMPUTED"
        return new_vcf

    def preds_to_genotypes(self, predictions: Union[str, np.ndarray]) -> pd.DataFrame:
        """Convert predictions to genotypes"""
        if isinstance(predictions, str):
            preds = np.load(predictions)
        else:
            preds = predictions

        target_df = self.target_set.copy()
        if not self.is_phased:
            target_df[
                target_df.columns[self.target_sample_value_index - 1:]] = self.__convert_unphased_probs_to_genotypes(
                preds).T
        elif self.target_is_hap:
            target_df[
                target_df.columns[self.target_sample_value_index - 1:]] = self.__convert_hap_probs_to_hap_genotypes(
                preds).T
        else:
            pred_format = "GT:GP:DS" if preds.shape[-1] == 2 else "GT"
            target_df = self.__convert_genotypes_to_vcf(self.__convert_hap_probs_to_diploid_genotypes(preds).T,
                                                        pred_format)
        return target_df

    def write_ligated_results_to_file(self, df: pd.DataFrame, file_name: str, compress=True) -> str:
        """Write results to file"""
        to_write_format = self.ref_file_extension
        file_path = f"{file_name}.{to_write_format}.gz" if compress else f"{file_name}.{to_write_format}"

        with gzip.open(file_path, 'wt') if compress else open(file_path, 'wt') as f_out:
            # Write headers
            if self.ref_file_extension == "vcf":
                f_out.write(
                    "\n".join(self.__get_headers_for_output(contain_probs="GP" in df["FORMAT"].values[0])) + "\n")
            else:
                f_out.write("\n".join(self.ref_n_header_lines))

        # Append data
        df.to_csv(file_path, sep=self.ref_separator, mode='a', index=False)
        return file_path

In [3]:
class GenomicDataset(Dataset):
    """Dataset class for genomic data with masking for training"""

    def __init__(self, data, targets, seq_depth, offset_before=0, offset_after=0,
                 training=True, masking_rates=(0.5, 0.99)):
        self.data = data
        self.targets = targets
        self.seq_depth = seq_depth
        self.offset_before = offset_before
        self.offset_after = offset_after
        self.training = training
        self.masking_rates = masking_rates

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx].copy()
        y = self.targets[idx]

        if self.training:
            # Apply masking
            seq_len = len(x)
            masking_rate = np.random.uniform(*self.masking_rates)
            mask_size = int(seq_len * masking_rate)
            mask_indices = np.random.choice(seq_len, mask_size, replace=False)
            x[mask_indices] = self.seq_depth - 1  # Missing value token

        # Convert to one-hot
        x_onehot = np.eye(self.seq_depth)[x]
        y_onehot = np.eye(self.seq_depth - 1)[y]

        return torch.FloatTensor(x_onehot), torch.FloatTensor(y_onehot)

class ImputationDataset(Dataset):
    """Dataset for imputation (no masking needed)"""

    def __init__(self, data, seq_depth):
        self.data = data
        self.seq_depth = seq_depth

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]
        # Convert to one-hot without masking
        x_onehot = np.eye(self.seq_depth)[x]
        return torch.FloatTensor(x_onehot)

## Model

In [4]:
class BiMambaBlock(nn.Module):
    """Bidirectional Mamba block for genomic sequence processing"""

    def __init__(self, d_model, d_state=16, d_conv=4, expand=2):
        super().__init__()
        self.d_model = d_model

        # Forward and backward Mamba blocks
        self.mamba_forward = Mamba2(
            d_model=d_model,
            d_state=d_state,
            d_conv=d_conv,
            expand=expand
        )

        self.mamba_backward = Mamba2(
            d_model=d_model,
            d_state=d_state,
            d_conv=d_conv,
            expand=expand
        )

        # Layer normalization
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

        # FFN
        self.ffn = nn.Sequential(
            nn.Linear(d_model * 2, d_model * 4),
            nn.GELU(),
            nn.Linear(d_model * 4, d_model),
            nn.GELU()
        )

        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        # x shape: (batch, seq_len, d_model)
        residual = x

        # Bidirectional processing
        x_norm = self.norm1(x)

        # Forward direction
        forward_out = self.mamba_forward(x_norm)

        # Backward direction (flip sequence)
        x_backward = torch.flip(x_norm, dims=[1])
        backward_out = self.mamba_backward(x_backward)
        backward_out = torch.flip(backward_out, dims=[1])

        # Concatenate bidirectional outputs
        bi_out = torch.cat([forward_out, backward_out], dim=-1)

        # FFN
        ffn_out = self.ffn(bi_out)
        ffn_out = self.dropout(ffn_out)

        # Residual connection
        out = self.norm2(residual + ffn_out)

        return out

class ConvBlock(nn.Module):
    """Convolutional block for local pattern extraction"""

    def __init__(self, d_model):
        super().__init__()
        self.d_model = d_model

        self.conv1 = nn.Conv1d(d_model, d_model, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(d_model, d_model, kernel_size=5, padding=2)
        self.conv3 = nn.Conv1d(d_model, d_model, kernel_size=7, padding=3)

        self.conv_large1 = nn.Conv1d(d_model, d_model, kernel_size=7, padding=3)
        self.conv_large2 = nn.Conv1d(d_model, d_model, kernel_size=15, padding=7)

        self.conv_final = nn.Conv1d(d_model, d_model, kernel_size=3, padding=1)
        self.conv_reduce = nn.Conv1d(d_model, d_model, kernel_size=1)

        self.bn1 = nn.BatchNorm1d(d_model)
        self.bn2 = nn.BatchNorm1d(d_model)

        self.gelu = nn.GELU()

    def forward(self, x):
        # x shape: (batch, seq_len, d_model)
        x = x.transpose(1, 2)  # (batch, d_model, seq_len)

        xa = self.gelu(self.conv1(x))

        xb = self.gelu(self.conv2(xa))
        xb = self.gelu(self.conv3(xb))

        xc = self.gelu(self.conv_large1(xa))
        xc = self.gelu(self.conv_large2(xc))

        xa = xb + xc
        xa = self.gelu(self.conv_final(xa))
        xa = self.bn1(xa)
        xa = self.gelu(self.conv_reduce(xa))
        xa = self.bn2(xa)
        xa = self.gelu(xa)

        return xa.transpose(1, 2)  # (batch, seq_len, d_model)

class CrossAttentionLayer(nn.Module):
    """Cross attention for integrating local and global features"""
    def __init__(self, d_model, n_heads=8):
        super().__init__()
        self.d_model = d_model
        self.n_heads = n_heads

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)

        self.cross_attention = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=n_heads,
            batch_first=True
        )

        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Linear(d_model // 2, d_model),
            nn.GELU()
        )

    def forward(self, local_repr, global_repr, start_offset=0, end_offset=0):
        local_norm = self.norm1(local_repr)
        global_norm = self.norm2(global_repr)

        # Apply offsets
        if start_offset > 0 or end_offset > 0:
            query = local_norm[:, start_offset:local_norm.shape[1] - end_offset]
        else:
            query = local_norm

        key = value = global_norm

        # Cross attention
        attn_output, _ = self.cross_attention(query, key, value)

        # Pad attn_output back to original length if offsets were applied
        if start_offset > 0 or end_offset > 0:
            pad_left = start_offset
            pad_right = end_offset
            attn_output = torch.nn.functional.pad(attn_output, (0, 0, pad_left, pad_right), mode='constant', value=0)

        # Skip connection
        attn_output = attn_output + local_norm  # Changed from +query to +local_norm

        # FFN
        attn_output = self.norm3(attn_output)
        ffn_output = self.ffn(attn_output)
        output = ffn_output + attn_output

        return output

class Mamba2CrossBlock(nn.Module):
    """
    用 Mamba2Simple 替代 MultiheadAttention 的交叉块。
    接口与原来 CrossAttentionLayer 保持一致，可直接替换。
    """
    def __init__(
        self,
        d_model,
        d_state=64,
        d_conv=4,
        expand=2,
        headdim=128,
        ngroups=1,
        chunk_size=256,
        dropout=0.0,
        device=None,
        dtype=None,
    ):
        super().__init__()
        self.d_model = d_model

        # 1. 归一化
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)

        # 2. Mamba2Simple 作为交叉建模核心
        self.ssd = Mamba2Block(
            d_model=d_model,
            d_state=d_state,
            d_conv=d_conv,
            expand=expand,
            headdim=headdim,
            ngroups=ngroups,
            chunk_size=chunk_size,
            use_mem_eff_path=True,
            device=device,
            dtype=dtype,
        )

        # 3. FFN 保持不变
        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Linear(d_model // 2, d_model),
        )

    def forward(self, local_repr, global_repr, start_offset=0, end_offset=0):
        """
        local_repr: [B, L, D]
        global_repr: [B, G, D]
        输出:       [B, L, D]  （长度与 local_repr 保持一致）
        """
        B, L, D = local_repr.shape

        # 1. 归一化
        local_norm  = self.norm1(local_repr)
        global_norm = self.norm2(global_repr)

        # 2. 如果用了 offset，先截断 query 长度
        if start_offset > 0 or end_offset > 0:
            query = local_norm[:, start_offset:L - end_offset]
        else:
            query = local_norm

        # 3. 拼接：global 在前，local 在后，让 SSD 扫描时 local 能看到 global
        x = torch.cat([global_norm, query], dim=1)   # [B, G + L', D]
        x = self.ssd(x)                              # [B, G + L', D]
        x = x[:, global_norm.shape[1]:, :]           # 只取 local 对应部分 [B, L', D]

        # 4. pad 回原始长度（如果之前截断过）
        if start_offset > 0 or end_offset > 0:
            x = F.pad(x, (0, 0, start_offset, end_offset))  # [B, L, D]

        # 5. 残差连接
        x = x + local_norm

        # 6. FFN
        x = self.norm3(x)
        x = self.ffn(x) + x

        return x

class GenoEmbedding(nn.Module):
    """Genomic embedding layer with positional encoding"""

    def __init__(self, n_alleles, n_snps, d_model):
        super().__init__()
        self.d_model = d_model
        self.n_alleles = n_alleles
        self.n_snps = n_snps

        # Allele embedding
        self.allele_embedding = nn.Parameter(torch.randn(n_alleles, d_model))

        # Positional embedding
        self.position_embedding = nn.Embedding(n_snps, d_model)

        # Initialize parameters
        nn.init.xavier_uniform_(self.allele_embedding)

    def forward(self, x):
        # x shape: (batch, seq_len, n_alleles) - one-hot encoded
        batch_size, seq_len, _ = x.shape

        # Allele embedding
        embedded = torch.einsum('bsn,nd->bsd', x, self.allele_embedding)

        # Positional embedding
        positions = torch.arange(seq_len, device=x.device)
        pos_emb = self.position_embedding(positions).unsqueeze(0)

        return embedded + pos_emb

class ChunkModule(nn.Module):
    """Single chunk processing module with BiMamba"""

    def __init__(self, d_model, start_offset=0, end_offset=0, dropout_rate=0.1):
        super().__init__()
        self.d_model = d_model
        self.start_offset = start_offset
        self.end_offset = end_offset

        # BiMamba block
        self.bimamba_block = BiMambaBlock(d_model)

        # Convolutional blocks
        self.conv_block1 = ConvBlock(d_model)
        self.conv_block2 = ConvBlock(d_model)
        self.conv_block3 = ConvBlock(d_model)

        # Cross attention
        # self.cross_attention = CrossAttentionLayer(d_model, n_heads)
        self.cross_attention = Mamba2CrossBlock(
            d_model=d_model,
            d_state=64,
            d_conv=4,
            expand=2,
            headdim=128,
            ngroups=1,
            chunk_size=256,
        )

        # Additional layers
        self.dense = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout_rate)
        self.gelu = nn.GELU()

    def forward(self, x):
        # BiMamba processing
        xa0 = self.bimamba_block(x)

        # First conv block
        xa = self.conv_block1(xa0)
        xa_skip = self.conv_block2(xa)

        # Dense layer
        xa = self.gelu(self.dense(xa))
        xa = self.conv_block2(xa)

        # Cross attention
        xa = self.cross_attention(xa, xa0, self.start_offset, self.end_offset)
        xa = self.dropout(xa)

        # Final conv block
        xa = self.conv_block3(xa)

        # Concatenate with skip connection
        xa = torch.cat([xa_skip, xa], dim=-1)

        return xa

class EvoFill(nn.Module):
    """Main BiMamba model for genomic imputation"""

    def __init__(self,
                 d_model,
                 chunk_size=2048,
                 chunk_overlap=64,
                 offset_before=0,
                 offset_after=0,
                 dropout_rate=0.1):
        super().__init__()
        self.d_model = d_model
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.offset_before = offset_before
        self.offset_after = offset_after
        self.dropout_rate = dropout_rate

        # Will be set during build
        self.seq_len = None
        self.n_alleles = None
        self.embedding = None
        self.chunk_modules = nn.ModuleList()
        self.final_conv = None
        self.output_conv = None

    def build(self, seq_len, n_alleles):
        """Build the model with specific sequence length and allele count"""
        self.seq_len = seq_len
        self.n_alleles = n_alleles

        # Embedding layer
        self.embedding = GenoEmbedding(n_alleles, seq_len, self.d_model)

        # Calculate chunks
        chunk_starts = list(range(0, seq_len, self.chunk_size))
        chunk_ends = [min(cs + self.chunk_size, seq_len) for cs in chunk_starts]
        mask_starts = [max(0, cs - self.chunk_overlap) for cs in chunk_starts]
        mask_ends = [min(ce + self.chunk_overlap, seq_len) for ce in chunk_ends]

        # Create chunk modules
        for i, cs in enumerate(chunk_starts):
            start_offset = cs - mask_starts[i]
            end_offset = mask_ends[i] - chunk_ends[i]

            chunk_module = ChunkModule(
                d_model=self.d_model,
                start_offset=start_offset,
                end_offset=end_offset,
                dropout_rate=self.dropout_rate
            )
            self.chunk_modules.append(chunk_module)

        # Store chunk information
        self.chunk_starts = chunk_starts
        self.chunk_ends = chunk_ends
        self.mask_starts = mask_starts
        self.mask_ends = mask_ends

        # Final layers
        self.final_conv = nn.Conv1d(self.d_model * 2, self.d_model // 2,
                                    kernel_size=5, padding=2)
        self.output_conv = nn.Conv1d(self.d_model // 2, n_alleles - 1,
                                     kernel_size=5, padding=2)
        # self.output_proj = nn.Linear(self.d_model * 2, n_alleles - 1)

        self.gelu = nn.GELU()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        # x shape: (batch, seq_len, n_alleles)
        if self.embedding is None:
            raise RuntimeError("Model not built. Call build() first.")

        # Embedding
        x_embedded = self.embedding(x)

        # Process chunks
        chunk_outputs = []
        for i, chunk_module in enumerate(self.chunk_modules):
            chunk_input = x_embedded[:, self.mask_starts[i]:self.mask_ends[i]]
            chunk_output = chunk_module(chunk_input)
            chunk_outputs.append(chunk_output)

        # Concatenate chunks along sequence dimension
        x_concat = torch.cat(chunk_outputs, dim=1)

        # # Final processing
        x_concat = x_concat.transpose(1, 2)  # (batch, features, seq_len)
        x_final = self.gelu(self.final_conv(x_concat))
        x_output = self.output_conv(x_final)
        x_output = x_output.transpose(1, 2)  # (batch, seq_len, n_alleles-1)
        # x_output = self.output_proj(x_concat) 

        # Apply offsets
        if self.offset_before > 0 or self.offset_after > 0:
            x_output = x_output[:, self.offset_before:self.seq_len - self.offset_after]
        else:
            x_output = x_output[:, :self.seq_len]

        x_output = self.softmax(x_output)

        return x_output

In [5]:
n_alleles = 4  # 包含missing
model = EvoFill(
    d_model=256,
    chunk_size=5120,
    chunk_overlap=64, 
    offset_before=0,
    offset_after=0,
    dropout_rate=0.1,
).cuda()

B, L = 2, 5120
model.build(seq_len=L, n_alleles=n_alleles)
model = model.cuda()  

# 1. 生成输入
x = torch.randint(0, n_alleles, (B, L)).long().cuda()   # {0,1,2,3} 3=missing

# 2. -1 -> 3，并构造 one-hot（4 维）
x_map = x.clone()
x_onehot = torch.zeros(B, L, n_alleles, device='cuda')
x_onehot.scatter_(2, x_map.unsqueeze(-1), 1)

# 3. 前向
with torch.no_grad():
    probs = model(x_onehot)          # shape: (B,L,3)

# 4. 简单校验
assert torch.allclose(probs.sum(dim=-1), torch.ones(B, L, device='cuda'), atol=1e-5), \
    "概率未归一"
print("✅ 含缺失数据前向通过，输出形状:", probs.shape)

✅ 含缺失数据前向通过，输出形状: torch.Size([2, 5120, 3])


## Loss

In [5]:
class ImputationLoss(nn.Module):
    """Custom loss function for genomic imputation"""

    def __init__(self, use_r2=True, 
                 use_focal=False, #  all dummy 
                 group_size=None,
                 gamma=None,
                 alpha=None,
                 eps=None,
                 use_gradnorm=None,
                 gn_alpha=None,
                 gn_lr_w=None,):
        super().__init__()
        self.use_r2_loss = use_r2
        self.ce_loss = nn.CrossEntropyLoss(reduction='sum')
        self.kl_loss = nn.KLDivLoss(reduction='sum')

    def calculate_minimac_r2(self, pred_alt_allele_probs, gt_alt_af):
        """Calculate Minimac-style RÂ² metric"""
        mask = torch.logical_or(torch.eq(gt_alt_af, 0.0), torch.eq(gt_alt_af, 1.0))
        gt_alt_af = torch.where(mask, 0.5, gt_alt_af)
        denom = gt_alt_af * (1.0 - gt_alt_af)
        denom = torch.where(denom < 0.01, 0.01, denom)
        r2 = torch.mean(torch.square(pred_alt_allele_probs - gt_alt_af), dim=0) / denom
        r2 = torch.where(mask, torch.zeros_like(r2), r2)
        return r2

    def forward(self, y_pred, y_true):
        y_true = y_true.float()

        # Convert to proper format for losses
        y_true_ce = torch.argmax(y_true, dim=-1)  # For CrossEntropy
        y_pred_log = torch.log(y_pred + 1e-8)  # For KL divergence

        # Basic losses
        ce_loss = self.ce_loss(y_pred.view(-1, y_pred.size(-1)), y_true_ce.view(-1))
        kl_loss = self.kl_loss(y_pred_log.view(-1, y_pred.size(-1)),
                               y_true.view(-1, y_true.size(-1)))

        total_loss = ce_loss + kl_loss

        if self.use_r2_loss:
            batch_size = y_true.size(0)
            group_size = 4
            num_full_groups = batch_size // group_size

            if num_full_groups > 0:
                y_true_grouped = y_true[:num_full_groups * group_size].view(
                    num_full_groups, group_size, *y_true.shape[1:])
                y_pred_grouped = y_pred[:num_full_groups * group_size].view(
                    num_full_groups, group_size, *y_pred.shape[1:])

                r2_loss = 0.0
                for i in range(num_full_groups):
                    gt_alt_af = torch.count_nonzero(
                        torch.argmax(y_true_grouped[i], dim=-1), dim=0
                    ).float() / group_size

                    pred_alt_allele_probs = torch.sum(y_pred_grouped[i][:, :, 1:], dim=-1)
                    r2_loss += -torch.sum(self.calculate_minimac_r2(
                        pred_alt_allele_probs, gt_alt_af)) * group_size

                total_loss += r2_loss

        return total_loss, None

## Train

In [6]:
def remove_similar_rows(array):
    """Remove duplicate haploids from training set"""
    print("Finding duplicate haploids in training set.")
    unique_array = np.unique(array, axis=0)
    print(f"Removed {len(array) - len(unique_array)} rows. {len(unique_array)} training samples remaining.")
    return unique_array

def create_directories(save_dir, models_dir="models", outputs="out") -> None:
    """Create necessary directories"""
    for dd in [save_dir, f"{save_dir}/{models_dir}", f"{save_dir}/{outputs}"]:
        if not os.path.exists(dd):
            os.makedirs(dd)

def clear_dir(path) -> None:
    """Clear directory if it exists"""
    if os.path.exists(path):
        shutil.rmtree(path)

def load_chunk_info(save_dir, break_points):
    """Load chunk training status information"""
    chunk_info = {ww: False for ww in list(range(len(break_points) - 1))}
    if os.path.isfile(f"{save_dir}/models/chunks_info.json"):
        with open(f"{save_dir}/models/chunks_info.json", 'r') as f:
            loaded_chunks_info = json.load(f)
            if isinstance(loaded_chunks_info, dict) and len(loaded_chunks_info) == len(chunk_info):
                pprint("Resuming the training...")
                chunk_info = {int(k): v for k, v in loaded_chunks_info.items()}
    return chunk_info

def save_chunk_status(save_dir, chunk_info) -> None:
    """Save chunk training status information"""
    with open(f"{save_dir}/models/chunks_info.json", "w") as outfile:
        json.dump(chunk_info, outfile)

def create_model(args, seq_len, n_alleles):
    """Create BiMamba model"""
    model = EvoFill(
        d_model=args.embed_dim,
        chunk_size=args.cs,
        chunk_overlap=args.co,
        offset_before=getattr(args, 'offset_before', 0),
        offset_after=getattr(args, 'offset_after', 0),
        dropout_rate=0.1
    )

    # Build the model
    model.build(seq_len, n_alleles)
    return model

In [7]:
MAF_BINS = [(0.00, 0.05), (0.05, 0.10), (0.10, 0.20),
            (0.20, 0.30), (0.30, 0.40), (0.40, 0.50)]

def precompute_maf(gts_np, mask_int=-1):
    """
    gts_np: (N, L)  int64
    return:
        maf: (L,) float32
        bin_cnt: list[int] 长度 6，对应 6 个 bin 的位点数量
    """
    L = gts_np.shape[1]
    maf = np.zeros(L, dtype=np.float32)
    bin_cnt = [0] * 6

    for l in range(L):
        alleles = gts_np[:, l]
        alleles = alleles[alleles != mask_int]   # 去掉缺失
        if alleles.size == 0:
            maf[l] = 0.0
            continue

        uniq, cnt = np.unique(alleles, return_counts=True)
        total = cnt.sum()
        freq = cnt / total
        freq[::-1].sort()
        maf_val = freq[1] if len(freq) > 1 else 0.0
        maf[l] = maf_val

        # 统计 bin
        for i, (lo, hi) in enumerate(MAF_BINS):
            if lo <= maf_val < hi:
                bin_cnt[i] += 1
                break

    return maf, bin_cnt

def imputation_maf_accuracy_epoch(all_logits, all_gts, global_maf, mask=None):
    """
    all_logits: (N, L, C)
    all_gts:    (N, L, C) one-hot
    global_maf: (L,)
    mask:       (N, L) 或 None
    return:     list[float] 长度 6
    """
    # 1. 预测 vs 真实
    all_gts = all_gts.argmax(dim=-1)      # (N, L)
    preds   = all_logits.argmax(dim=-1)   # (N, L)

    # 2. 如果没有外部 mask，就默认全 1
    if mask is None:
        mask = torch.ones_like(all_gts, dtype=torch.bool)   # (N, L)
    correct = (preds == all_gts) & mask                   # (N, L)

    # 3. MAF 条件 -> (1, L) 再广播到 (N, L)
    maf = global_maf.unsqueeze(0)                         # (1, L)

    # 4. 分 bin 计算
    accs = []
    for lo, hi in MAF_BINS:
        maf_bin = mask & (maf >= lo) & (maf < hi)                # (1, L)
        n_cor = (correct & maf_bin).sum()
        n_tot = maf_bin.sum()
        accs.append(100*(n_cor / n_tot).item() if n_tot > 0 else 0.0)
    return accs

In [10]:
# ---------------- 以下即命令行参数对应的行内变量 ----------------
mode                 = 'train'
restart_training     = True          # 对应命令行 1
ref                  = "/home/qmtang/GitHub/STICI-HPC/data/training_sets/ALL.chr22.training.samples.100k.any.type.0.01.maf.variants.vcf.gz"
tihp                 = True          # 对应命令行 1
which_chunk          = -1            # All chunkss
save_dir             = '/home/qmtang/mnt_qmtang/EvoFill/data/251016_ver0_chr22'
co                   = 64            # 64 in STICI
cs                   = 8192          # 2048 in STICI
sites_per_model      = 65536        # 10240 in STICI
max_mr               = 0.7
min_mr               = 0.3
epochs               = 100
embed_dim            = 64            # 128 in STICI
lr                   = 0.001
weight_decay         = 1e-5
batch_size_per_gpu   = 8
use_r2               = True
earlystop_patience   = 9
verbose              = 1
# -------------------------------------------------------------

# 组装成 Namespace
args = Namespace(
    restart_training=restart_training,
    ref=ref,
    tihp=tihp,
    which_chunk=which_chunk,
    save_dir=save_dir,
    co=co,
    cs=cs,
    sites_per_model=sites_per_model,
    max_mr=max_mr,
    min_mr=min_mr,
    epochs=epochs,
    embed_dim=embed_dim,
    lr=lr,
    weight_decay=weight_decay,
    batch_size_per_gpu=batch_size_per_gpu,
    use_r2=use_r2,
    earlystop_patience=earlystop_patience,
    verbose=verbose,
)

assert args.max_mr > 0
assert args.min_mr > 0
assert args.max_mr >= args.min_mr

# Setup device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create directories
create_directories(args.save_dir)
with open(f"{args.save_dir}/commandline_args.json", 'w') as f:
    json.dump(vars(args), f, indent=4)

# Load data
dr = DataReader()
dr.assign_training_set(
    file_path=args.ref,
    target_is_gonna_be_phased_or_haps=args.tihp,
    variants_as_columns=getattr(args, 'ref_vac', False),
    delimiter=getattr(args, 'ref_sep', None),
    file_format=getattr(args, 'ref_file_format', 'infer'),
    first_column_is_index=getattr(args, 'ref_fcai', True),
    comments=getattr(args, 'ref_comment', '##')
)

# Split data for validation
n_samples = dr.get_ref_set(0, 1).shape[0]
val_n_samples = args.batch_size_per_gpu * getattr(args, 'val_n_batches', 8)
x_train_indices, x_valid_indices = train_test_split(
    range(n_samples),
    test_size=val_n_samples,
    random_state=getattr(args, 'random_seed', 3047),
    shuffle=True
)

# Process chunks
break_points = list(np.arange(0, dr.VARIANT_COUNT, args.sites_per_model)) + [dr.VARIANT_COUNT]
chunks_done = load_chunk_info(args.save_dir, break_points)

Using device: cuda
Reading the file...
2404 diploid samples with 99314 variants found!
DEBUG: Unique genotypes in dataset: ['0|0' '0|1' '1|0' '1|1']...
DEBUG: self.genotype_vals: ['0|0' '0|1' '1|0' '1|1']
DEBUG: self.alleles: ['0' '1']
DEBUG: is_phased: True
DEBUG: hap_map: {'0': 0, '1': 1, '.': 2}
DEBUG: self.SEQ_DEPTH: 3


In [11]:
for w in range(len(break_points) - 1):
    if chunks_done[w]:
        print(f"Skipping chunk {w + 1}/{len(break_points) - 1} due to previous training.")
        continue
    if args.which_chunk != -1 and w + 1 != args.which_chunk:
        print(f"Skipping chunk {w + 1}/{len(break_points) - 1} due to your request using --which-chunk.")
        continue

    print(f"Training on chunk {w + 1}/{len(break_points) - 1}")

    # Calculate chunk boundaries
    final_start_pos = max(0, break_points[w] - 2 * args.co)
    final_end_pos = min(dr.VARIANT_COUNT, break_points[w + 1] + 2 * args.co)
    offset_before = break_points[w] - final_start_pos
    offset_after = final_end_pos - break_points[w + 1]

    # Get data for this chunk
    ref_set = dr.get_ref_set(final_start_pos, final_end_pos).astype(np.int32)
    print(f"Data shape: {ref_set.shape}")

    # Remove duplicates from training
    ref_set_train = remove_similar_rows(ref_set[x_train_indices])
    ref_set_val = ref_set[x_valid_indices]

    # MAF bins counts
    valid_slice = slice(offset_before,
                        ref_set_train.shape[1] - offset_after)
    chunk_maf, chunk_bin_cnt = precompute_maf(
        ref_set_train[:, valid_slice], 
        mask_int=-1
    )
    chunk_maf = torch.from_numpy(chunk_maf).to(device)          # (L_chunk,)
    if args.verbose:
        print('Chunk MAF-bin counts:', chunk_bin_cnt)

    # Create targets (same as input for reconstruction)
    target_train = ref_set_train[:, offset_before:ref_set_train.shape[1] - offset_after]
    target_val = ref_set_val[:, offset_before:ref_set_val.shape[1] - offset_after]

    # Create datasets
    train_dataset = GenomicDataset(
        ref_set_train, target_train, dr.SEQ_DEPTH,
        offset_before, offset_after, training=True,
        masking_rates=(args.min_mr, args.max_mr)
    )

    val_dataset = GenomicDataset(
        ref_set_val, target_val, dr.SEQ_DEPTH,
        offset_before, offset_after, training=False,
        masking_rates=(args.min_mr, args.max_mr)
    )

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size_per_gpu,
                                shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size_per_gpu,
                            shuffle=False, num_workers=4, pin_memory=True)

    # Create model
    seq_len = ref_set.shape[1]
    model = create_model(args, seq_len, dr.SEQ_DEPTH)
    model.offset_before = offset_before
    model.offset_after = offset_after
    model.to(device)

    # Loss and optimizer
    criterion = ImputationLoss(use_r2=getattr(args, 'use_r2', True),use_focal=getattr(args, 'use_focal', True))
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = Lamb(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-7
    )

    # Training loop
    best_loss = float('inf')
    patience = args.earlystop_patience
    patience_counter = 0

    for epoch in range(args.epochs):
        model.train()
        train_loss = 0.0
        train_logits, train_gts, train_mask = [], [], []

        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{args.epochs}', leave=False)
        for batch_idx, (data, target) in enumerate(train_pbar):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss, logs = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_pbar.set_postfix({'loss': loss.item()})

            # === 收集训练结果 ===
            mask = data[..., -1].bool()         # 只关心被 mask 的位点
            train_logits.append(output.detach())
            train_gts.append(target.detach())
            train_mask.append(mask)

        # 训练集 MAF-acc
        train_logits = torch.cat(train_logits, dim=0)
        train_gts    = torch.cat(train_gts,    dim=0)
        train_mask   = torch.cat(train_mask,   dim=0)
        # 只保留有效位点（去掉 offset  padding）
        if model.offset_before > 0 or model.offset_after > 0:
            train_mask   = train_mask  [:, model.offset_before : train_mask.shape[1]  -model.offset_after]
        train_maf_accs = imputation_maf_accuracy_epoch(train_logits, train_gts, chunk_maf, mask=train_mask)

        # ----------- 验证循环同理 ------------
        model.eval()
        val_loss = 0.0
        val_logits, val_gts = [], []
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss, logs = criterion(output, target)
                val_loss += loss.item()

                mask = data[..., -1].bool()
                val_logits.append(output)
                val_gts.append(target)

        val_logits = torch.cat(val_logits, dim=0)
        val_gts    = torch.cat(val_gts,    dim=0)
        val_maf_accs = imputation_maf_accuracy_epoch(
            val_logits, val_gts, chunk_maf,  mask=None,)

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss   = val_loss   / len(val_loader)

        if args.verbose >= 1:
            avg_train_loss = train_loss / len(train_loader)
            avg_val_loss   = val_loss   / len(val_loader)
            print(f'Chunk {w + 1}, Epoch {epoch + 1}/{args.epochs}, '
                  f'Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

            # 用 DataFrame 打印 MAF-bin 结果
            maf_df = pd.DataFrame({
                'MAF_bin': ['(0.00, 0.05)', '(0.05, 0.10)', '(0.10, 0.20)',
                            '(0.20, 0.30)', '(0.30, 0.40)', '(0.40, 0.50)'],
                'Counts':  [f"{c}" for c in chunk_bin_cnt],
                'Train':   [f"{acc:.2f}" for acc in train_maf_accs],
                'Val':     [f"{acc:.2f}" for acc in val_maf_accs]
            })
            print(maf_df.to_string(index=False))

        scheduler.step(avg_val_loss)

        # Early stopping
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            patience_counter = 0
            # Save best model
            import os
            os.makedirs(f'{args.save_dir}/models', exist_ok=True)
            torch.save(model.state_dict(), f'{args.save_dir}/models/w_{w}.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                if args.verbose >= 1:
                    print('Early stopping triggered')
                break

    # Mark chunk as done
    chunks_done[w] = True
    save_chunk_status(args.save_dir, chunks_done)

    # Clean up
    del ref_set, train_dataset, val_dataset, train_loader, val_loader, model
    torch.cuda.empty_cache() if torch.cuda.is_available() else None

Training on chunk 1/2
Data shape: (4808, 65664)
Finding duplicate haploids in training set.
Removed 0 rows. 4744 training samples remaining.
Chunk MAF-bin counts: [26859, 9314, 10413, 6899, 6055, 5992]


                                                                            

Chunk 1, Epoch 1/100, Train Loss: 295609.4848, Val Loss: 296327.9570
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 97.57 97.92
(0.05, 0.10)   9314 92.86 93.73
(0.10, 0.20)  10413 86.00 87.08
(0.20, 0.30)   6899 77.20 78.01
(0.30, 0.40)   6055 69.00 71.46
(0.40, 0.50)   5992 64.41 68.41


                                                                            

Chunk 1, Epoch 2/100, Train Loss: 239563.2331, Val Loss: 227306.3887
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 97.73 98.07
(0.05, 0.10)   9314 93.73 94.63
(0.10, 0.20)  10413 88.13 89.23
(0.20, 0.30)   6899 81.20 82.64
(0.30, 0.40)   6055 75.96 78.54
(0.40, 0.50)   5992 72.79 76.12


                                                                            

Chunk 1, Epoch 3/100, Train Loss: 201804.9939, Val Loss: 189574.9902
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 97.81 98.18
(0.05, 0.10)   9314 94.22 95.09
(0.10, 0.20)  10413 89.63 90.86
(0.20, 0.30)   6899 83.80 84.98
(0.30, 0.40)   6055 80.25 82.10
(0.40, 0.50)   5992 77.93 80.82


                                                                            

Chunk 1, Epoch 4/100, Train Loss: 172447.0639, Val Loss: 157039.9453
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 97.85 98.28
(0.05, 0.10)   9314 94.69 95.74
(0.10, 0.20)  10413 90.85 92.06
(0.20, 0.30)   6899 85.67 87.39
(0.30, 0.40)   6055 83.23 85.46
(0.40, 0.50)   5992 81.98 84.53


                                                                            

Chunk 1, Epoch 5/100, Train Loss: 150323.7915, Val Loss: 130741.6641
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 97.93 98.38
(0.05, 0.10)   9314 95.13 96.24
(0.10, 0.20)  10413 91.66 93.19
(0.20, 0.30)   6899 87.30 89.28
(0.30, 0.40)   6055 85.45 87.83
(0.40, 0.50)   5992 84.82 87.39


                                                                            

Chunk 1, Epoch 6/100, Train Loss: 134280.2834, Val Loss: 119666.8320
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.01 98.44
(0.05, 0.10)   9314 95.48 96.54
(0.10, 0.20)  10413 92.42 93.79
(0.20, 0.30)   6899 88.42 90.20
(0.30, 0.40)   6055 86.86 88.81
(0.40, 0.50)   5992 86.41 88.12


                                                                            

Chunk 1, Epoch 7/100, Train Loss: 118863.0824, Val Loss: 101801.0664
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.07 98.53
(0.05, 0.10)   9314 95.78 96.87
(0.10, 0.20)  10413 93.01 94.42
(0.20, 0.30)   6899 89.41 91.37
(0.30, 0.40)   6055 88.28 90.60
(0.40, 0.50)   5992 88.24 90.41


                                                                            

Chunk 1, Epoch 8/100, Train Loss: 107080.7662, Val Loss: 91742.4648
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.14 98.56
(0.05, 0.10)   9314 96.06 96.95
(0.10, 0.20)  10413 93.52 94.96
(0.20, 0.30)   6899 90.36 92.17
(0.30, 0.40)   6055 89.39 91.53
(0.40, 0.50)   5992 89.25 91.20


                                                                            

Chunk 1, Epoch 9/100, Train Loss: 97968.5088, Val Loss: 83211.2305
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.18 98.60
(0.05, 0.10)   9314 96.29 97.20
(0.10, 0.20)  10413 93.97 95.37
(0.20, 0.30)   6899 91.02 92.68
(0.30, 0.40)   6055 90.13 91.98
(0.40, 0.50)   5992 89.89 91.85


                                                                             

Chunk 1, Epoch 10/100, Train Loss: 90602.4211, Val Loss: 74232.4277
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.25 98.68
(0.05, 0.10)   9314 96.51 97.46
(0.10, 0.20)  10413 94.36 95.79
(0.20, 0.30)   6899 91.61 93.49
(0.30, 0.40)   6055 90.70 92.74
(0.40, 0.50)   5992 90.46 92.43


                                                                             

Chunk 1, Epoch 11/100, Train Loss: 84675.5249, Val Loss: 71100.6523
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.30 98.71
(0.05, 0.10)   9314 96.67 97.48
(0.10, 0.20)  10413 94.63 95.89
(0.20, 0.30)   6899 92.02 93.65
(0.30, 0.40)   6055 91.12 92.85
(0.40, 0.50)   5992 90.87 92.52


                                                                             

Chunk 1, Epoch 12/100, Train Loss: 80127.4225, Val Loss: 64839.9336
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.33 98.77
(0.05, 0.10)   9314 96.79 97.73
(0.10, 0.20)  10413 94.83 96.24
(0.20, 0.30)   6899 92.35 94.10
(0.30, 0.40)   6055 91.46 93.48
(0.40, 0.50)   5992 91.15 93.00


                                                                             

Chunk 1, Epoch 13/100, Train Loss: 75636.8581, Val Loss: 64043.7910
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.40 98.80
(0.05, 0.10)   9314 96.93 97.76
(0.10, 0.20)  10413 95.03 96.26
(0.20, 0.30)   6899 92.63 94.15
(0.30, 0.40)   6055 91.76 93.41
(0.40, 0.50)   5992 91.44 93.08


                                                                             

Chunk 1, Epoch 14/100, Train Loss: 72508.1139, Val Loss: 59331.6621
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.44 98.85
(0.05, 0.10)   9314 97.03 97.91
(0.10, 0.20)  10413 95.16 96.42
(0.20, 0.30)   6899 92.84 94.34
(0.30, 0.40)   6055 91.98 93.79
(0.40, 0.50)   5992 91.66 93.35


                                                                             

Chunk 1, Epoch 15/100, Train Loss: 69567.0352, Val Loss: 54365.5449
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.48 98.88
(0.05, 0.10)   9314 97.13 97.98
(0.10, 0.20)  10413 95.30 96.57
(0.20, 0.30)   6899 93.00 94.56
(0.30, 0.40)   6055 92.22 94.13
(0.40, 0.50)   5992 91.86 93.70


                                                                             

Chunk 1, Epoch 16/100, Train Loss: 67144.5160, Val Loss: 52225.8320
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.51 98.91
(0.05, 0.10)   9314 97.18 98.04
(0.10, 0.20)  10413 95.40 96.68
(0.20, 0.30)   6899 93.15 94.73
(0.30, 0.40)   6055 92.36 94.29
(0.40, 0.50)   5992 92.03 93.95


                                                                             

Chunk 1, Epoch 17/100, Train Loss: 64477.9364, Val Loss: 50094.2402
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.54 98.94
(0.05, 0.10)   9314 97.24 98.11
(0.10, 0.20)  10413 95.50 96.75
(0.20, 0.30)   6899 93.29 94.81
(0.30, 0.40)   6055 92.56 94.44
(0.40, 0.50)   5992 92.28 94.22


                                                                             

Chunk 1, Epoch 18/100, Train Loss: 61837.3356, Val Loss: 49241.9375
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.57 98.94
(0.05, 0.10)   9314 97.32 98.14
(0.10, 0.20)  10413 95.61 96.78
(0.20, 0.30)   6899 93.44 94.85
(0.30, 0.40)   6055 92.75 94.48
(0.40, 0.50)   5992 92.46 94.23


                                                                             

Chunk 1, Epoch 19/100, Train Loss: 59858.7230, Val Loss: 44341.2988
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.59 98.98
(0.05, 0.10)   9314 97.37 98.24
(0.10, 0.20)  10413 95.71 97.03
(0.20, 0.30)   6899 93.56 95.16
(0.30, 0.40)   6055 92.88 94.88
(0.40, 0.50)   5992 92.62 94.67


                                                                             

Chunk 1, Epoch 20/100, Train Loss: 57624.7220, Val Loss: 42726.2168
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.62 99.02
(0.05, 0.10)   9314 97.42 98.25
(0.10, 0.20)  10413 95.81 97.09
(0.20, 0.30)   6899 93.70 95.28
(0.30, 0.40)   6055 93.02 94.97
(0.40, 0.50)   5992 92.80 94.79


                                                                             

Chunk 1, Epoch 21/100, Train Loss: 56040.7366, Val Loss: 41513.4707
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.65 99.03
(0.05, 0.10)   9314 97.47 98.29
(0.10, 0.20)  10413 95.89 97.16
(0.20, 0.30)   6899 93.80 95.36
(0.30, 0.40)   6055 93.14 95.08
(0.40, 0.50)   5992 92.89 94.90


                                                                             

Chunk 1, Epoch 22/100, Train Loss: 54557.6240, Val Loss: 39748.0977
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.68 99.05
(0.05, 0.10)   9314 97.54 98.34
(0.10, 0.20)  10413 95.98 97.25
(0.20, 0.30)   6899 93.91 95.47
(0.30, 0.40)   6055 93.26 95.16
(0.40, 0.50)   5992 93.03 94.94


                                                                             

Chunk 1, Epoch 23/100, Train Loss: 52841.5636, Val Loss: 39173.2363
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.68 99.05
(0.05, 0.10)   9314 97.57 98.32
(0.10, 0.20)  10413 96.04 97.24
(0.20, 0.30)   6899 94.00 95.48
(0.30, 0.40)   6055 93.42 95.31
(0.40, 0.50)   5992 93.11 94.96


                                                                             

Chunk 1, Epoch 24/100, Train Loss: 50996.9863, Val Loss: 38183.8164
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.71 99.08
(0.05, 0.10)   9314 97.62 98.38
(0.10, 0.20)  10413 96.13 97.33
(0.20, 0.30)   6899 94.10 95.61
(0.30, 0.40)   6055 93.55 95.41
(0.40, 0.50)   5992 93.22 95.04


                                                                             

Chunk 1, Epoch 25/100, Train Loss: 49262.1411, Val Loss: 35747.3906
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.74 99.09
(0.05, 0.10)   9314 97.67 98.44
(0.10, 0.20)  10413 96.21 97.42
(0.20, 0.30)   6899 94.21 95.71
(0.30, 0.40)   6055 93.71 95.56
(0.40, 0.50)   5992 93.31 95.17


                                                                             

Chunk 1, Epoch 26/100, Train Loss: 48038.7004, Val Loss: 34029.5957
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.75 99.11
(0.05, 0.10)   9314 97.71 98.49
(0.10, 0.20)  10413 96.27 97.51
(0.20, 0.30)   6899 94.25 95.76
(0.30, 0.40)   6055 93.81 95.65
(0.40, 0.50)   5992 93.39 95.33


                                                                             

Chunk 1, Epoch 27/100, Train Loss: 47198.9708, Val Loss: 33648.5312
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.77 99.11
(0.05, 0.10)   9314 97.73 98.46
(0.10, 0.20)  10413 96.31 97.52
(0.20, 0.30)   6899 94.32 95.83
(0.30, 0.40)   6055 93.87 95.73
(0.40, 0.50)   5992 93.44 95.40


                                                                             

Chunk 1, Epoch 28/100, Train Loss: 46397.6648, Val Loss: 33365.0234
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.78 99.13
(0.05, 0.10)   9314 97.76 98.49
(0.10, 0.20)  10413 96.36 97.55
(0.20, 0.30)   6899 94.39 95.86
(0.30, 0.40)   6055 93.93 95.74
(0.40, 0.50)   5992 93.52 95.36


                                                                             

Chunk 1, Epoch 29/100, Train Loss: 45193.0173, Val Loss: 31626.1152
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.80 99.14
(0.05, 0.10)   9314 97.78 98.52
(0.10, 0.20)  10413 96.40 97.62
(0.20, 0.30)   6899 94.45 95.99
(0.30, 0.40)   6055 94.00 95.85
(0.40, 0.50)   5992 93.57 95.43


                                                                             

Chunk 1, Epoch 30/100, Train Loss: 44357.9719, Val Loss: 32054.2227
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.81 99.13
(0.05, 0.10)   9314 97.82 98.50
(0.10, 0.20)  10413 96.45 97.59
(0.20, 0.30)   6899 94.50 95.97
(0.30, 0.40)   6055 94.06 95.81
(0.40, 0.50)   5992 93.65 95.49


                                                                             

Chunk 1, Epoch 31/100, Train Loss: 43505.9880, Val Loss: 30342.2188
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.81 99.15
(0.05, 0.10)   9314 97.83 98.56
(0.10, 0.20)  10413 96.46 97.65
(0.20, 0.30)   6899 94.55 96.06
(0.30, 0.40)   6055 94.07 95.85
(0.40, 0.50)   5992 93.66 95.55


                                                                             

Chunk 1, Epoch 32/100, Train Loss: 42135.5233, Val Loss: 28950.4023
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.84 99.17
(0.05, 0.10)   9314 97.88 98.59
(0.10, 0.20)  10413 96.55 97.71
(0.20, 0.30)   6899 94.63 96.14
(0.30, 0.40)   6055 94.18 96.00
(0.40, 0.50)   5992 93.78 95.70


                                                                             

Chunk 1, Epoch 33/100, Train Loss: 41518.4672, Val Loss: 29880.1797
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.85 99.16
(0.05, 0.10)   9314 97.89 98.56
(0.10, 0.20)  10413 96.57 97.63
(0.20, 0.30)   6899 94.69 96.06
(0.30, 0.40)   6055 94.22 95.90
(0.40, 0.50)   5992 93.80 95.63


                                                                             

Chunk 1, Epoch 34/100, Train Loss: 40960.6821, Val Loss: 27956.5430
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.86 99.17
(0.05, 0.10)   9314 97.91 98.61
(0.10, 0.20)  10413 96.60 97.77
(0.20, 0.30)   6899 94.72 96.27
(0.30, 0.40)   6055 94.24 96.03
(0.40, 0.50)   5992 93.86 95.77


                                                                             

Chunk 1, Epoch 35/100, Train Loss: 40285.6516, Val Loss: 27213.9102
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.87 99.20
(0.05, 0.10)   9314 97.93 98.64
(0.10, 0.20)  10413 96.62 97.79
(0.20, 0.30)   6899 94.77 96.20
(0.30, 0.40)   6055 94.29 96.08
(0.40, 0.50)   5992 93.90 95.81


                                                                             

Chunk 1, Epoch 36/100, Train Loss: 39586.2260, Val Loss: 27171.3555
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.88 99.20
(0.05, 0.10)   9314 97.95 98.65
(0.10, 0.20)  10413 96.65 97.80
(0.20, 0.30)   6899 94.80 96.30
(0.30, 0.40)   6055 94.32 96.10
(0.40, 0.50)   5992 93.94 95.81


                                                                             

Chunk 1, Epoch 37/100, Train Loss: 38942.8162, Val Loss: 26231.6562
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.89 99.21
(0.05, 0.10)   9314 97.97 98.62
(0.10, 0.20)  10413 96.69 97.84
(0.20, 0.30)   6899 94.85 96.32
(0.30, 0.40)   6055 94.37 96.15
(0.40, 0.50)   5992 93.99 95.91


                                                                             

Chunk 1, Epoch 38/100, Train Loss: 38153.4020, Val Loss: 26121.6152
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.91 99.22
(0.05, 0.10)   9314 97.98 98.68
(0.10, 0.20)  10413 96.72 97.85
(0.20, 0.30)   6899 94.89 96.37
(0.30, 0.40)   6055 94.41 96.12
(0.40, 0.50)   5992 94.02 95.89


                                                                             

Chunk 1, Epoch 39/100, Train Loss: 37578.4324, Val Loss: 25004.0449
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.91 99.22
(0.05, 0.10)   9314 98.01 98.69
(0.10, 0.20)  10413 96.76 97.90
(0.20, 0.30)   6899 94.92 96.44
(0.30, 0.40)   6055 94.44 96.23
(0.40, 0.50)   5992 94.06 95.99


                                                                             

Chunk 1, Epoch 40/100, Train Loss: 36918.2299, Val Loss: 25277.5078
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.92 99.22
(0.05, 0.10)   9314 98.03 98.67
(0.10, 0.20)  10413 96.80 97.88
(0.20, 0.30)   6899 94.97 96.37
(0.30, 0.40)   6055 94.49 96.18
(0.40, 0.50)   5992 94.10 95.91


                                                                             

Chunk 1, Epoch 41/100, Train Loss: 37237.2215, Val Loss: 25763.3574
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.92 99.21
(0.05, 0.10)   9314 98.02 98.65
(0.10, 0.20)  10413 96.79 97.89
(0.20, 0.30)   6899 94.96 96.35
(0.30, 0.40)   6055 94.48 96.16
(0.40, 0.50)   5992 94.12 95.87


                                                                             

Chunk 1, Epoch 42/100, Train Loss: 36400.1615, Val Loss: 24520.0566
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.94 99.23
(0.05, 0.10)   9314 98.05 98.72
(0.10, 0.20)  10413 96.81 97.93
(0.20, 0.30)   6899 95.01 96.52
(0.30, 0.40)   6055 94.52 96.25
(0.40, 0.50)   5992 94.15 95.97


                                                                             

Chunk 1, Epoch 43/100, Train Loss: 35760.0408, Val Loss: 23892.3574
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.95 99.23
(0.05, 0.10)   9314 98.08 98.72
(0.10, 0.20)  10413 96.85 97.96
(0.20, 0.30)   6899 95.04 96.49
(0.30, 0.40)   6055 94.57 96.27
(0.40, 0.50)   5992 94.20 96.01


                                                                             

Chunk 1, Epoch 44/100, Train Loss: 34933.5639, Val Loss: 23254.3535
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.97 99.26
(0.05, 0.10)   9314 98.10 98.71
(0.10, 0.20)  10413 96.89 97.98
(0.20, 0.30)   6899 95.10 96.56
(0.30, 0.40)   6055 94.62 96.33
(0.40, 0.50)   5992 94.25 96.08


                                                                             

Chunk 1, Epoch 45/100, Train Loss: 34433.7395, Val Loss: 23358.0664
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.96 99.25
(0.05, 0.10)   9314 98.09 98.71
(0.10, 0.20)  10413 96.89 97.98
(0.20, 0.30)   6899 95.10 96.55
(0.30, 0.40)   6055 94.62 96.31
(0.40, 0.50)   5992 94.26 96.11


                                                                             

Chunk 1, Epoch 46/100, Train Loss: 34374.7570, Val Loss: 22478.3418
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.97 99.25
(0.05, 0.10)   9314 98.11 98.73
(0.10, 0.20)  10413 96.90 97.99
(0.20, 0.30)   6899 95.11 96.59
(0.30, 0.40)   6055 94.64 96.36
(0.40, 0.50)   5992 94.27 96.17


                                                                             

Chunk 1, Epoch 47/100, Train Loss: 33666.9783, Val Loss: 22617.4590
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.98 99.26
(0.05, 0.10)   9314 98.14 98.72
(0.10, 0.20)  10413 96.95 98.01
(0.20, 0.30)   6899 95.17 96.57
(0.30, 0.40)   6055 94.70 96.41
(0.40, 0.50)   5992 94.31 96.14


                                                                             

Chunk 1, Epoch 48/100, Train Loss: 33144.7332, Val Loss: 21635.3633
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.99 99.26
(0.05, 0.10)   9314 98.13 98.78
(0.10, 0.20)  10413 96.95 98.03
(0.20, 0.30)   6899 95.17 96.63
(0.30, 0.40)   6055 94.72 96.45
(0.40, 0.50)   5992 94.34 96.21


                                                                             

Chunk 1, Epoch 49/100, Train Loss: 32749.6439, Val Loss: 21575.8418
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 98.99 99.24
(0.05, 0.10)   9314 98.15 98.76
(0.10, 0.20)  10413 96.97 98.06
(0.20, 0.30)   6899 95.20 96.65
(0.30, 0.40)   6055 94.73 96.46
(0.40, 0.50)   5992 94.37 96.24


                                                                             

Chunk 1, Epoch 50/100, Train Loss: 32196.1603, Val Loss: 22042.0176
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.01 99.26
(0.05, 0.10)   9314 98.18 98.73
(0.10, 0.20)  10413 97.00 98.02
(0.20, 0.30)   6899 95.24 96.62
(0.30, 0.40)   6055 94.77 96.43
(0.40, 0.50)   5992 94.41 96.22


                                                                             

Chunk 1, Epoch 51/100, Train Loss: 32172.8958, Val Loss: 20906.0977
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.01 99.25
(0.05, 0.10)   9314 98.18 98.78
(0.10, 0.20)  10413 97.00 98.06
(0.20, 0.30)   6899 95.24 96.69
(0.30, 0.40)   6055 94.78 96.52
(0.40, 0.50)   5992 94.41 96.29


                                                                             

Chunk 1, Epoch 52/100, Train Loss: 31871.0988, Val Loss: 21877.2031
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.01 99.26
(0.05, 0.10)   9314 98.18 98.74
(0.10, 0.20)  10413 97.00 98.05
(0.20, 0.30)   6899 95.25 96.62
(0.30, 0.40)   6055 94.79 96.40
(0.40, 0.50)   5992 94.41 96.21


                                                                             

Chunk 1, Epoch 53/100, Train Loss: 31162.5486, Val Loss: 20317.1875
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.03 99.28
(0.05, 0.10)   9314 98.21 98.80
(0.10, 0.20)  10413 97.04 98.10
(0.20, 0.30)   6899 95.30 96.70
(0.30, 0.40)   6055 94.85 96.52
(0.40, 0.50)   5992 94.46 96.29


                                                                             

Chunk 1, Epoch 54/100, Train Loss: 31080.9441, Val Loss: 20370.6250
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.02 99.28
(0.05, 0.10)   9314 98.20 98.80
(0.10, 0.20)  10413 97.04 98.11
(0.20, 0.30)   6899 95.30 96.70
(0.30, 0.40)   6055 94.84 96.55
(0.40, 0.50)   5992 94.47 96.30


                                                                             

Chunk 1, Epoch 55/100, Train Loss: 30566.2848, Val Loss: 20318.6836
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.03 99.28
(0.05, 0.10)   9314 98.21 98.77
(0.10, 0.20)  10413 97.06 98.09
(0.20, 0.30)   6899 95.33 96.70
(0.30, 0.40)   6055 94.86 96.54
(0.40, 0.50)   5992 94.48 96.30


                                                                             

Chunk 1, Epoch 56/100, Train Loss: 30099.6805, Val Loss: 20154.8418
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.04 99.29
(0.05, 0.10)   9314 98.22 98.79
(0.10, 0.20)  10413 97.07 98.11
(0.20, 0.30)   6899 95.34 96.68
(0.30, 0.40)   6055 94.89 96.55
(0.40, 0.50)   5992 94.52 96.36


                                                                             

Chunk 1, Epoch 57/100, Train Loss: 30303.6649, Val Loss: 19514.5000
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.04 99.29
(0.05, 0.10)   9314 98.24 98.81
(0.10, 0.20)  10413 97.09 98.13
(0.20, 0.30)   6899 95.37 96.74
(0.30, 0.40)   6055 94.90 96.57
(0.40, 0.50)   5992 94.54 96.37


                                                                             

Chunk 1, Epoch 58/100, Train Loss: 29795.0378, Val Loss: 19387.2324
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.05 99.30
(0.05, 0.10)   9314 98.24 98.83
(0.10, 0.20)  10413 97.09 98.12
(0.20, 0.30)   6899 95.36 96.75
(0.30, 0.40)   6055 94.90 96.57
(0.40, 0.50)   5992 94.54 96.36


                                                                             

Chunk 1, Epoch 59/100, Train Loss: 29512.3097, Val Loss: 19104.1914
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.05 99.28
(0.05, 0.10)   9314 98.25 98.82
(0.10, 0.20)  10413 97.12 98.14
(0.20, 0.30)   6899 95.40 96.80
(0.30, 0.40)   6055 94.93 96.61
(0.40, 0.50)   5992 94.57 96.41


                                                                             

Chunk 1, Epoch 60/100, Train Loss: 28956.3998, Val Loss: 18759.5078
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.06 99.29
(0.05, 0.10)   9314 98.27 98.82
(0.10, 0.20)  10413 97.13 98.16
(0.20, 0.30)   6899 95.41 96.81
(0.30, 0.40)   6055 94.96 96.63
(0.40, 0.50)   5992 94.60 96.44


                                                                             

Chunk 1, Epoch 61/100, Train Loss: 28958.5626, Val Loss: 18997.8555
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.06 99.29
(0.05, 0.10)   9314 98.26 98.83
(0.10, 0.20)  10413 97.14 98.14
(0.20, 0.30)   6899 95.42 96.78
(0.30, 0.40)   6055 94.95 96.62
(0.40, 0.50)   5992 94.60 96.41


                                                                             

Chunk 1, Epoch 62/100, Train Loss: 28574.4994, Val Loss: 18464.1211
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.07 99.29
(0.05, 0.10)   9314 98.28 98.81
(0.10, 0.20)  10413 97.15 98.17
(0.20, 0.30)   6899 95.43 96.82
(0.30, 0.40)   6055 94.96 96.70
(0.40, 0.50)   5992 94.62 96.45


                                                                             

Chunk 1, Epoch 63/100, Train Loss: 28579.1445, Val Loss: 18274.7949
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.08 99.30
(0.05, 0.10)   9314 98.29 98.83
(0.10, 0.20)  10413 97.17 98.19
(0.20, 0.30)   6899 95.45 96.88
(0.30, 0.40)   6055 94.99 96.68
(0.40, 0.50)   5992 94.64 96.46


                                                                             

Chunk 1, Epoch 64/100, Train Loss: 27976.1648, Val Loss: 18075.0762
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.08 99.29
(0.05, 0.10)   9314 98.30 98.83
(0.10, 0.20)  10413 97.19 98.16
(0.20, 0.30)   6899 95.48 96.80
(0.30, 0.40)   6055 95.02 96.68
(0.40, 0.50)   5992 94.67 96.49


                                                                             

Chunk 1, Epoch 65/100, Train Loss: 27571.4187, Val Loss: 17910.2617
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.08 99.32
(0.05, 0.10)   9314 98.31 98.86
(0.10, 0.20)  10413 97.20 98.22
(0.20, 0.30)   6899 95.49 96.87
(0.30, 0.40)   6055 95.03 96.64
(0.40, 0.50)   5992 94.69 96.42


                                                                             

Chunk 1, Epoch 66/100, Train Loss: 27583.6186, Val Loss: 17716.0820
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.08 99.30
(0.05, 0.10)   9314 98.30 98.86
(0.10, 0.20)  10413 97.20 98.21
(0.20, 0.30)   6899 95.49 96.87
(0.30, 0.40)   6055 95.02 96.72
(0.40, 0.50)   5992 94.68 96.49


                                                                             

Chunk 1, Epoch 67/100, Train Loss: 26986.5704, Val Loss: 17171.0430
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.10 99.31
(0.05, 0.10)   9314 98.33 98.87
(0.10, 0.20)  10413 97.24 98.22
(0.20, 0.30)   6899 95.52 96.88
(0.30, 0.40)   6055 95.07 96.75
(0.40, 0.50)   5992 94.71 96.54


                                                                             

Chunk 1, Epoch 68/100, Train Loss: 26492.4601, Val Loss: 17663.6465
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.10 99.29
(0.05, 0.10)   9314 98.33 98.84
(0.10, 0.20)  10413 97.24 98.22
(0.20, 0.30)   6899 95.54 96.89
(0.30, 0.40)   6055 95.09 96.76
(0.40, 0.50)   5992 94.74 96.52


                                                                             

Chunk 1, Epoch 69/100, Train Loss: 26705.4166, Val Loss: 17258.9629
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.10 99.32
(0.05, 0.10)   9314 98.34 98.86
(0.10, 0.20)  10413 97.25 98.23
(0.20, 0.30)   6899 95.55 96.87
(0.30, 0.40)   6055 95.09 96.72
(0.40, 0.50)   5992 94.75 96.50


                                                                             

Chunk 1, Epoch 70/100, Train Loss: 26891.3776, Val Loss: 17172.1777
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.10 99.30
(0.05, 0.10)   9314 98.34 98.90
(0.10, 0.20)  10413 97.26 98.24
(0.20, 0.30)   6899 95.55 96.94
(0.30, 0.40)   6055 95.08 96.76
(0.40, 0.50)   5992 94.74 96.54


                                                                             

Chunk 1, Epoch 71/100, Train Loss: 26394.0794, Val Loss: 17172.3320
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.10 99.31
(0.05, 0.10)   9314 98.34 98.87
(0.10, 0.20)  10413 97.25 98.23
(0.20, 0.30)   6899 95.55 96.94
(0.30, 0.40)   6055 95.10 96.75
(0.40, 0.50)   5992 94.75 96.54


                                                                             

Chunk 1, Epoch 72/100, Train Loss: 23317.7481, Val Loss: 15146.2617
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.17 99.34
(0.05, 0.10)   9314 98.44 98.91
(0.10, 0.20)  10413 97.40 98.30
(0.20, 0.30)   6899 95.75 97.02
(0.30, 0.40)   6055 95.29 96.86
(0.40, 0.50)   5992 94.95 96.68


                                                                             

Chunk 1, Epoch 73/100, Train Loss: 22462.6163, Val Loss: 14541.1797
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.19 99.37
(0.05, 0.10)   9314 98.48 98.94
(0.10, 0.20)  10413 97.44 98.34
(0.20, 0.30)   6899 95.80 97.07
(0.30, 0.40)   6055 95.34 96.89
(0.40, 0.50)   5992 95.01 96.70


                                                                             

Chunk 1, Epoch 74/100, Train Loss: 21818.2318, Val Loss: 14477.3340
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.19 99.37
(0.05, 0.10)   9314 98.47 98.94
(0.10, 0.20)  10413 97.44 98.32
(0.20, 0.30)   6899 95.82 97.03
(0.30, 0.40)   6055 95.36 96.87
(0.40, 0.50)   5992 95.02 96.72


                                                                             

Chunk 1, Epoch 75/100, Train Loss: 22210.2676, Val Loss: 14313.5020
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.19 99.38
(0.05, 0.10)   9314 98.48 98.94
(0.10, 0.20)  10413 97.45 98.33
(0.20, 0.30)   6899 95.82 97.03
(0.30, 0.40)   6055 95.35 96.88
(0.40, 0.50)   5992 95.02 96.74


                                                                             

Chunk 1, Epoch 76/100, Train Loss: 21874.5508, Val Loss: 14120.8535
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.19 99.36
(0.05, 0.10)   9314 98.48 98.94
(0.10, 0.20)  10413 97.46 98.34
(0.20, 0.30)   6899 95.83 97.09
(0.30, 0.40)   6055 95.38 96.93
(0.40, 0.50)   5992 95.04 96.76


                                                                             

Chunk 1, Epoch 77/100, Train Loss: 21545.3752, Val Loss: 13947.8477
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.20 99.38
(0.05, 0.10)   9314 98.48 98.95
(0.10, 0.20)  10413 97.46 98.34
(0.20, 0.30)   6899 95.83 97.09
(0.30, 0.40)   6055 95.37 96.86
(0.40, 0.50)   5992 95.05 96.74


                                                                             

Chunk 1, Epoch 78/100, Train Loss: 21438.5501, Val Loss: 13697.1953
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.20 99.39
(0.05, 0.10)   9314 98.49 98.96
(0.10, 0.20)  10413 97.47 98.35
(0.20, 0.30)   6899 95.85 97.07
(0.30, 0.40)   6055 95.38 96.91
(0.40, 0.50)   5992 95.05 96.79


                                                                             

Chunk 1, Epoch 79/100, Train Loss: 21566.7089, Val Loss: 13833.4023
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.21 99.37
(0.05, 0.10)   9314 98.50 98.94
(0.10, 0.20)  10413 97.48 98.36
(0.20, 0.30)   6899 95.84 97.10
(0.30, 0.40)   6055 95.41 96.91
(0.40, 0.50)   5992 95.08 96.80


                                                                             

Chunk 1, Epoch 80/100, Train Loss: 21176.8406, Val Loss: 14082.5547
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.21 99.36
(0.05, 0.10)   9314 98.50 98.93
(0.10, 0.20)  10413 97.48 98.33
(0.20, 0.30)   6899 95.86 97.05
(0.30, 0.40)   6055 95.40 96.88
(0.40, 0.50)   5992 95.07 96.73


                                                                             

Chunk 1, Epoch 81/100, Train Loss: 21077.5073, Val Loss: 13677.9629
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.21 99.38
(0.05, 0.10)   9314 98.50 98.95
(0.10, 0.20)  10413 97.49 98.37
(0.20, 0.30)   6899 95.86 97.14
(0.30, 0.40)   6055 95.41 96.93
(0.40, 0.50)   5992 95.09 96.80


                                                                             

Chunk 1, Epoch 82/100, Train Loss: 21327.1054, Val Loss: 13416.3672
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.20 99.37
(0.05, 0.10)   9314 98.50 98.98
(0.10, 0.20)  10413 97.49 98.37
(0.20, 0.30)   6899 95.86 97.11
(0.30, 0.40)   6055 95.41 96.97
(0.40, 0.50)   5992 95.09 96.79


                                                                             

Chunk 1, Epoch 83/100, Train Loss: 21070.0387, Val Loss: 13613.1094
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.21 99.37
(0.05, 0.10)   9314 98.51 98.94
(0.10, 0.20)  10413 97.50 98.38
(0.20, 0.30)   6899 95.90 97.13
(0.30, 0.40)   6055 95.46 96.95
(0.40, 0.50)   5992 95.13 96.82


                                                                             

Chunk 1, Epoch 84/100, Train Loss: 20750.9116, Val Loss: 13476.8965
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.22 99.37
(0.05, 0.10)   9314 98.52 98.94
(0.10, 0.20)  10413 97.51 98.35
(0.20, 0.30)   6899 95.88 97.10
(0.30, 0.40)   6055 95.45 96.94
(0.40, 0.50)   5992 95.12 96.79


                                                                             

Chunk 1, Epoch 85/100, Train Loss: 20659.6542, Val Loss: 12990.4180
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.21 99.40
(0.05, 0.10)   9314 98.51 98.97
(0.10, 0.20)  10413 97.49 98.39
(0.20, 0.30)   6899 95.86 97.12
(0.30, 0.40)   6055 95.42 96.98
(0.40, 0.50)   5992 95.09 96.82


                                                                             

Chunk 1, Epoch 86/100, Train Loss: 20499.1233, Val Loss: 13269.6074
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.22 99.39
(0.05, 0.10)   9314 98.52 98.97
(0.10, 0.20)  10413 97.51 98.37
(0.20, 0.30)   6899 95.89 97.07
(0.30, 0.40)   6055 95.45 96.95
(0.40, 0.50)   5992 95.12 96.82


                                                                             

Chunk 1, Epoch 87/100, Train Loss: 20493.7837, Val Loss: 13069.7891
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.22 99.38
(0.05, 0.10)   9314 98.52 98.96
(0.10, 0.20)  10413 97.52 98.38
(0.20, 0.30)   6899 95.90 97.13
(0.30, 0.40)   6055 95.46 96.99
(0.40, 0.50)   5992 95.13 96.84


                                                                             

Chunk 1, Epoch 88/100, Train Loss: 20301.9371, Val Loss: 13086.0391
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.22 99.37
(0.05, 0.10)   9314 98.52 98.96
(0.10, 0.20)  10413 97.52 98.38
(0.20, 0.30)   6899 95.89 97.12
(0.30, 0.40)   6055 95.46 97.01
(0.40, 0.50)   5992 95.13 96.85


                                                                             

Chunk 1, Epoch 89/100, Train Loss: 20286.4540, Val Loss: 13072.1406
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.22 99.39
(0.05, 0.10)   9314 98.53 98.96
(0.10, 0.20)  10413 97.53 98.39
(0.20, 0.30)   6899 95.91 97.17
(0.30, 0.40)   6055 95.46 96.99
(0.40, 0.50)   5992 95.14 96.88


                                                                             

Chunk 1, Epoch 90/100, Train Loss: 18535.1574, Val Loss: 11967.3516
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.26 99.41
(0.05, 0.10)   9314 98.59 99.00
(0.10, 0.20)  10413 97.61 98.43
(0.20, 0.30)   6899 96.03 97.23
(0.30, 0.40)   6055 95.59 97.04
(0.40, 0.50)   5992 95.27 96.92


                                                                             

Chunk 1, Epoch 91/100, Train Loss: 18042.0691, Val Loss: 11776.3789
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.27 99.41
(0.05, 0.10)   9314 98.60 99.00
(0.10, 0.20)  10413 97.62 98.44
(0.20, 0.30)   6899 96.04 97.21
(0.30, 0.40)   6055 95.62 97.05
(0.40, 0.50)   5992 95.29 96.93


                                                                             

Chunk 1, Epoch 92/100, Train Loss: 18068.0742, Val Loss: 11728.6445
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.26 99.41
(0.05, 0.10)   9314 98.60 98.99
(0.10, 0.20)  10413 97.62 98.42
(0.20, 0.30)   6899 96.04 97.21
(0.30, 0.40)   6055 95.61 97.07
(0.40, 0.50)   5992 95.29 96.91


                                                                             

Chunk 1, Epoch 93/100, Train Loss: 17973.0510, Val Loss: 11938.1738
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.27 99.40
(0.05, 0.10)   9314 98.60 98.98
(0.10, 0.20)  10413 97.62 98.42
(0.20, 0.30)   6899 96.05 97.20
(0.30, 0.40)   6055 95.61 97.04
(0.40, 0.50)   5992 95.29 96.92


                                                                              

Chunk 1, Epoch 94/100, Train Loss: 17562.6238, Val Loss: 11613.6367
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.27 99.41
(0.05, 0.10)   9314 98.60 99.00
(0.10, 0.20)  10413 97.62 98.44
(0.20, 0.30)   6899 96.05 97.25
(0.30, 0.40)   6055 95.61 97.05
(0.40, 0.50)   5992 95.29 96.95


                                                                              

Chunk 1, Epoch 95/100, Train Loss: 17537.2970, Val Loss: 11548.2246
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.27 99.41
(0.05, 0.10)   9314 98.61 99.01
(0.10, 0.20)  10413 97.64 98.45
(0.20, 0.30)   6899 96.05 97.24
(0.30, 0.40)   6055 95.62 97.07
(0.40, 0.50)   5992 95.31 96.91


                                                                              

Chunk 1, Epoch 96/100, Train Loss: 17717.3938, Val Loss: 11618.7480
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.27 99.40
(0.05, 0.10)   9314 98.61 99.00
(0.10, 0.20)  10413 97.63 98.44
(0.20, 0.30)   6899 96.07 97.22
(0.30, 0.40)   6055 95.64 97.09
(0.40, 0.50)   5992 95.32 96.94


                                                                              

Chunk 1, Epoch 97/100, Train Loss: 17455.6649, Val Loss: 11311.5352
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.28 99.42
(0.05, 0.10)   9314 98.61 99.02
(0.10, 0.20)  10413 97.65 98.43
(0.20, 0.30)   6899 96.07 97.20
(0.30, 0.40)   6055 95.65 97.09
(0.40, 0.50)   5992 95.33 96.95


                                                                             

Chunk 1, Epoch 98/100, Train Loss: 17347.5030, Val Loss: 11403.9902
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.28 99.41
(0.05, 0.10)   9314 98.62 99.02
(0.10, 0.20)  10413 97.65 98.44
(0.20, 0.30)   6899 96.07 97.23
(0.30, 0.40)   6055 95.64 97.07
(0.40, 0.50)   5992 95.32 96.95


                                                                              

Chunk 1, Epoch 99/100, Train Loss: 17218.0646, Val Loss: 11465.0742
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.28 99.41
(0.05, 0.10)   9314 98.62 98.99
(0.10, 0.20)  10413 97.65 98.43
(0.20, 0.30)   6899 96.09 97.26
(0.30, 0.40)   6055 95.66 97.09
(0.40, 0.50)   5992 95.34 96.95


                                                                               

Chunk 1, Epoch 100/100, Train Loss: 17112.4287, Val Loss: 11281.4023
     MAF_bin Counts Train   Val
(0.00, 0.05)  26859 99.28 99.42
(0.05, 0.10)   9314 98.62 99.01
(0.10, 0.20)  10413 97.66 98.43
(0.20, 0.30)   6899 96.10 97.24
(0.30, 0.40)   6055 95.66 97.10
(0.40, 0.50)   5992 95.35 96.96
Training on chunk 2/2
Data shape: (4808, 33906)
Finding duplicate haploids in training set.
Removed 0 rows. 4744 training samples remaining.
Chunk MAF-bin counts: [15300, 4779, 5050, 3298, 2780, 2567]


                                                                            

Chunk 2, Epoch 1/100, Train Loss: 112614.6237, Val Loss: 81809.8496
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 97.56 98.37
(0.05, 0.10)   4779 93.30 95.45
(0.10, 0.20)   5050 88.29 92.47
(0.20, 0.30)   3298 81.74 88.39
(0.30, 0.40)   2780 77.64 86.10
(0.40, 0.50)   2567 75.49 85.51


                                                                            

Chunk 2, Epoch 2/100, Train Loss: 66436.4723, Val Loss: 49186.0801
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 97.94 98.63
(0.05, 0.10)   4779 94.84 96.90
(0.10, 0.20)   5050 92.55 95.45
(0.20, 0.30)   3298 89.54 93.70
(0.30, 0.40)   2780 88.93 92.67
(0.40, 0.50)   2567 88.65 92.12


                                                                            

Chunk 2, Epoch 3/100, Train Loss: 50235.9964, Val Loss: 38200.5498
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.10 98.79
(0.05, 0.10)   4779 95.83 97.55
(0.10, 0.20)   5050 94.23 96.26
(0.20, 0.30)   3298 92.43 95.12
(0.30, 0.40)   2780 91.71 94.42
(0.40, 0.50)   2567 91.51 94.06


                                                                            

Chunk 2, Epoch 4/100, Train Loss: 40580.3289, Val Loss: 33659.7881
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.26 98.87
(0.05, 0.10)   4779 96.54 97.81
(0.10, 0.20)   5050 95.25 96.77
(0.20, 0.30)   3298 93.92 95.78
(0.30, 0.40)   2780 93.33 94.98
(0.40, 0.50)   2567 93.08 94.69


                                                                            

Chunk 2, Epoch 5/100, Train Loss: 34704.1682, Val Loss: 25521.7412
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.37 99.00
(0.05, 0.10)   4779 96.95 98.20
(0.10, 0.20)   5050 95.83 97.34
(0.20, 0.30)   3298 94.68 96.55
(0.30, 0.40)   2780 94.23 95.88
(0.40, 0.50)   2567 93.97 95.72


                                                                            

Chunk 2, Epoch 6/100, Train Loss: 30767.5128, Val Loss: 21808.8691
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.49 99.11
(0.05, 0.10)   4779 97.25 98.48
(0.10, 0.20)   5050 96.24 97.68
(0.20, 0.30)   3298 95.15 96.93
(0.30, 0.40)   2780 94.79 96.52
(0.40, 0.50)   2567 94.46 96.13


                                                                            

Chunk 2, Epoch 7/100, Train Loss: 27380.7660, Val Loss: 16811.4619
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.60 99.20
(0.05, 0.10)   4779 97.51 98.66
(0.10, 0.20)   5050 96.59 98.14
(0.20, 0.30)   3298 95.59 97.48
(0.30, 0.40)   2780 95.25 97.16
(0.40, 0.50)   2567 94.88 96.83


                                                                            

Chunk 2, Epoch 8/100, Train Loss: 25237.8004, Val Loss: 19271.5615
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.67 99.22
(0.05, 0.10)   4779 97.66 98.66
(0.10, 0.20)   5050 96.78 97.85
(0.20, 0.30)   3298 95.81 97.24
(0.30, 0.40)   2780 95.48 96.76
(0.40, 0.50)   2567 95.10 96.36


                                                                            

Chunk 2, Epoch 9/100, Train Loss: 23007.3651, Val Loss: 16689.5918
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.75 99.27
(0.05, 0.10)   4779 97.82 98.80
(0.10, 0.20)   5050 96.98 98.14
(0.20, 0.30)   3298 96.05 97.44
(0.30, 0.40)   2780 95.75 96.97
(0.40, 0.50)   2567 95.34 96.63


                                                                             

Chunk 2, Epoch 10/100, Train Loss: 21752.5935, Val Loss: 15384.8281
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.80 99.31
(0.05, 0.10)   4779 97.93 98.84
(0.10, 0.20)   5050 97.14 98.24
(0.20, 0.30)   3298 96.22 97.55
(0.30, 0.40)   2780 95.90 97.08
(0.40, 0.50)   2567 95.52 96.81


                                                                             

Chunk 2, Epoch 11/100, Train Loss: 20284.0628, Val Loss: 14378.3281
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.85 99.33
(0.05, 0.10)   4779 98.05 98.89
(0.10, 0.20)   5050 97.27 98.27
(0.20, 0.30)   3298 96.41 97.73
(0.30, 0.40)   2780 96.07 97.31
(0.40, 0.50)   2567 95.68 97.01


                                                                             

Chunk 2, Epoch 12/100, Train Loss: 18990.6933, Val Loss: 14908.4619
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.90 99.36
(0.05, 0.10)   4779 98.15 98.94
(0.10, 0.20)   5050 97.38 98.27
(0.20, 0.30)   3298 96.53 97.66
(0.30, 0.40)   2780 96.23 97.21
(0.40, 0.50)   2567 95.84 96.85


                                                                             

Chunk 2, Epoch 13/100, Train Loss: 18054.4099, Val Loss: 11158.8906
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.93 99.44
(0.05, 0.10)   4779 98.22 99.05
(0.10, 0.20)   5050 97.47 98.61
(0.20, 0.30)   3298 96.64 98.05
(0.30, 0.40)   2780 96.33 97.72
(0.40, 0.50)   2567 95.95 97.43


                                                                             

Chunk 2, Epoch 14/100, Train Loss: 17059.9982, Val Loss: 10762.4648
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 98.97 99.43
(0.05, 0.10)   4779 98.29 99.12
(0.10, 0.20)   5050 97.56 98.62
(0.20, 0.30)   3298 96.74 98.10
(0.30, 0.40)   2780 96.45 97.78
(0.40, 0.50)   2567 96.04 97.45


                                                                             

Chunk 2, Epoch 15/100, Train Loss: 16109.2052, Val Loss: 9732.2910
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.00 99.46
(0.05, 0.10)   4779 98.34 99.12
(0.10, 0.20)   5050 97.63 98.65
(0.20, 0.30)   3298 96.82 98.17
(0.30, 0.40)   2780 96.53 97.90
(0.40, 0.50)   2567 96.14 97.57


                                                                             

Chunk 2, Epoch 16/100, Train Loss: 15603.1952, Val Loss: 9581.9590
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.02 99.48
(0.05, 0.10)   4779 98.39 99.15
(0.10, 0.20)   5050 97.69 98.70
(0.20, 0.30)   3298 96.91 98.17
(0.30, 0.40)   2780 96.60 97.95
(0.40, 0.50)   2567 96.21 97.63


                                                                             

Chunk 2, Epoch 17/100, Train Loss: 15165.1680, Val Loss: 9284.5576
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.05 99.47
(0.05, 0.10)   4779 98.44 99.16
(0.10, 0.20)   5050 97.76 98.69
(0.20, 0.30)   3298 96.97 98.22
(0.30, 0.40)   2780 96.67 97.93
(0.40, 0.50)   2567 96.29 97.60


                                                                             

Chunk 2, Epoch 18/100, Train Loss: 14419.7921, Val Loss: 8083.5811
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.07 99.51
(0.05, 0.10)   4779 98.48 99.22
(0.10, 0.20)   5050 97.80 98.83
(0.20, 0.30)   3298 97.01 98.29
(0.30, 0.40)   2780 96.73 98.09
(0.40, 0.50)   2567 96.34 97.72


                                                                             

Chunk 2, Epoch 19/100, Train Loss: 13963.5369, Val Loss: 7206.7354
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.09 99.52
(0.05, 0.10)   4779 98.52 99.26
(0.10, 0.20)   5050 97.85 98.89
(0.20, 0.30)   3298 97.08 98.40
(0.30, 0.40)   2780 96.80 98.18
(0.40, 0.50)   2567 96.41 97.84


                                                                              

Chunk 2, Epoch 20/100, Train Loss: 13413.9857, Val Loss: 7821.5342
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.11 99.52
(0.05, 0.10)   4779 98.55 99.24
(0.10, 0.20)   5050 97.90 98.84
(0.20, 0.30)   3298 97.13 98.31
(0.30, 0.40)   2780 96.85 98.08
(0.40, 0.50)   2567 96.47 97.76


                                                                             

Chunk 2, Epoch 21/100, Train Loss: 13381.3997, Val Loss: 8025.4541
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.13 99.54
(0.05, 0.10)   4779 98.58 99.27
(0.10, 0.20)   5050 97.92 98.85
(0.20, 0.30)   3298 97.17 98.29
(0.30, 0.40)   2780 96.90 98.10
(0.40, 0.50)   2567 96.51 97.72


                                                                             

Chunk 2, Epoch 22/100, Train Loss: 12789.5822, Val Loss: 6645.5029
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.16 99.54
(0.05, 0.10)   4779 98.62 99.28
(0.10, 0.20)   5050 97.98 98.93
(0.20, 0.30)   3298 97.23 98.44
(0.30, 0.40)   2780 96.96 98.27
(0.40, 0.50)   2567 96.57 97.98


                                                                              

Chunk 2, Epoch 23/100, Train Loss: 12242.0272, Val Loss: 6123.1865
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.17 99.57
(0.05, 0.10)   4779 98.65 99.35
(0.10, 0.20)   5050 98.02 98.96
(0.20, 0.30)   3298 97.28 98.46
(0.30, 0.40)   2780 97.02 98.33
(0.40, 0.50)   2567 96.63 98.05


                                                                             

Chunk 2, Epoch 24/100, Train Loss: 12050.1508, Val Loss: 6237.2910
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.18 99.56
(0.05, 0.10)   4779 98.66 99.33
(0.10, 0.20)   5050 98.02 98.97
(0.20, 0.30)   3298 97.28 98.44
(0.30, 0.40)   2780 97.02 98.29
(0.40, 0.50)   2567 96.64 97.97


                                                                             

Chunk 2, Epoch 25/100, Train Loss: 11809.0660, Val Loss: 6344.9863
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.20 99.58
(0.05, 0.10)   4779 98.69 99.33
(0.10, 0.20)   5050 98.08 98.92
(0.20, 0.30)   3298 97.36 98.44
(0.30, 0.40)   2780 97.07 98.17
(0.40, 0.50)   2567 96.69 97.95


                                                                              

Chunk 2, Epoch 26/100, Train Loss: 11235.6499, Val Loss: 6181.8115
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.21 99.57
(0.05, 0.10)   4779 98.73 99.36
(0.10, 0.20)   5050 98.11 98.95
(0.20, 0.30)   3298 97.37 98.48
(0.30, 0.40)   2780 97.10 98.32
(0.40, 0.50)   2567 96.72 98.02


                                                                             

Chunk 2, Epoch 27/100, Train Loss: 11181.1267, Val Loss: 5242.9062
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.22 99.59
(0.05, 0.10)   4779 98.73 99.39
(0.10, 0.20)   5050 98.12 99.01
(0.20, 0.30)   3298 97.40 98.56
(0.30, 0.40)   2780 97.14 98.41
(0.40, 0.50)   2567 96.75 98.14


                                                                              

Chunk 2, Epoch 28/100, Train Loss: 10712.0984, Val Loss: 6546.7021
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.24 99.59
(0.05, 0.10)   4779 98.76 99.34
(0.10, 0.20)   5050 98.15 98.93
(0.20, 0.30)   3298 97.45 98.47
(0.30, 0.40)   2780 97.18 98.03
(0.40, 0.50)   2567 96.81 97.83


                                                                              

Chunk 2, Epoch 29/100, Train Loss: 10533.4459, Val Loss: 4959.9629
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.24 99.61
(0.05, 0.10)   4779 98.77 99.41
(0.10, 0.20)   5050 98.16 99.02
(0.20, 0.30)   3298 97.46 98.59
(0.30, 0.40)   2780 97.20 98.47
(0.40, 0.50)   2567 96.82 98.17


                                                                              

Chunk 2, Epoch 30/100, Train Loss: 10291.4135, Val Loss: 5100.6670
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.26 99.60
(0.05, 0.10)   4779 98.81 99.42
(0.10, 0.20)   5050 98.19 99.02
(0.20, 0.30)   3298 97.49 98.54
(0.30, 0.40)   2780 97.23 98.39
(0.40, 0.50)   2567 96.87 98.17


                                                                              

Chunk 2, Epoch 31/100, Train Loss: 10173.2601, Val Loss: 4763.7861
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.26 99.62
(0.05, 0.10)   4779 98.82 99.43
(0.10, 0.20)   5050 98.22 99.06
(0.20, 0.30)   3298 97.52 98.60
(0.30, 0.40)   2780 97.25 98.49
(0.40, 0.50)   2567 96.90 98.19


                                                                              

Chunk 2, Epoch 32/100, Train Loss: 9731.0915, Val Loss: 4742.6670
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.28 99.62
(0.05, 0.10)   4779 98.84 99.43
(0.10, 0.20)   5050 98.25 99.05
(0.20, 0.30)   3298 97.55 98.59
(0.30, 0.40)   2780 97.30 98.47
(0.40, 0.50)   2567 96.92 98.20


                                                                              

Chunk 2, Epoch 33/100, Train Loss: 9604.0909, Val Loss: 4961.6631
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.29 99.63
(0.05, 0.10)   4779 98.86 99.43
(0.10, 0.20)   5050 98.26 99.03
(0.20, 0.30)   3298 97.56 98.56
(0.30, 0.40)   2780 97.30 98.46
(0.40, 0.50)   2567 96.94 98.18


                                                                              

Chunk 2, Epoch 34/100, Train Loss: 9495.9392, Val Loss: 4559.7490
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.29 99.62
(0.05, 0.10)   4779 98.86 99.43
(0.10, 0.20)   5050 98.27 99.06
(0.20, 0.30)   3298 97.57 98.62
(0.30, 0.40)   2780 97.31 98.44
(0.40, 0.50)   2567 96.96 98.23


                                                                              

Chunk 2, Epoch 35/100, Train Loss: 9214.6357, Val Loss: 3584.4541
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.30 99.64
(0.05, 0.10)   4779 98.89 99.46
(0.10, 0.20)   5050 98.29 99.13
(0.20, 0.30)   3298 97.61 98.71
(0.30, 0.40)   2780 97.35 98.59
(0.40, 0.50)   2567 96.98 98.35


                                                                              

Chunk 2, Epoch 36/100, Train Loss: 8947.7171, Val Loss: 3566.9326
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.31 99.65
(0.05, 0.10)   4779 98.90 99.49
(0.10, 0.20)   5050 98.31 99.14
(0.20, 0.30)   3298 97.63 98.73
(0.30, 0.40)   2780 97.37 98.62
(0.40, 0.50)   2567 97.02 98.34


                                                                              

Chunk 2, Epoch 37/100, Train Loss: 9062.1537, Val Loss: 4325.5508
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.31 99.64
(0.05, 0.10)   4779 98.90 99.47
(0.10, 0.20)   5050 98.31 99.10
(0.20, 0.30)   3298 97.64 98.63
(0.30, 0.40)   2780 97.38 98.51
(0.40, 0.50)   2567 97.02 98.26


                                                                              

Chunk 2, Epoch 38/100, Train Loss: 8496.0528, Val Loss: 3450.7480
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.33 99.66
(0.05, 0.10)   4779 98.93 99.49
(0.10, 0.20)   5050 98.35 99.14
(0.20, 0.30)   3298 97.66 98.75
(0.30, 0.40)   2780 97.42 98.61
(0.40, 0.50)   2567 97.06 98.36


                                                                              

Chunk 2, Epoch 39/100, Train Loss: 8656.4756, Val Loss: 3880.2598
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.33 99.65
(0.05, 0.10)   4779 98.94 99.47
(0.10, 0.20)   5050 98.34 99.10
(0.20, 0.30)   3298 97.68 98.67
(0.30, 0.40)   2780 97.42 98.58
(0.40, 0.50)   2567 97.07 98.31


                                                                              

Chunk 2, Epoch 40/100, Train Loss: 8259.8019, Val Loss: 3101.6758
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.34 99.66
(0.05, 0.10)   4779 98.94 99.51
(0.10, 0.20)   5050 98.36 99.14
(0.20, 0.30)   3298 97.70 98.76
(0.30, 0.40)   2780 97.45 98.66
(0.40, 0.50)   2567 97.10 98.39


                                                                              

Chunk 2, Epoch 41/100, Train Loss: 8220.9376, Val Loss: 3085.7295
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.34 99.67
(0.05, 0.10)   4779 98.95 99.50
(0.10, 0.20)   5050 98.36 99.16
(0.20, 0.30)   3298 97.70 98.77
(0.30, 0.40)   2780 97.44 98.67
(0.40, 0.50)   2567 97.09 98.35


                                                                              

Chunk 2, Epoch 42/100, Train Loss: 8101.3931, Val Loss: 3261.7529
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.35 99.66
(0.05, 0.10)   4779 98.96 99.51
(0.10, 0.20)   5050 98.38 99.14
(0.20, 0.30)   3298 97.72 98.73
(0.30, 0.40)   2780 97.48 98.66
(0.40, 0.50)   2567 97.12 98.40


                                                                              

Chunk 2, Epoch 43/100, Train Loss: 7978.3091, Val Loss: 3350.4756
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.36 99.67
(0.05, 0.10)   4779 98.98 99.52
(0.10, 0.20)   5050 98.39 99.14
(0.20, 0.30)   3298 97.75 98.71
(0.30, 0.40)   2780 97.49 98.62
(0.40, 0.50)   2567 97.13 98.37


                                                                              

Chunk 2, Epoch 44/100, Train Loss: 7916.7523, Val Loss: 2706.3184
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.36 99.67
(0.05, 0.10)   4779 98.98 99.52
(0.10, 0.20)   5050 98.41 99.18
(0.20, 0.30)   3298 97.75 98.78
(0.30, 0.40)   2780 97.51 98.72
(0.40, 0.50)   2567 97.15 98.45


                                                                              

Chunk 2, Epoch 45/100, Train Loss: 7555.9114, Val Loss: 2736.0225
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.37 99.68
(0.05, 0.10)   4779 98.99 99.52
(0.10, 0.20)   5050 98.42 99.17
(0.20, 0.30)   3298 97.77 98.80
(0.30, 0.40)   2780 97.53 98.72
(0.40, 0.50)   2567 97.17 98.47


                                                                              

Chunk 2, Epoch 46/100, Train Loss: 7398.5595, Val Loss: 2660.6279
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.38 99.68
(0.05, 0.10)   4779 99.01 99.53
(0.10, 0.20)   5050 98.44 99.19
(0.20, 0.30)   3298 97.79 98.79
(0.30, 0.40)   2780 97.55 98.74
(0.40, 0.50)   2567 97.20 98.45


                                                                              

Chunk 2, Epoch 47/100, Train Loss: 7427.5929, Val Loss: 2595.7012
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.38 99.69
(0.05, 0.10)   4779 99.01 99.55
(0.10, 0.20)   5050 98.44 99.20
(0.20, 0.30)   3298 97.80 98.81
(0.30, 0.40)   2780 97.55 98.74
(0.40, 0.50)   2567 97.21 98.47


                                                                              

Chunk 2, Epoch 48/100, Train Loss: 7420.5187, Val Loss: 2486.5625
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.38 99.68
(0.05, 0.10)   4779 99.02 99.54
(0.10, 0.20)   5050 98.44 99.20
(0.20, 0.30)   3298 97.79 98.83
(0.30, 0.40)   2780 97.55 98.75
(0.40, 0.50)   2567 97.20 98.48


                                                                              

Chunk 2, Epoch 49/100, Train Loss: 7127.0136, Val Loss: 2333.5713
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.39 99.69
(0.05, 0.10)   4779 99.03 99.55
(0.10, 0.20)   5050 98.45 99.21
(0.20, 0.30)   3298 97.83 98.83
(0.30, 0.40)   2780 97.58 98.75
(0.40, 0.50)   2567 97.23 98.53


                                                                              

Chunk 2, Epoch 50/100, Train Loss: 7054.8447, Val Loss: 2646.6006
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.40 99.68
(0.05, 0.10)   4779 99.04 99.50
(0.10, 0.20)   5050 98.47 99.16
(0.20, 0.30)   3298 97.84 98.79
(0.30, 0.40)   2780 97.60 98.72
(0.40, 0.50)   2567 97.24 98.44


                                                                              

Chunk 2, Epoch 51/100, Train Loss: 6928.7379, Val Loss: 2096.1152
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.40 99.70
(0.05, 0.10)   4779 99.04 99.54
(0.10, 0.20)   5050 98.47 99.23
(0.20, 0.30)   3298 97.85 98.84
(0.30, 0.40)   2780 97.59 98.82
(0.40, 0.50)   2567 97.25 98.56


                                                                              

Chunk 2, Epoch 52/100, Train Loss: 6932.0478, Val Loss: 2114.2158
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.40 99.70
(0.05, 0.10)   4779 99.05 99.56
(0.10, 0.20)   5050 98.48 99.23
(0.20, 0.30)   3298 97.86 98.87
(0.30, 0.40)   2780 97.60 98.79
(0.40, 0.50)   2567 97.28 98.56


                                                                              

Chunk 2, Epoch 53/100, Train Loss: 6796.3306, Val Loss: 2113.4053
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.41 99.70
(0.05, 0.10)   4779 99.06 99.55
(0.10, 0.20)   5050 98.49 99.23
(0.20, 0.30)   3298 97.86 98.84
(0.30, 0.40)   2780 97.62 98.79
(0.40, 0.50)   2567 97.27 98.53


                                                                              

Chunk 2, Epoch 54/100, Train Loss: 6819.6903, Val Loss: 2063.3594
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.41 99.70
(0.05, 0.10)   4779 99.06 99.55
(0.10, 0.20)   5050 98.50 99.22
(0.20, 0.30)   3298 97.89 98.87
(0.30, 0.40)   2780 97.63 98.80
(0.40, 0.50)   2567 97.29 98.59


                                                                              

Chunk 2, Epoch 55/100, Train Loss: 6708.5415, Val Loss: 2073.2188
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.41 99.71
(0.05, 0.10)   4779 99.06 99.56
(0.10, 0.20)   5050 98.49 99.22
(0.20, 0.30)   3298 97.89 98.85
(0.30, 0.40)   2780 97.64 98.77
(0.40, 0.50)   2567 97.29 98.56


                                                                              

Chunk 2, Epoch 56/100, Train Loss: 6327.2150, Val Loss: 1893.7109
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.42 99.69
(0.05, 0.10)   4779 99.08 99.56
(0.10, 0.20)   5050 98.51 99.25
(0.20, 0.30)   3298 97.90 98.86
(0.30, 0.40)   2780 97.66 98.83
(0.40, 0.50)   2567 97.32 98.58


                                                                              

Chunk 2, Epoch 57/100, Train Loss: 6489.6673, Val Loss: 1856.9844
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.43 99.70
(0.05, 0.10)   4779 99.09 99.57
(0.10, 0.20)   5050 98.53 99.25
(0.20, 0.30)   3298 97.91 98.85
(0.30, 0.40)   2780 97.67 98.84
(0.40, 0.50)   2567 97.34 98.56


                                                                              

Chunk 2, Epoch 58/100, Train Loss: 6436.8849, Val Loss: 1605.8320
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.43 99.71
(0.05, 0.10)   4779 99.08 99.57
(0.10, 0.20)   5050 98.52 99.27
(0.20, 0.30)   3298 97.90 98.90
(0.30, 0.40)   2780 97.66 98.82
(0.40, 0.50)   2567 97.33 98.62


                                                                              

Chunk 2, Epoch 59/100, Train Loss: 6303.6614, Val Loss: 1619.9365
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.43 99.71
(0.05, 0.10)   4779 99.10 99.57
(0.10, 0.20)   5050 98.54 99.26
(0.20, 0.30)   3298 97.93 98.90
(0.30, 0.40)   2780 97.69 98.83
(0.40, 0.50)   2567 97.35 98.61


                                                                              

Chunk 2, Epoch 60/100, Train Loss: 6186.5854, Val Loss: 1664.3086
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.44 99.71
(0.05, 0.10)   4779 99.10 99.57
(0.10, 0.20)   5050 98.54 99.25
(0.20, 0.30)   3298 97.93 98.88
(0.30, 0.40)   2780 97.69 98.84
(0.40, 0.50)   2567 97.34 98.62


                                                                              

Chunk 2, Epoch 61/100, Train Loss: 5839.4346, Val Loss: 1576.4668
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.44 99.72
(0.05, 0.10)   4779 99.11 99.57
(0.10, 0.20)   5050 98.56 99.27
(0.20, 0.30)   3298 97.95 98.91
(0.30, 0.40)   2780 97.72 98.84
(0.40, 0.50)   2567 97.38 98.63


                                                                              

Chunk 2, Epoch 62/100, Train Loss: 5936.9354, Val Loss: 1486.6484
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.44 99.72
(0.05, 0.10)   4779 99.11 99.59
(0.10, 0.20)   5050 98.56 99.28
(0.20, 0.30)   3298 97.94 98.90
(0.30, 0.40)   2780 97.71 98.86
(0.40, 0.50)   2567 97.38 98.62


                                                                              

Chunk 2, Epoch 63/100, Train Loss: 5881.2808, Val Loss: 1278.0244
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.45 99.73
(0.05, 0.10)   4779 99.11 99.59
(0.10, 0.20)   5050 98.56 99.27
(0.20, 0.30)   3298 97.96 98.93
(0.30, 0.40)   2780 97.72 98.89
(0.40, 0.50)   2567 97.38 98.65


                                                                              

Chunk 2, Epoch 64/100, Train Loss: 5663.1754, Val Loss: 1259.4062
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.45 99.72
(0.05, 0.10)   4779 99.13 99.59
(0.10, 0.20)   5050 98.58 99.30
(0.20, 0.30)   3298 97.99 98.92
(0.30, 0.40)   2780 97.75 98.87
(0.40, 0.50)   2567 97.41 98.68


                                                                              

Chunk 2, Epoch 65/100, Train Loss: 5912.9227, Val Loss: 1290.9678
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.46 99.72
(0.05, 0.10)   4779 99.13 99.59
(0.10, 0.20)   5050 98.58 99.29
(0.20, 0.30)   3298 97.97 98.92
(0.30, 0.40)   2780 97.73 98.89
(0.40, 0.50)   2567 97.40 98.65


                                                                              

Chunk 2, Epoch 66/100, Train Loss: 5839.1770, Val Loss: 1318.3145
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.45 99.71
(0.05, 0.10)   4779 99.13 99.58
(0.10, 0.20)   5050 98.57 99.29
(0.20, 0.30)   3298 97.97 98.91
(0.30, 0.40)   2780 97.74 98.91
(0.40, 0.50)   2567 97.41 98.70


                                                                              

Chunk 2, Epoch 67/100, Train Loss: 5719.5162, Val Loss: 1155.7549
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.46 99.73
(0.05, 0.10)   4779 99.14 99.58
(0.10, 0.20)   5050 98.59 99.30
(0.20, 0.30)   3298 97.99 98.94
(0.30, 0.40)   2780 97.74 98.91
(0.40, 0.50)   2567 97.41 98.69


                                                                              

Chunk 2, Epoch 68/100, Train Loss: 5443.9092, Val Loss: 1082.8545
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.46 99.73
(0.05, 0.10)   4779 99.13 99.59
(0.10, 0.20)   5050 98.59 99.30
(0.20, 0.30)   3298 97.99 98.95
(0.30, 0.40)   2780 97.77 98.93
(0.40, 0.50)   2567 97.42 98.72


                                                                              

Chunk 2, Epoch 69/100, Train Loss: 5377.4412, Val Loss: 1138.8125
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.47 99.73
(0.05, 0.10)   4779 99.16 99.59
(0.10, 0.20)   5050 98.61 99.28
(0.20, 0.30)   3298 98.01 98.94
(0.30, 0.40)   2780 97.78 98.92
(0.40, 0.50)   2567 97.44 98.72


                                                                              

Chunk 2, Epoch 70/100, Train Loss: 5430.2726, Val Loss: 1174.3896
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.47 99.72
(0.05, 0.10)   4779 99.16 99.60
(0.10, 0.20)   5050 98.61 99.30
(0.20, 0.30)   3298 98.03 98.97
(0.30, 0.40)   2780 97.78 98.94
(0.40, 0.50)   2567 97.45 98.69


                                                                              

Chunk 2, Epoch 71/100, Train Loss: 5668.0222, Val Loss: 1140.6230
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.47 99.73
(0.05, 0.10)   4779 99.15 99.59
(0.10, 0.20)   5050 98.59 99.30
(0.20, 0.30)   3298 98.01 98.95
(0.30, 0.40)   2780 97.76 98.90
(0.40, 0.50)   2567 97.43 98.69


                                                                              

Chunk 2, Epoch 72/100, Train Loss: 5152.3154, Val Loss: 1125.0586
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.48 99.72
(0.05, 0.10)   4779 99.16 99.61
(0.10, 0.20)   5050 98.62 99.30
(0.20, 0.30)   3298 98.03 98.93
(0.30, 0.40)   2780 97.78 98.95
(0.40, 0.50)   2567 97.45 98.72


                                                                              

Chunk 2, Epoch 73/100, Train Loss: 4257.8220, Val Loss: 421.6689
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.52 99.75
(0.05, 0.10)   4779 99.23 99.62
(0.10, 0.20)   5050 98.71 99.34
(0.20, 0.30)   3298 98.15 99.01
(0.30, 0.40)   2780 97.91 98.99
(0.40, 0.50)   2567 97.59 98.78


                                                                              

Chunk 2, Epoch 74/100, Train Loss: 4150.2847, Val Loss: 275.6475
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.52 99.76
(0.05, 0.10)   4779 99.24 99.63
(0.10, 0.20)   5050 98.71 99.36
(0.20, 0.30)   3298 98.16 99.02
(0.30, 0.40)   2780 97.93 98.99
(0.40, 0.50)   2567 97.60 98.78


                                                                              

Chunk 2, Epoch 75/100, Train Loss: 4000.4453, Val Loss: 202.3994
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.53 99.76
(0.05, 0.10)   4779 99.25 99.65
(0.10, 0.20)   5050 98.73 99.37
(0.20, 0.30)   3298 98.18 99.05
(0.30, 0.40)   2780 97.94 98.98
(0.40, 0.50)   2567 97.62 98.80


                                                                              

Chunk 2, Epoch 76/100, Train Loss: 4076.4901, Val Loss: 202.3701
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.53 99.76
(0.05, 0.10)   4779 99.26 99.64
(0.10, 0.20)   5050 98.74 99.35
(0.20, 0.30)   3298 98.19 99.05
(0.30, 0.40)   2780 97.95 99.01
(0.40, 0.50)   2567 97.63 98.81


                                                                              

Chunk 2, Epoch 77/100, Train Loss: 4010.2069, Val Loss: 151.6885
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.54 99.77
(0.05, 0.10)   4779 99.26 99.65
(0.10, 0.20)   5050 98.75 99.36
(0.20, 0.30)   3298 98.21 99.05
(0.30, 0.40)   2780 97.97 99.03
(0.40, 0.50)   2567 97.64 98.83


                                                                              

Chunk 2, Epoch 78/100, Train Loss: 3455.2377, Val Loss: 153.3281
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.54 99.76
(0.05, 0.10)   4779 99.27 99.66
(0.10, 0.20)   5050 98.74 99.37
(0.20, 0.30)   3298 98.20 99.06
(0.30, 0.40)   2780 97.97 99.00
(0.40, 0.50)   2567 97.64 98.82


                                                                              

Chunk 2, Epoch 79/100, Train Loss: 3670.4154, Val Loss: 126.6855
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.54 99.76
(0.05, 0.10)   4779 99.26 99.63
(0.10, 0.20)   5050 98.75 99.37
(0.20, 0.30)   3298 98.20 99.07
(0.30, 0.40)   2780 97.98 99.02
(0.40, 0.50)   2567 97.65 98.83


                                                                              

Chunk 2, Epoch 80/100, Train Loss: 3793.2300, Val Loss: 143.6943
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.54 99.76
(0.05, 0.10)   4779 99.26 99.64
(0.10, 0.20)   5050 98.74 99.36
(0.20, 0.30)   3298 98.20 99.07
(0.30, 0.40)   2780 97.96 99.03
(0.40, 0.50)   2567 97.63 98.83


                                                                              

Chunk 2, Epoch 81/100, Train Loss: 3876.7051, Val Loss: 104.2090
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.55 99.75
(0.05, 0.10)   4779 99.27 99.63
(0.10, 0.20)   5050 98.75 99.36
(0.20, 0.30)   3298 98.21 99.05
(0.30, 0.40)   2780 97.98 99.03
(0.40, 0.50)   2567 97.66 98.84


                                                                              

Chunk 2, Epoch 82/100, Train Loss: 3502.7708, Val Loss: 111.7861
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.55 99.76
(0.05, 0.10)   4779 99.28 99.63
(0.10, 0.20)   5050 98.76 99.37
(0.20, 0.30)   3298 98.24 99.08
(0.30, 0.40)   2780 97.98 99.04
(0.40, 0.50)   2567 97.67 98.86


                                                                              

Chunk 2, Epoch 83/100, Train Loss: 3785.3475, Val Loss: -60.3027
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.54 99.77
(0.05, 0.10)   4779 99.27 99.65
(0.10, 0.20)   5050 98.75 99.38
(0.20, 0.30)   3298 98.21 99.10
(0.30, 0.40)   2780 97.99 99.03
(0.40, 0.50)   2567 97.67 98.87


                                                                              

Chunk 2, Epoch 84/100, Train Loss: 3557.2252, Val Loss: -72.9277
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.55 99.78
(0.05, 0.10)   4779 99.27 99.66
(0.10, 0.20)   5050 98.77 99.39
(0.20, 0.30)   3298 98.23 99.10
(0.30, 0.40)   2780 97.99 99.04
(0.40, 0.50)   2567 97.67 98.86


                                                                              

Chunk 2, Epoch 85/100, Train Loss: 3547.0044, Val Loss: -100.4521
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.77
(0.05, 0.10)   4779 99.28 99.65
(0.10, 0.20)   5050 98.77 99.38
(0.20, 0.30)   3298 98.23 99.10
(0.30, 0.40)   2780 98.00 99.05
(0.40, 0.50)   2567 97.69 98.86


                                                                              

Chunk 2, Epoch 86/100, Train Loss: 3712.5487, Val Loss: -62.0811
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.55 99.77
(0.05, 0.10)   4779 99.28 99.64
(0.10, 0.20)   5050 98.76 99.40
(0.20, 0.30)   3298 98.23 99.09
(0.30, 0.40)   2780 97.99 99.06
(0.40, 0.50)   2567 97.67 98.87


                                                                              

Chunk 2, Epoch 87/100, Train Loss: 3494.6185, Val Loss: -91.4180
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.55 99.76
(0.05, 0.10)   4779 99.28 99.65
(0.10, 0.20)   5050 98.76 99.38
(0.20, 0.30)   3298 98.23 99.08
(0.30, 0.40)   2780 98.00 99.06
(0.40, 0.50)   2567 97.68 98.88


                                                                              

Chunk 2, Epoch 88/100, Train Loss: 3422.9738, Val Loss: -90.5186
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.77
(0.05, 0.10)   4779 99.29 99.66
(0.10, 0.20)   5050 98.77 99.38
(0.20, 0.30)   3298 98.23 99.10
(0.30, 0.40)   2780 98.01 99.06
(0.40, 0.50)   2567 97.69 98.86


                                                                              

Chunk 2, Epoch 89/100, Train Loss: 3768.8570, Val Loss: -211.8574
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.77
(0.05, 0.10)   4779 99.28 99.65
(0.10, 0.20)   5050 98.77 99.40
(0.20, 0.30)   3298 98.23 99.11
(0.30, 0.40)   2780 98.00 99.06
(0.40, 0.50)   2567 97.68 98.89


                                                                              

Chunk 2, Epoch 90/100, Train Loss: 3118.6634, Val Loss: -162.3486
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.77
(0.05, 0.10)   4779 99.28 99.66
(0.10, 0.20)   5050 98.77 99.39
(0.20, 0.30)   3298 98.23 99.11
(0.30, 0.40)   2780 98.00 99.07
(0.40, 0.50)   2567 97.69 98.90


                                                                              

Chunk 2, Epoch 91/100, Train Loss: 3369.7257, Val Loss: -156.2432
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.76
(0.05, 0.10)   4779 99.29 99.64
(0.10, 0.20)   5050 98.78 99.38
(0.20, 0.30)   3298 98.24 99.09
(0.30, 0.40)   2780 98.02 99.07
(0.40, 0.50)   2567 97.70 98.89


                                                                              

Chunk 2, Epoch 92/100, Train Loss: 3238.5517, Val Loss: -52.1768
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.76
(0.05, 0.10)   4779 99.29 99.63
(0.10, 0.20)   5050 98.79 99.38
(0.20, 0.30)   3298 98.26 99.10
(0.30, 0.40)   2780 98.02 99.07
(0.40, 0.50)   2567 97.69 98.90


                                                                              

Chunk 2, Epoch 93/100, Train Loss: 3289.3541, Val Loss: -269.7568
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.77
(0.05, 0.10)   4779 99.29 99.67
(0.10, 0.20)   5050 98.79 99.40
(0.20, 0.30)   3298 98.26 99.12
(0.30, 0.40)   2780 98.02 99.06
(0.40, 0.50)   2567 97.72 98.90


                                                                              

Chunk 2, Epoch 94/100, Train Loss: 3237.1860, Val Loss: -185.4941
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.57 99.77
(0.05, 0.10)   4779 99.30 99.65
(0.10, 0.20)   5050 98.79 99.39
(0.20, 0.30)   3298 98.25 99.12
(0.30, 0.40)   2780 98.02 99.08
(0.40, 0.50)   2567 97.71 98.90


                                                                              

Chunk 2, Epoch 95/100, Train Loss: 3156.1706, Val Loss: -220.2246
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.56 99.76
(0.05, 0.10)   4779 99.29 99.65
(0.10, 0.20)   5050 98.79 99.40
(0.20, 0.30)   3298 98.26 99.10
(0.30, 0.40)   2780 98.03 99.07
(0.40, 0.50)   2567 97.71 98.90


                                                                              

Chunk 2, Epoch 96/100, Train Loss: 3183.8362, Val Loss: -250.4766
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.57 99.77
(0.05, 0.10)   4779 99.30 99.66
(0.10, 0.20)   5050 98.79 99.40
(0.20, 0.30)   3298 98.27 99.10
(0.30, 0.40)   2780 98.03 99.08
(0.40, 0.50)   2567 97.72 98.91


                                                                              

Chunk 2, Epoch 97/100, Train Loss: 3204.6829, Val Loss: -390.9805
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.57 99.78
(0.05, 0.10)   4779 99.30 99.66
(0.10, 0.20)   5050 98.79 99.41
(0.20, 0.30)   3298 98.27 99.11
(0.30, 0.40)   2780 98.04 99.09
(0.40, 0.50)   2567 97.73 98.90


                                                                              

Chunk 2, Epoch 98/100, Train Loss: 3319.7594, Val Loss: -273.1973
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.57 99.76
(0.05, 0.10)   4779 99.30 99.65
(0.10, 0.20)   5050 98.80 99.41
(0.20, 0.30)   3298 98.27 99.14
(0.30, 0.40)   2780 98.03 99.07
(0.40, 0.50)   2567 97.73 98.90


                                                                              

Chunk 2, Epoch 99/100, Train Loss: 3075.7796, Val Loss: -317.3301
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.57 99.77
(0.05, 0.10)   4779 99.30 99.65
(0.10, 0.20)   5050 98.79 99.41
(0.20, 0.30)   3298 98.25 99.12
(0.30, 0.40)   2780 98.03 99.08
(0.40, 0.50)   2567 97.70 98.92


                                                                               

Chunk 2, Epoch 100/100, Train Loss: 3094.1580, Val Loss: -349.4004
     MAF_bin Counts Train   Val
(0.00, 0.05)  15300 99.57 99.78
(0.05, 0.10)   4779 99.31 99.66
(0.10, 0.20)   5050 98.80 99.40
(0.20, 0.30)   3298 98.26 99.16
(0.30, 0.40)   2780 98.04 99.07
(0.40, 0.50)   2567 97.74 98.91
