# train.yml
---
# Operations this configuration runs: training only.
ops: [train]

# Model definition: the `Sei` class defined in ../model/sei.py.
model: {
  path: ../model/sei.py,
  class: Sei,
  # Keyword arguments for the model class.
  class_args: {
    # Same value as the samplers' `sequence_length` in this file.
    sequence_length: 4096,
    # NOTE(review): presumably the number of entries in
    # ./data/sei_chromatin_profiles.txt -- confirm.
    n_genomic_features: 21907
  },
  # NOTE(review): presumably averages the predictions for both strands --
  # confirm against the Selene SDK documentation.
  non_strand_specific: mean
}
# Data sampling: a MultiSampler holding a training sampler, a validation
# sampler, and the feature list. Both samplers read the same target file,
# reference genome, feature list, and chromosome holdouts.
sampler: !obj:selene_sdk.samplers.MultiSampler {
  features: !obj:selene_sdk.utils.load_features_list {
    input_path: ./data/sei_chromatin_profiles.txt
  },

  # Training: a RandomPositionsSampler wrapped in a SamplerDataLoader.
  train_sampler: !obj:selene_sdk.samplers.dataloader.SamplerDataLoader {
    # Same batch size as `train_model.batch_size` in this file.
    batch_size: 64,
    num_workers: 16,
    sampler: !obj:selene_sdk.samplers.RandomPositionsSampler {
      reference_sequence: !obj:selene_sdk.sequences.Genome {
        input_path: ../resources/hg38_UCSC.fa,
        blacklist_regions: hg38
      },
      target_path: ./data/sorted_sei_data.bed.gz,
      features: !obj:selene_sdk.utils.load_features_list {
        input_path: ./data/sei_chromatin_profiles.txt
      },
      validation_holdout: [chr10],
      test_holdout: [chr8, chr9],
      # Same value as `model.class_args.sequence_length`.
      sequence_length: 4096,
      # NOTE(review): looks like a 1 bp bin at the centre of the 4096 bp
      # window -- confirm the interval convention.
      center_bin_to_predict: [2048, 2049],
      # NOTE(review): set to null here but omitted from the validation
      # sampler below, which therefore uses the class default -- confirm
      # the difference is intended.
      feature_thresholds: null,
      save_datasets: []
    }
  },

  # Validation: a bare RandomPositionsSampler in `validate` mode.
  validate_sampler: !obj:selene_sdk.samplers.RandomPositionsSampler {
    mode: validate,
    reference_sequence: !obj:selene_sdk.sequences.Genome {
      input_path: ../resources/hg38_UCSC.fa,
      blacklist_regions: hg38
    },
    target_path: ./data/sorted_sei_data.bed.gz,
    features: !obj:selene_sdk.utils.load_features_list {
      input_path: ./data/sei_chromatin_profiles.txt
    },
    validation_holdout: [chr10],
    test_holdout: [chr8, chr9],
    sequence_length: 4096,
    center_bin_to_predict: [2048, 2049],
    save_datasets: []
  }
}
# Training loop settings passed to selene_sdk.TrainModel.
train_model: !obj:selene_sdk.TrainModel {
  # Same batch size as the training SamplerDataLoader in this file.
  batch_size: 64,
  # NOTE(review): 1e9 steps looks like "run until stopped manually" --
  # confirm the intended stopping criterion.
  max_steps: 1000000000,
  report_stats_every_n_steps: 5000,
  n_validation_samples: 12800,
  n_test_samples: 1600000,
  report_gt_feature_n_positives: 5,
  # Booleans are written as canonical lowercase `true` / `false`.
  use_cuda: true,
  # We recommend multi-GPU training only on NVLink-enabled GPUs.
  data_parallel: true,
  cpu_n_threads: 19,
  use_scheduler: false,
  # Metric name -> scikit-learn scoring function.
  metrics: {
    roc_auc: !import sklearn.metrics.roc_auc_score,
    average_precision: !import sklearn.metrics.average_precision_score
  }
}
# Run-level settings.
# NOTE(review): presumably seeds the random number generators for
# reproducibility -- confirm against the Selene SDK documentation.
random_seed: 1447
# NOTE(review): presumably the directory that receives training outputs,
# resolved relative to the working directory -- confirm.
output_dir: ./models
...