-
Notifications
You must be signed in to change notification settings - Fork 30
/
slim_sim.cpp
11911 lines (9556 loc) · 544 KB
/
slim_sim.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//
// slim_sim.cpp
// SLiM
//
// Created by Ben Haller on 12/26/14.
// Copyright (c) 2014-2020 Philipp Messer. All rights reserved.
// A product of the Messer Lab, http://messerlab.org/slim/
//
// This file is part of SLiM.
//
// SLiM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
//
// SLiM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with SLiM. If not, see <http://www.gnu.org/licenses/>.
#include "slim_sim.h"
#include "slim_functions.h"
#include "eidos_test.h"
#include "eidos_interpreter.h"
#include "eidos_call_signature.h"
#include "eidos_property_signature.h"
#include "eidos_ast_node.h"
#include "individual.h"
#include "polymorphism.h"
#include "subpopulation.h"
#include <iostream>
#include <iomanip>
#include <fstream>
#include <stdexcept>
#include <algorithm>
#include <typeinfo>
#include <memory>
#include <string>
#include <utility>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <unordered_set>
#include <unordered_map>
#include <float.h>
#include <ctime>
//TREE SEQUENCE
#include <stdio.h>
#include <stdlib.h>
#include "json.hpp"
#include <sys/utsname.h>
//TREE SEQUENCE
//INCLUDE MSPRIME.H FOR THE CROSSCHECK CODE; NEEDS TO BE MOVED TO TSKIT
// the tskit header is not designed to be included from C++, so we have to protect it...
#ifdef __cplusplus
extern "C" {
#endif
#include "kastore.h"
#include "../treerec/tskit/trees.h"
#include "../treerec/tskit/tables.h"
#include "../treerec/tskit/genotypes.h"
#include "../treerec/tskit/text_input.h"
#ifdef __cplusplus
}
#endif
// This is the version written to the provenance table of .trees files
static const char *SLIM_TREES_FILE_VERSION_INITIAL __attribute__((unused)) = "0.1"; // SLiM 3.0, before the Inidividual table, etc.; UNSUPPORTED
static const char *SLIM_TREES_FILE_VERSION_PRENUC = "0.2"; // before introduction of nucleotides
static const char *SLIM_TREES_FILE_VERSION = "0.3"; // SLiM 3.3 onward, with the added nucleotide field in MutationMetadataRec
#pragma mark -
#pragma mark SLiMSim
#pragma mark -
SLiMSim::SLiMSim(std::istream &p_infile) : chromosome_(this), population_(*this), self_symbol_(gID_sim, EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Object_singleton(this, gSLiM_SLiMSim_Class))), x_experiments_enabled_(false)
{
// set up the symbol table we will use for all of our constants; we use the external hash table design
// BCH 1/18/2018: I looked into telling this table to use the external unordered_map from the start, but testing indicates
// that that is actually a bit slower. I suspect it crosses over for models with more SLiM symbols; but EidosSymbolTable
// crosses over to the external table anyway when more symbols are used, so it shouldn't be a big deal.
simulation_constants_ = new EidosSymbolTable(EidosSymbolTableType::kContextConstantsTable, gEidosConstantsSymbolTable);
// set up the function map with the base Eidos functions plus zero-gen functions, since we're in an initial state
simulation_functions_ = *EidosInterpreter::BuiltInFunctionMap();
AddZeroGenerationFunctionsToMap(simulation_functions_);
AddSLiMFunctionsToMap(simulation_functions_);
// read all configuration information from the input file
p_infile.clear();
p_infile.seekg(0, std::fstream::beg);
try {
InitializeFromFile(p_infile);
}
catch (...) {
// try to clean up what we've allocated so far
delete simulation_constants_;
simulation_constants_ = nullptr;
simulation_functions_.clear();
throw;
}
}
SLiMSim::~SLiMSim(void)
{
//EIDOS_ERRSTREAM << "SLiMSim::~SLiMSim" << std::endl;
population_.RemoveAllSubpopulationInfo();
delete simulation_constants_;
simulation_constants_ = nullptr;
simulation_functions_.clear();
for (auto mutation_type : mutation_types_)
delete mutation_type.second;
mutation_types_.clear();
for (auto genomic_element_type : genomic_element_types_)
delete genomic_element_type.second;
genomic_element_types_.clear();
for (auto interaction_type : interaction_types_)
delete interaction_type.second;
interaction_types_.clear();
for (auto script_block : script_blocks_)
delete script_block;
script_blocks_.clear();
// All the script blocks that refer to the script are now gone
delete script_;
script_ = nullptr;
// Dispose of mutation run experiment data
if (x_experiments_enabled_)
{
if (x_current_runtimes_)
free(x_current_runtimes_);
x_current_runtimes_ = nullptr;
if (x_previous_runtimes_)
free(x_previous_runtimes_);
x_previous_runtimes_ = nullptr;
}
// TREE SEQUENCE RECORDING
if (RecordingTreeSequence())
FreeTreeSequence();
}
void SLiMSim::InitializeRNGFromSeed(unsigned long int *p_override_seed_ptr)
{
// track the random number seed given, if there is one
unsigned long int rng_seed = (p_override_seed_ptr ? *p_override_seed_ptr : Eidos_GenerateSeedFromPIDAndTime());
Eidos_InitializeRNG();
Eidos_SetRNGSeed(rng_seed);
if (SLiM_verbosity_level >= 1)
SLIM_OUTSTREAM << "// Initial random seed:\n" << rng_seed << "\n" << std::endl;
// remember the original seed for .trees provenance
original_seed_ = rng_seed;
}
void SLiMSim::InitializeFromFile(std::istream &p_infile)
{
// Reset error position indicators used by SLiMgui
EidosScript::ClearErrorPosition();
// Read in the file; going through stringstream is fast...
std::stringstream buffer;
buffer << p_infile.rdbuf();
// Tokenize and parse
// BCH 5/1/2019: Note that this script_ variable may leak if tokenization/parsing raises below, because this method
// is called while the SLiMSim constructor is still in progress, so the destructor is not called to clean up. But
// we can't actually clean up this variable, because it is used by SLiMAssertScriptRaise() to diagnose where the raise
// occurred in the user's script; we'd have to redesign that code to fix this leak. So be it. It's not a large leak.
script_ = new SLiMEidosScript(buffer.str());
// Set up top-level error-reporting info
gEidosCurrentScript = script_;
gEidosExecutingRuntimeScript = false;
script_->Tokenize();
script_->ParseSLiMFileToAST();
// Extract SLiMEidosBlocks from the parse tree
const EidosASTNode *root_node = script_->AST();
for (EidosASTNode *script_block_node : root_node->children_)
{
SLiMEidosBlock *new_script_block = new SLiMEidosBlock(script_block_node);
AddScriptBlock(new_script_block, nullptr, new_script_block->root_node_->children_[0]->token_);
}
// Reset error position indicators used by SLiMgui
EidosScript::ClearErrorPosition();
// Zero out error-reporting info so raises elsewhere don't get attributed to this script
gEidosCurrentScript = nullptr;
gEidosExecutingRuntimeScript = false;
}
// get one line of input, sanitizing by removing comments and whitespace; used only by SLiMSim::InitializePopulationFromTextFile
void GetInputLine(std::istream &p_input_file, std::string &p_line);
void GetInputLine(std::istream &p_input_file, std::string &p_line)
{
getline(p_input_file, p_line);
// remove all after "//", the comment start sequence
// BCH 16 Dec 2014: note this was "/" in SLiM 1.8 and earlier, changed to allow full filesystem paths to be specified.
if (p_line.find("//") != std::string::npos)
p_line.erase(p_line.find("//"));
// remove leading and trailing whitespace (spaces and tabs)
p_line.erase(0, p_line.find_first_not_of(" \t"));
p_line.erase(p_line.find_last_not_of(" \t") + 1);
}
SLiMFileFormat SLiMSim::FormatOfPopulationFile(const std::string &p_file_string)
{
if (p_file_string.length())
{
// p_file should have had its trailing slash stripped already, and a leading ~ should have been resolved
// we will check those assumptions here for safety...
if (p_file_string[0] == '~')
EIDOS_TERMINATION << "ERROR (SLiMSim::FormatOfPopulationFile): (internal error) leading ~ in path was not resolved." << EidosTerminate();
if (p_file_string.back() == '/')
EIDOS_TERMINATION << "ERROR (SLiMSim::FormatOfPopulationFile): (internal error) trailing / in path was not stripped." << EidosTerminate();
// First determine if the path is for a file or a directory
const char *file_cstr = p_file_string.c_str();
struct stat statbuf;
if (stat(file_cstr, &statbuf) != 0)
return SLiMFileFormat::kFileNotFound;
if (S_ISDIR(statbuf.st_mode))
{
// The path is for a whole directory. The only file format we recognize that is directory-based is the
// tskit text (i.e. non-binary) format, which requires files with specific names inside; let's check.
// The files should be named EdgeTable.txt, NodeTable.txt, SiteTable.txt, etc.; we require all files to
// be present, for simplicity.
std::string NodeFileName = p_file_string + "/NodeTable.txt";
std::string EdgeFileName = p_file_string + "/EdgeTable.txt";
std::string SiteFileName = p_file_string + "/SiteTable.txt";
std::string MutationFileName = p_file_string + "/MutationTable.txt";
std::string IndividualFileName = p_file_string + "/IndividualTable.txt";
std::string PopulationFileName = p_file_string + "/PopulationTable.txt";
std::string ProvenanceFileName = p_file_string + "/ProvenanceTable.txt";
if (stat(NodeFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
if (stat(EdgeFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
if (stat(SiteFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
if (stat(MutationFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
if (stat(IndividualFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
if (stat(PopulationFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
if (stat(ProvenanceFileName.c_str(), &statbuf) != 0) return SLiMFileFormat::kFormatUnrecognized;
if (!S_ISREG(statbuf.st_mode)) return SLiMFileFormat::kFormatUnrecognized;
return SLiMFileFormat::kFormatTskitText;
}
else if (S_ISREG(statbuf.st_mode))
{
// The path is for a file. It could be a SLiM text file, SLiM binary file, or tskit binary file; we
// determine which using the leading 4 bytes of the file. This heuristic will need to be adjusted
// if/when these file formats change (such as going off of HD5 in the tskit file format).
std::ifstream infile(file_cstr, std::ios::in | std::ios::binary);
if (!infile.is_open() || infile.eof())
return SLiMFileFormat::kFileNotFound;
// Determine the file length
infile.seekg(0, std::ios_base::end);
std::size_t file_size = infile.tellg();
// Determine the file format
if (file_size >= 4)
{
char file_chars[4] = {0, 0, 0, 0};
uint32_t file_endianness_tag = 0;
infile.seekg(0, std::ios_base::beg);
infile.read(&file_chars[0], 4);
infile.seekg(0, std::ios_base::beg);
infile.read(reinterpret_cast<char *>(&file_endianness_tag), sizeof file_endianness_tag);
if ((file_chars[0] == '#') && (file_chars[1] == 'O') && (file_chars[2] == 'U') && (file_chars[3] == 'T'))
return SLiMFileFormat::kFormatSLiMText;
else if (file_endianness_tag == 0x12345678)
return SLiMFileFormat::kFormatSLiMBinary;
else if (file_endianness_tag == 0x46444889) // 'âHDF', the prefix for HDF5 files apparently; reinterpreted via endianness
return SLiMFileFormat::kFormatTskitBinary_HDF5;
else if (file_endianness_tag == 0x53414B89) // 'âKAS', the prefix for kastore files apparently; reinterpreted via endianness
return SLiMFileFormat::kFormatTskitBinary_kastore;
}
}
}
return SLiMFileFormat::kFormatUnrecognized;
}
slim_generation_t SLiMSim::InitializePopulationFromFile(const std::string &p_file_string, EidosInterpreter *p_interpreter)
{
SLiMFileFormat file_format = FormatOfPopulationFile(p_file_string);
if (file_format == SLiMFileFormat::kFileNotFound)
EIDOS_TERMINATION << "ERROR (SLiMSim::InitializePopulationFromFile): initialization file does not exist or is empty." << EidosTerminate();
if (file_format == SLiMFileFormat::kFormatUnrecognized)
EIDOS_TERMINATION << "ERROR (SLiMSim::InitializePopulationFromFile): initialization file is invalid." << EidosTerminate();
// first we clear out all variables of type Subpopulation etc. from the symbol table; they will all be invalid momentarily
// note that we do this not only in our constants table, but in the user's variables as well; we can leave no stone unturned
// FIXME: Note that we presently have no way of clearing out EidosScribe/SLiMgui references (the variable browser, in particular),
// and so EidosConsoleWindowController has to do an ugly and only partly effective hack to work around this issue.
if (p_interpreter)
{
EidosSymbolTable &symbols = p_interpreter->SymbolTable();
std::vector<std::string> all_symbols = symbols.AllSymbols();
std::vector<EidosGlobalStringID> symbols_to_remove;
for (std::string symbol_name : all_symbols)
{
EidosGlobalStringID symbol_ID = Eidos_GlobalStringIDForString(symbol_name);
EidosValue_SP symbol_value = symbols.GetValueOrRaiseForSymbol(symbol_ID);
if (symbol_value->Type() == EidosValueType::kValueObject)
{
const EidosObjectClass *symbol_class = static_pointer_cast<EidosValue_Object>(symbol_value)->Class();
if ((symbol_class == gSLiM_Subpopulation_Class) || (symbol_class == gSLiM_Genome_Class) || (symbol_class == gSLiM_Individual_Class) || (symbol_class == gSLiM_Mutation_Class) || (symbol_class == gSLiM_Substitution_Class))
symbols_to_remove.emplace_back(symbol_ID);
}
}
for (EidosGlobalStringID symbol_ID : symbols_to_remove)
symbols.RemoveConstantForSymbol(symbol_ID);
}
// invalidate interactions, since any cached interaction data depends on the subpopulations and individuals
for (auto int_type = interaction_types_.begin(); int_type != interaction_types_.end(); ++int_type)
int_type->second->Invalidate();
// then we dispose of all existing subpopulations, mutations, etc.
population_.RemoveAllSubpopulationInfo();
const char *file_cstr = p_file_string.c_str();
slim_generation_t new_generation = 0;
// Read in the file. The SLiM file-reading methods are not tree-sequence-aware, so we bracket them
// with calls that fix the tree sequence recording state around them. The treeSeq output methods
// are of course treeSeq-aware, so we don't need to do that for them.
if ((file_format == SLiMFileFormat::kFormatSLiMText) || (file_format == SLiMFileFormat::kFormatSLiMBinary))
{
// TREE SEQUENCE RECORDING
if (RecordingTreeSequence())
{
FreeTreeSequence();
AllocateTreeSequenceTables();
}
if (file_format == SLiMFileFormat::kFormatSLiMText)
new_generation = _InitializePopulationFromTextFile(file_cstr, p_interpreter);
else if (file_format == SLiMFileFormat::kFormatSLiMBinary)
new_generation = _InitializePopulationFromBinaryFile(file_cstr, p_interpreter);
// TREE SEQUENCE RECORDING
if (RecordingTreeSequence())
{
// set up all of the mutations we just read in with the tree-seq recording code
RecordAllDerivedStatesFromSLiM();
// reset our tree-seq auto-simplification interval so we don't simplify immediately
simplify_elapsed_ = 0;
// reset our last coalescence state; we don't know whether we're coalesced now or not
last_coalescence_state_ = false;
}
}
else if (file_format == SLiMFileFormat::kFormatTskitText)
new_generation = _InitializePopulationFromTskitTextFile(file_cstr, p_interpreter);
else if (file_format == SLiMFileFormat::kFormatTskitBinary_kastore)
new_generation = _InitializePopulationFromTskitBinaryFile(file_cstr, p_interpreter);
else if (file_format == SLiMFileFormat::kFormatTskitBinary_HDF5)
EIDOS_TERMINATION << "ERROR (SLiMSim::InitializePopulationFromFile): msprime HDF5 binary files are not supported; that file format has been superseded by kastore." << EidosTerminate();
else
EIDOS_TERMINATION << "ERROR (SLiMSim::InitializePopulationFromFile): unrecognized format code." << EidosTerminate();
return new_generation;
}
slim_generation_t SLiMSim::_InitializePopulationFromTextFile(const char *p_file, EidosInterpreter *p_interpreter)
{
slim_generation_t file_generation;
std::map<slim_polymorphismid_t,MutationIndex> mutations;
std::string line, sub;
std::ifstream infile(p_file);
int age_output_count = 0;
if (!infile.is_open())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): could not open initialization file." << EidosTerminate();
// Parse the first line, to get the generation
{
GetInputLine(infile, line);
std::istringstream iss(line);
iss >> sub; // #OUT:
iss >> sub; // generation
int64_t generation_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
file_generation = SLiMCastToGenerationTypeOrRaise(generation_long);
}
// As of SLiM 2.1, we change the generation as a side effect of loading; otherwise we can't correctly update our state here!
// As of SLiM 3, we set the generation up here, before making any individuals, because we need it to be correct for the tree-seq recording code.
SetGeneration(file_generation);
// Read and ignore initial stuff until we hit the Populations section
int64_t file_version = 0; // initially unknown; we will leave this as 0 for versions < 3, for now
while (!infile.eof())
{
GetInputLine(infile, line);
// Starting in SLiM 3, we will handle a Version line if we see one in passing
if (line.find("Version:") != std::string::npos)
{
std::istringstream iss(line);
iss >> sub; // Version:
iss >> sub; // version number
file_version = (int64_t)EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
// version 4 is the same as version 3 but with an age value for each individual
if (file_version == 4)
{
age_output_count = 1;
file_version = 3;
}
if ((file_version != 1) && (file_version != 2) && (file_version != 3))
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): unrecognized version." << EidosTerminate();
continue;
}
if (line.find("Populations") != std::string::npos)
break;
}
if (age_output_count && (ModelType() == SLiMModelType::kModelTypeWF))
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): age information is present but the simulation is using a WF model." << EidosTerminate();
if (!age_output_count && (ModelType() == SLiMModelType::kModelTypeNonWF))
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): age information is not present but the simulation is using a nonWF model; age information must be included." << EidosTerminate();
// Now we are in the Populations section; read and instantiate each population until we hit the Mutations section
while (!infile.eof())
{
GetInputLine(infile, line);
if (line.length() == 0)
continue;
if (line.find("Mutations") != std::string::npos)
break;
std::istringstream iss(line);
iss >> sub;
slim_objectid_t subpop_index = SLiMEidosScript::ExtractIDFromStringWithPrefix(sub, 'p', nullptr);
iss >> sub;
int64_t subpop_size_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
slim_popsize_t subpop_size = SLiMCastToPopsizeTypeOrRaise(subpop_size_long);
// SLiM 2.0 output format has <H | S <ratio>> here; if that is missing or "H" is given, the population is hermaphroditic and the ratio given is irrelevant
double sex_ratio = 0.0;
if (iss >> sub)
{
if (sub == "S")
{
iss >> sub;
sex_ratio = EidosInterpreter::FloatForString(sub, nullptr);
}
}
// Create the population population
Subpopulation *new_subpop = population_.AddSubpopulation(subpop_index, subpop_size, sex_ratio);
// define a new Eidos variable to refer to the new subpopulation
EidosSymbolTableEntry &symbol_entry = new_subpop->SymbolTableEntry();
if (p_interpreter && p_interpreter->SymbolTable().ContainsSymbol(symbol_entry.first))
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): new subpopulation symbol " << Eidos_StringForGlobalStringID(symbol_entry.first) << " was already defined prior to its definition here." << EidosTerminate();
simulation_constants_->InitializeConstantSymbolEntry(symbol_entry);
}
// Now we are in the Mutations section; read and instantiate all mutations and add them to our map and to the registry
while (!infile.eof())
{
GetInputLine(infile, line);
if (line.length() == 0)
continue;
if (line.find("Genomes") != std::string::npos)
break;
if (line.find("Individuals") != std::string::npos) // SLiM 2.0 added this section
break;
std::istringstream iss(line);
iss >> sub;
int64_t polymorphismid_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
slim_polymorphismid_t polymorphism_id = SLiMCastToPolymorphismidTypeOrRaise(polymorphismid_long);
// Added in version 2 output, starting in SLiM 2.1
iss >> sub;
slim_mutationid_t mutation_id;
if (sub[0] == 'm') // autodetect whether we are parsing version 1 or version 2 output
{
mutation_id = polymorphism_id; // when parsing version 1 output, we use the polymorphism id as the mutation id
}
else
{
mutation_id = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
iss >> sub; // queue up sub for mutation_type_id
}
slim_objectid_t mutation_type_id = SLiMEidosScript::ExtractIDFromStringWithPrefix(sub, 'm', nullptr);
iss >> sub;
int64_t position_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
slim_position_t position = SLiMCastToPositionTypeOrRaise(position_long);
iss >> sub;
double selection_coeff = EidosInterpreter::FloatForString(sub, nullptr);
iss >> sub; // dominance coefficient, which is given in the mutation type; we check below that the value read matches the mutation type
double dominance_coeff = EidosInterpreter::FloatForString(sub, nullptr);
iss >> sub;
slim_objectid_t subpop_index = SLiMEidosScript::ExtractIDFromStringWithPrefix(sub, 'p', nullptr);
iss >> sub;
int64_t generation_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
slim_generation_t generation = SLiMCastToGenerationTypeOrRaise(generation_long);
iss >> sub; // prevalence, which we discard
int8_t nucleotide = -1;
if (iss && !iss.eof())
{
// fetch the nucleotide field if it is present
iss >> sub;
if (sub == "A") nucleotide = 0;
else if (sub == "C") nucleotide = 1;
else if (sub == "G") nucleotide = 2;
else if (sub == "T") nucleotide = 3;
else EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): unrecognized value '"<< sub << "' in nucleotide field." << EidosTerminate();
}
// look up the mutation type from its index
auto found_muttype_pair = mutation_types_.find(mutation_type_id);
if (found_muttype_pair == mutation_types_.end())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): mutation type m"<< mutation_type_id << " has not been defined." << EidosTerminate();
MutationType *mutation_type_ptr = found_muttype_pair->second;
if (fabs(mutation_type_ptr->dominance_coeff_ - dominance_coeff) > 0.001) // a reasonable tolerance to allow for I/O roundoff
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): mutation type m"<< mutation_type_id << " has dominance coefficient " << mutation_type_ptr->dominance_coeff_ << " that does not match the population file dominance coefficient of " << dominance_coeff << "." << EidosTerminate();
if ((nucleotide == -1) && mutation_type_ptr->nucleotide_based_)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): mutation type m"<< mutation_type_id << " is nucleotide-based, but a nucleotide value for a mutation of this type was not supplied." << EidosTerminate();
if ((nucleotide != -1) && !mutation_type_ptr->nucleotide_based_)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): mutation type m"<< mutation_type_id << " is not nucleotide-based, but a nucleotide value for a mutation of this type was supplied." << EidosTerminate();
// construct the new mutation; NOTE THAT THE STACKING POLICY IS NOT CHECKED HERE, AS THIS IS NOT CONSIDERED THE ADDITION OF A MUTATION!
MutationIndex new_mut_index = SLiM_NewMutationFromBlock();
new (gSLiM_Mutation_Block + new_mut_index) Mutation(mutation_id, mutation_type_ptr, position, selection_coeff, subpop_index, generation, nucleotide);
// add it to our local map, so we can find it when making genomes, and to the population's mutation registry
mutations.insert(std::pair<slim_polymorphismid_t,MutationIndex>(polymorphism_id, new_mut_index));
population_.mutation_registry_.emplace_back(new_mut_index);
#ifdef SLIM_KEEP_MUTTYPE_REGISTRIES
if (population_.keeping_muttype_registries_)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): (internal error) separate muttype registries set up during pop load." << EidosTerminate();
#endif
// all mutations seen here will be added to the simulation somewhere, so check and set pure_neutral_ and all_pure_neutral_DFE_
if (selection_coeff != 0.0)
{
pure_neutral_ = false;
mutation_type_ptr->all_pure_neutral_DFE_ = false;
}
}
population_.cached_tally_genome_count_ = 0;
// If there is an Individuals section (added in SLiM 2.0), we now need to parse it since it might contain spatial positions
if (line.find("Individuals") != std::string::npos)
{
while (!infile.eof())
{
GetInputLine(infile, line);
if (line.length() == 0)
continue;
if (line.find("Genomes") != std::string::npos)
break;
std::istringstream iss(line);
iss >> sub; // pX:iY – individual identifier
int pos = static_cast<int>(sub.find_first_of(":"));
std::string &&subpop_id_string = sub.substr(0, pos);
slim_objectid_t subpop_id = SLiMEidosScript::ExtractIDFromStringWithPrefix(subpop_id_string, 'p', nullptr);
std::string &&individual_index_string = sub.substr(pos + 1, std::string::npos);
if (individual_index_string[0] != 'i')
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): reference to individual is malformed." << EidosTerminate();
int64_t individual_index = EidosInterpreter::NonnegativeIntegerForString(individual_index_string.c_str() + 1, nullptr);
auto subpop_pair = population_.subpops_.find(subpop_id);
if (subpop_pair == population_.subpops_.end())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): referenced subpopulation p" << subpop_id << " not defined." << EidosTerminate();
Subpopulation &subpop = *subpop_pair->second;
if (individual_index >= subpop.parent_subpop_size_)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): referenced individual i" << individual_index << " is out of range." << EidosTerminate();
Individual &individual = *subpop.parent_individuals_[individual_index];
iss >> sub; // individual sex identifier (F/M/H) – added in SLiM 2.1, so we need to be robust if it is missing
if ((sub == "F") || (sub == "M") || (sub == "H"))
iss >> sub;
; // pX:Y – genome 1 identifier, which we do not presently need to parse [already fetched]
iss >> sub; // pX:Y – genome 2 identifier, which we do not presently need to parse
// Parse the optional fields at the end of each individual line. This is a bit tricky.
// First we read all of the fields in, then we decide how to use them.
std::vector<std::string> opt_params;
int opt_param_count;
while (iss >> sub)
opt_params.push_back(sub);
opt_param_count = (int)opt_params.size();
if (opt_param_count == 0)
{
// no optional info present, which might be an error; should never occur unless someone has hand-constructed a bad input file
if (age_output_count)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): output file format does not contain age information, which is required." << EidosTerminate();
}
#ifdef SLIM_NONWF_ONLY
else if (opt_param_count == age_output_count)
{
// only age information is present
individual.age_ = (slim_age_t)EidosInterpreter::NonnegativeIntegerForString(opt_params[0], nullptr); // age
}
#endif // SLIM_NONWF_ONLY
else if (opt_param_count == spatial_dimensionality_ + age_output_count)
{
// age information is present, in addition to the correct number of spatial positions
if (spatial_dimensionality_ >= 1)
individual.spatial_x_ = EidosInterpreter::FloatForString(opt_params[0], nullptr); // spatial position x
if (spatial_dimensionality_ >= 2)
individual.spatial_y_ = EidosInterpreter::FloatForString(opt_params[1], nullptr); // spatial position y
if (spatial_dimensionality_ >= 3)
individual.spatial_z_ = EidosInterpreter::FloatForString(opt_params[2], nullptr); // spatial position z
#ifdef SLIM_NONWF_ONLY
if (age_output_count)
individual.age_ = (slim_age_t)EidosInterpreter::NonnegativeIntegerForString(opt_params[spatial_dimensionality_], nullptr); // age
#endif // SLIM_NONWF_ONLY
}
else
{
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): output file format does not match that expected by the simulation (spatial dimension or age information is incorrect or missing)." << EidosTerminate();
}
}
}
// Now we are in the Genomes section, which should take us to the end of the file unless there is an Ancestral Sequence section
Mutation *mut_block_ptr = gSLiM_Mutation_Block;
while (!infile.eof())
{
GetInputLine(infile, line);
if (line.length() == 0)
continue;
if (line.find("Ancestral sequence") != std::string::npos)
break;
std::istringstream iss(line);
iss >> sub;
int pos = static_cast<int>(sub.find_first_of(":"));
std::string &&subpop_id_string = sub.substr(0, pos);
slim_objectid_t subpop_id = SLiMEidosScript::ExtractIDFromStringWithPrefix(subpop_id_string, 'p', nullptr);
auto subpop_pair = population_.subpops_.find(subpop_id);
if (subpop_pair == population_.subpops_.end())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): referenced subpopulation p" << subpop_id << " not defined." << EidosTerminate();
Subpopulation &subpop = *subpop_pair->second;
sub.erase(0, pos + 1); // remove the subpop_id and the colon
int64_t genome_index_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
if ((genome_index_long < 0) || (genome_index_long > SLIM_MAX_SUBPOP_SIZE * 2))
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): genome index out of permitted range." << EidosTerminate();
slim_popsize_t genome_index = static_cast<slim_popsize_t>(genome_index_long); // range-check is above since we need to check against SLIM_MAX_SUBPOP_SIZE * 2
Genome &genome = *subpop.parent_genomes_[genome_index];
// Now we might have [A|X|Y] (SLiM 2.0), or we might have the first mutation id - or we might have nothing at all
if (iss >> sub)
{
// check whether this token is a genome type
if ((sub.compare(gStr_A) == 0) || (sub.compare(gStr_X) == 0) || (sub.compare(gStr_Y) == 0))
{
// Let's do a little error-checking against what has already been instantiated for us...
if ((sub.compare(gStr_A) == 0) && genome.Type() != GenomeType::kAutosome)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): genome is specified as A (autosome), but the instantiated genome does not match." << EidosTerminate();
if ((sub.compare(gStr_X) == 0) && genome.Type() != GenomeType::kXChromosome)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): genome is specified as X (X-chromosome), but the instantiated genome does not match." << EidosTerminate();
if ((sub.compare(gStr_Y) == 0) && genome.Type() != GenomeType::kYChromosome)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): genome is specified as Y (Y-chromosome), but the instantiated genome does not match." << EidosTerminate();
if (iss >> sub)
{
if (sub == "<null>")
{
if (!genome.IsNull())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): genome is specified as null, but the instantiated genome is non-null." << EidosTerminate();
continue; // this line is over
}
else
{
if (genome.IsNull())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): genome is specified as non-null, but the instantiated genome is null." << EidosTerminate();
// drop through, and sub will be interpreted as a mutation id below
}
}
else
continue;
}
slim_position_t mutrun_length_ = genome.mutrun_length_;
slim_mutrun_index_t current_mutrun_index = -1;
MutationRun *current_mutrun = nullptr;
do
{
int64_t polymorphismid_long = EidosInterpreter::NonnegativeIntegerForString(sub, nullptr);
slim_polymorphismid_t polymorphism_id = SLiMCastToPolymorphismidTypeOrRaise(polymorphismid_long);
auto found_mut_pair = mutations.find(polymorphism_id);
if (found_mut_pair == mutations.end())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromTextFile): polymorphism " << polymorphism_id << " has not been defined." << EidosTerminate();
MutationIndex mutation = found_mut_pair->second;
slim_mutrun_index_t mutrun_index = (slim_mutrun_index_t)((mut_block_ptr + mutation)->position_ / mutrun_length_);
if (mutrun_index != current_mutrun_index)
{
current_mutrun_index = mutrun_index;
genome.WillModifyRun(current_mutrun_index);
current_mutrun = genome.mutruns_[mutrun_index].get();
}
current_mutrun->emplace_back(mutation);
}
while (iss >> sub);
}
}
// Now we are in the Ancestral sequence section, which should take us to the end of the file
// Conveniently, NucleotideArray supports operator>> to read nucleotides until the EOF
if (line.find("Ancestral sequence") != std::string::npos)
{
infile >> *(chromosome_.AncestralSequence());
}
// It's a little unclear how we ought to clean up after ourselves, and this is a continuing source of bugs. We could be loading
// a new population in an early() event, in a late() event, or in between generations in SLiMgui using its Import Population command.
// The safest avenue seems to be to just do all the bookkeeping we can think of: tally frequencies, calculate fitnesses, and
// survey the population for SLiMgui. This will lead to some of these actions being done at an unusual time in the generation cycle,
// though, and will cause some things to be done unnecessarily (because they are not normally up-to-date at the current generation
// cycle stage anyway) or done twice (which could be particularly problematic for fitness() callbacks). Nevertheless, this seems
// like the best policy, at least until shown otherwise... BCH 11 June 2016
// BCH 5 April 2017: Well, it has been shown otherwise. Now that interactions have been added, fitness() callbacks often depend on
// them, which means the interactions need to be evaluated, which means we can't evaluate fitness values yet; we need to give the
// user's script a chance to evaluate the interactions. This was always a problem, really; fitness() callbacks might have needed
// some external state to be set up that would be on the population state. But now it is a glaring problem, and forces us to revise
// our policy. For backward compatibility, we will keep the old behavior if reading a file that is version 2 or earlier; a bit
// weird, but probably nobody will ever even notice...
// Re-tally mutation references so we have accurate frequency counts for our new mutations
population_.UniqueMutationRuns();
population_.TallyMutationReferences(nullptr, true);
if (file_version <= 2)
{
// Now that we have the info on everybody, update fitnesses so that we're ready to run the next generation
// used to be generation + 1; removing that 18 Feb 2016 BCH
nonneutral_change_counter_++; // trigger unconditional nonneutral mutation caching inside UpdateFitness()
last_nonneutral_regime_ = 3; // this means "unpredictable callbacks", will trigger a recache next generation
for (auto muttype_iter : mutation_types_)
(muttype_iter.second)->subject_to_fitness_callback_ = true; // we're not doing RecalculateFitness()'s work, so play it safe
SLiMEidosBlockType old_executing_block_type = executing_block_type_;
executing_block_type_ = SLiMEidosBlockType::SLiMEidosFitnessCallback; // used for both fitness() and fitness(NULL) for simplicity
for (std::pair<const slim_objectid_t,Subpopulation*> &subpop_pair : population_.subpops_)
{
slim_objectid_t subpop_id = subpop_pair.first;
Subpopulation *subpop = subpop_pair.second;
std::vector<SLiMEidosBlock*> fitness_callbacks = ScriptBlocksMatching(generation_, SLiMEidosBlockType::SLiMEidosFitnessCallback, -1, -1, subpop_id);
std::vector<SLiMEidosBlock*> global_fitness_callbacks = ScriptBlocksMatching(generation_, SLiMEidosBlockType::SLiMEidosFitnessGlobalCallback, -2, -1, subpop_id);
subpop->UpdateFitness(fitness_callbacks, global_fitness_callbacks);
}
executing_block_type_ = old_executing_block_type;
#ifdef SLIMGUI
// Let SLiMgui survey the population for mean fitness and such, if it is our target
population_.SurveyPopulation();
#endif
}
return file_generation;
}
#ifndef __clang_analyzer__
slim_generation_t SLiMSim::_InitializePopulationFromBinaryFile(const char *p_file, EidosInterpreter *p_interpreter)
{
std::size_t file_size = 0;
slim_generation_t file_generation;
int32_t spatial_output_count;
int age_output_count = 0;
bool has_nucleotides = false;
// Read file into buf
std::ifstream infile(p_file, std::ios::in | std::ios::binary);
if (!infile.is_open() || infile.eof())
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromBinaryFile): could not open initialization file." << EidosTerminate();
// Determine the file length
infile.seekg(0, std::ios_base::end);
file_size = infile.tellg();
// Read in the entire file; we assume we have enough memory, for now
std::unique_ptr<char[]> raii_buf(new char[file_size]);
char *buf = raii_buf.get();
if (!buf)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromBinaryFile): could not allocate input buffer." << EidosTerminate();
char *buf_end = buf + file_size;
char *p = buf;
infile.seekg(0, std::ios_base::beg);
infile.read(buf, file_size);
// Close the file; we will work only with our buffer from here on
infile.close();
int32_t section_end_tag;
int32_t file_version;
// Header beginning, to check endianness and determine file version
{
int32_t endianness_tag, version_tag;
if (p + sizeof(endianness_tag) + sizeof(version_tag) > buf_end)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromBinaryFile): unexpected EOF while reading header." << EidosTerminate();
endianness_tag = *(int32_t *)p;
p += sizeof(endianness_tag);
version_tag = *(int32_t *)p;
p += sizeof(version_tag);
if (endianness_tag != 0x12345678)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromBinaryFile): endianness mismatch." << EidosTerminate();
// version 4 is the same as version 3 but with an age value for each individual
if (version_tag == 4)
{
age_output_count = 1;
version_tag = 3;
}
if ((version_tag != 1) && (version_tag != 2) && (version_tag != 3) && (version_tag != 5))
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromBinaryFile): unrecognized version." << EidosTerminate();
file_version = version_tag;
}
// Header section
{
int32_t double_size;
double double_test;
int32_t slim_generation_t_size, slim_position_t_size, slim_objectid_t_size, slim_popsize_t_size, slim_refcount_t_size, slim_selcoeff_t_size, slim_mutationid_t_size, slim_polymorphismid_t_size;
int64_t flags = 0;
int header_length = sizeof(double_size) + sizeof(double_test) + sizeof(slim_generation_t_size) + sizeof(slim_position_t_size) + sizeof(slim_objectid_t_size) + sizeof(slim_popsize_t_size) + sizeof(slim_refcount_t_size) + sizeof(slim_selcoeff_t_size) + sizeof(file_generation) + sizeof(section_end_tag);
if (file_version >= 2)
header_length += sizeof(slim_mutationid_t_size) + sizeof(slim_polymorphismid_t_size);
if (file_version >= 5)
header_length += sizeof(flags);
if (p + header_length > buf_end)
EIDOS_TERMINATION << "ERROR (SLiMSim::_InitializePopulationFromBinaryFile): unexpected EOF while reading header." << EidosTerminate();
double_size = *(int32_t *)p;
p += sizeof(double_size);
double_test = *(double *)p;
p += sizeof(double_test);
if (file_version >= 5)
{
flags = *(int64_t *)p;
p += sizeof(flags);
if (flags & 0x01) age_output_count = 1;
if (flags & 0x02) has_nucleotides = true;
}
slim_generation_t_size = *(int32_t *)p;
p += sizeof(slim_generation_t_size);
slim_position_t_size = *(int32_t *)p;
p += sizeof(slim_position_t_size);
slim_objectid_t_size = *(int32_t *)p;
p += sizeof(slim_objectid_t_size);
slim_popsize_t_size = *(int32_t *)p;
p += sizeof(slim_popsize_t_size);
slim_refcount_t_size = *(int32_t *)p;
p += sizeof(slim_refcount_t_size);
slim_selcoeff_t_size = *(int32_t *)p;
p += sizeof(slim_selcoeff_t_size);