diff --git a/.gitignore b/.gitignore
index 24b7e8a..da2398b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ Makevars
 
 testscripts.R
 testscript_op
+tests
+
diff --git a/R/BacGWES.R b/R/BacGWES.R
index e16de33..9362189 100644
--- a/R/BacGWES.R
+++ b/R/BacGWES.R
@@ -33,7 +33,8 @@
 #' @param ncores specify the number of cores to use for parallel processing. Auto detect (detect = NULL)
 #' @param max_blk_sz specify maximum block size for MI computation (default = 10000), larger sizes require more RAM, range 1000 - 100000
 #' @param save_additional_outputs specify whether to save outputs such as extracted SNPs and Hamming distance weights. Recommended for very large datasets to save time on re-computation (default = F)
-#' @param mega_dset specify whether the datasets is megascale. This mode requires spam and spam64 packages. This is upto 5 times slower, set to TRUE only if the normal analysis fails (default = F)
+#' @param mega_dset specify whether the dataset is mega scale. This mode requires spam and spam64 packages. This is >5 times slower, set to TRUE only if the normal analysis fails (default = F)
+
 #'
 #' @return All generated outputs will be saved to folder <dset>.
 #'
@@ -53,6 +54,12 @@
 #' aln_path <- system.file("extdata", "snp_sample.fa.gz", package = "LDWeaver")
 #' pos <- as.numeric(readLines(system.file("extdata", "snp_sample.fa.pos", package = "LDWeaver")))
 #' LDWeaver::LDWeaver(dset = dset,  aln_path = aln_path, aln_has_all_bases = F, pos = pos, gbk_path = gbk_path)
+#'
+#' # Example 3 - Redoing the full analysis as a mega scale dataset
+#' dset <- "full_dset_spam"
+#' gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver")
+#' aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver")
+#' LDWeaver::LDWeaver(dset = dset,  aln_path = aln_path,  gbk_path = gbk_path, validate_ref_ann_lengths = F, mega_dset = T)
 #' }
 #' @export
 LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path = NULL, gff3_path = NULL,
@@ -175,6 +182,9 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
       message("mega_dset is set to TRUE but spam and spam64 dependencies are not installed.")
       return(invisible())
     }
+    message("mega_dset is selected. Warning! This mode has a much slower run time. Setting spam.force64=TRUE (see https://cran.r-project.org/web/packages/spam64/spam64.pdf)")
+    options(spam.force64 = TRUE)
+
   }
 
 
@@ -235,7 +245,7 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
     if(perform_SR_analysis_only) cat("Only short-range analysis requested. \n")
     cat(paste("All outputs will be saved to:", normalizePath(dset), "\n"))
     cat(paste("\n *** Input paths *** \n\n"))
-    cat(paste("* Alignment:", aln_path, "\n"))
+    if(mega_dset) cat(paste("* Mega Alignment:", aln_path, "\n")) else cat(paste("* Alignment:", aln_path, "\n"))
     if(!is.null(gbk_path)) {
       cat(paste("* GenBank Annotation:", gbk_path, "\n"))
       cat(paste("* Parser built using genbankr source (https://github.com/gmbecker/genbankr) \n"))
@@ -254,7 +264,7 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
     cat(paste("Links <=", sr_dist, "bp-apart will be classified as short-range (sr-links) \n"))
     if(!perform_SR_analysis_only) cat(paste("Approx. top", lr_retain_links, "long range links will be saved \n"))
     cat(paste("Top sr-links with -log10(p) >", srp_cutoff, "will be saved \n"))
-    cat(paste("Tanglegram/GWESExplorer outputs will illustrate upto:", max_tophits, "top sr-links \n"))
+    if(!is.null(tanglegram_break_segments)) cat(paste("Tanglegram/GWESExplorer outputs will illustrate upto:", max_tophits, "top sr-links \n"))
     cat(paste("MI Computation will use a max block size of:", max_blk_sz, "x", max_blk_sz, "SNPs! Reduce <max_blk_sz> if RAM is scarce!\n\n"))
     cat(paste("~~~~~ https://github.com/Sudaraka88/LDWeaver/ ~~~~~"))
   }
@@ -269,13 +279,13 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
 
     # Adding support for SNP-only alignments
     if(aln_has_all_bases == T){
-      snp.dat = LDWeaver::parse_fasta_alignment(aln_path = aln_path, method = snp_filt_method, gap_freq = gap_freq, maf_freq = maf_freq)
+      snp.dat = LDWeaver::parse_fasta_alignment(aln_path = aln_path, method = snp_filt_method, gap_freq = gap_freq, maf_freq = maf_freq, mega_dset = mega_dset)
       if(save_additional_outputs){
         cat("Step 5: Savings snp.dat...")
         saveRDS(snp.dat, ACGTN_snp_path)
       }
     } else {
-      snp.dat = LDWeaver::parse_fasta_SNP_alignment(aln_path = aln_path, pos = pos, method = snp_filt_method, gap_freq = gap_freq, maf_freq = maf_freq)
+      snp.dat = LDWeaver::parse_fasta_SNP_alignment(aln_path = aln_path, pos = pos, method = snp_filt_method, gap_freq = gap_freq, maf_freq = maf_freq, mega_dset = mega_dset)
       # Note that snp.dat$g = NULL (we cannot measure this, need to get it from the genbank file)
       # we cannot save snp.dat here due to absent snp.dat$g, moving downstream (block 2)
     }
@@ -375,7 +385,8 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
                                                 lr_save_path = lr_save_path, sr_save_path = sr_save_path,
                                                 plt_folder = dset, sr_dist = sr_dist, lr_retain_links = lr_retain_links,
                                                 max_blk_sz = max_blk_sz, srp_cutoff = srp_cutoff, runARACNE = T,
-                                                perform_SR_analysis_only = perform_SR_analysis_only, order_links = order_links)
+                                                perform_SR_analysis_only = perform_SR_analysis_only,
+                                                order_links = order_links,mega_dset = mega_dset)
   }
 
 
@@ -436,7 +447,9 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
     gwesexplorer_path = file.path(dset, "SR_GWESExplorer")
     if(!file.exists(gwesexplorer_path)) dir.create(gwesexplorer_path)
     cat("\n\n #################### BLOCK 10 #################### \n\n")
-    LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits, gwes_explorer_folder = gwesexplorer_path)
+    if(mega_dset) {
+      message("GWES Explorer output currently not generated for mega datasets\n")
+      } else LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits, gwes_explorer_folder = gwesexplorer_path)
   }
 
 
@@ -455,13 +468,13 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path
       if( !(  (file.exists(file.path(dset, "lr_tophits.tsv"))) | (file.exists(file.path(dset, "Tophits/lr_tophits.tsv"))) ) ) { # if the annotated_links file exists, no need to run again
         LDWeaver::analyse_long_range_links(dset = dset, lr_links_path =  lr_save_path, sr_links_path = sr_save_path, SnpEff_Annotate = T, snpeff_jar_path = snpeff_jar_path,
                                            gbk_path = gbk_path, gff3_path = gff3_path, snp.dat = snp.dat, cds_var = cds_var, ref_fasta_path = ref_fasta_path,
-                                           validate_ref_ann_lengths = validate_ref_ann_lengths)
+                                           validate_ref_ann_lengths = validate_ref_ann_lengths, mega_dset = mega_dset)
       } else {
         cat("Results from previous LR anlayis exist!")
       }
     } else {
       if( !(  (file.exists(file.path(dset, "lr_gwes.png"))) | (file.exists(file.path(dset, "GWESPlots/lr_gwes.png"))) ) ) { # if the lr_gwes plot exist, no need to run again
-        LDWeaver::analyse_long_range_links(dset = dset, lr_links_path =  lr_save_path, sr_links_path = sr_save_path, SnpEff_Annotate = F)
+        LDWeaver::analyse_long_range_links(dset = dset, lr_links_path =  lr_save_path, sr_links_path = sr_save_path, SnpEff_Annotate = F, mega_dset = mega_dset)
       } else {
         cat("Results from previous LR anlayis exist!")
       }
diff --git a/R/computePairwiseMI.R b/R/computePairwiseMI.R
index 0a5f43b..690b219 100644
--- a/R/computePairwiseMI.R
+++ b/R/computePairwiseMI.R
@@ -46,6 +46,14 @@
 perform_MI_computation = function(snp.dat, hdw, cds_var, ncores, lr_save_path = NULL, sr_save_path = NULL, plt_folder = NULL,
                                   sr_dist = 20000, lr_retain_links = 1e6, max_blk_sz = 10000, srp_cutoff = 3, runARACNE = TRUE,
                                   perform_SR_analysis_only = FALSE, order_links = T, mega_dset = F){
+
+  ## DEBUG LINES - DO NOT DELETE, but REMEMBER TO KEEP THEM COMMENTED OUT
+  # lr_save_path = "testscript_op/lr_links_spam.tsv"
+  # sr_save_path = "testscript_op/sr_links_spam.tsv"
+  # plt_folder = "testscript_op"
+  # sr_dist = 20000; lr_retain_links = 1e6; max_blk_sz = 10000; srp_cutoff = 3
+  # Rcpp::sourceCpp("src/computeMI.cpp"); Rcpp::sourceCpp("src/fintersect.cpp")
+
   t000 = Sys.time()
   # TODO: if no paths are given, we need a way to stop overwriting (use timestamp()?)
   if(is.null(lr_save_path)) lr_save_path = file.path(getwd(), "lr_links.tsv")
diff --git a/R/extractSNPs.R b/R/extractSNPs.R
index 1e90799..f839cd0 100644
--- a/R/extractSNPs.R
+++ b/R/extractSNPs.R
@@ -53,6 +53,10 @@ parse_fasta_alignment <- function(aln_path, gap_freq = 0.15, maf_freq = 0.01, me
       message("This feature requires spam and spam64 packages.")
       return(invisible())
     } else {
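+      # Make sure spam64 is in use (set quietly). NOTE(review): getOption() returns NULL when spam.force64 is unset, which makes the if() below error - consider isTRUE()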
+      if(!getOption("spam.force64")) options(spam.force64 = T)
+
+
       snp.matrix_A <- spam::spam(list(i=snp.data$i_A, j=snp.data$j_A, values=as.logical(snp.data$x_A)),
                                  nrow = snp.param$num.seqs, ncol = snp.param$num.snps)
       snp.data$i_A = snp.data$j_A = snp.data$x_A = NULL
@@ -185,6 +189,10 @@ parse_fasta_SNP_alignment <- function(aln_path, pos, gap_freq = 0.15, maf_freq =
       message("This feature requires spam and spam64 packages.")
       return(invisible())
     } else {
+
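+      # Make sure spam64 is in use (set quietly). NOTE(review): getOption() returns NULL when spam.force64 is unset, which makes the if() below error - consider isTRUE()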
+      if(!getOption("spam.force64")) options(spam.force64 = T)
+
       snp.matrix_A <- spam::spam(list(i=snp.data$i_A, j=snp.data$j_A, values=as.logical(snp.data$x_A)),
                                  nrow = snp.param$num.seqs, ncol = snp.param$num.snps)
       snp.data$i_A = snp.data$j_A = snp.data$x_A = NULL
diff --git a/R/lr_analyser.R b/R/lr_analyser.R
index 3d3f749..42b8de8 100644
--- a/R/lr_analyser.R
+++ b/R/lr_analyser.R
@@ -20,6 +20,7 @@
 #' @param max_tophits specify the maximum number of long range links to save as <lr_tophits.tsv>. Note: all short-range links will be annotated (and saved separately),
 #' but only the top <max_tophits> will be used for visualisation (default = 500)
 #' @param links_from_spydrpick are the links computed using spydrpick (default = F)
+#' @param mega_dset set TRUE for mega scale datasets (default = F)
 #'
 #' @examples
 #' \dontrun{
@@ -29,8 +30,8 @@
 analyse_long_range_links = function(dset, lr_links_path, sr_links_path, are_lrlinks_ordered = F, SnpEff_Annotate = F,
                                     snpeff_jar_path = NULL, gbk_path = NULL, gff3_path = NULL, ref_fasta_path = NULL,
                                     validate_ref_ann_lengths = T, snp.dat = NULL, cds_var = NULL, max_tophits = 500,
-                                    links_from_spydrpick = F){
-                                    # tanglegram_break_segments = 5){
+                                    links_from_spydrpick = F, mega_dset = F){
+  # tanglegram_break_segments = 5){
 
   #TODO: We are redoing the SnpEff annotation for long-range links, might be better to do it in one run
   # it makes sense to have a larger max_tophits for long range links - there will be a lot more of long-range links compared to short
@@ -158,9 +159,9 @@ analyse_long_range_links = function(dset, lr_links_path, sr_links_path, are_lrli
     }
 
     tophits = LDWeaver::perform_snpEff_annotations(dset_name = dset, annotation_folder = file.path(getwd(), dset),
-                                                  snpeff_jar = snpeff_jar_path, gbk = gbk, gbk_path = gbk_path,
-                                                  gff = gff, cds_var = cds_var, links_df = lr_links_red, snp.dat = snp.dat,
-                                                  tophits_path = tophits_path, max_tophits = max_tophits, links_type = "LR")
+                                                   snpeff_jar = snpeff_jar_path, gbk = gbk, gbk_path = gbk_path,
+                                                   gff = gff, cds_var = cds_var, links_df = lr_links_red, snp.dat = snp.dat,
+                                                   tophits_path = tophits_path, max_tophits = max_tophits, links_type = "LR")
 
     # Tanglegram is difficult to read when plotted like this, best to avoid!
     # tanglegram_path = file.path(dset, "LR_Tanglegram")
@@ -171,8 +172,12 @@ analyse_long_range_links = function(dset, lr_links_path, sr_links_path, are_lrli
     cat("\n")
     gwesexplorer_path = file.path(dset, "LR_GWESExplorer")
     if(!file.exists(gwesexplorer_path)) dir.create(gwesexplorer_path)
-    LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits,
-                                            gwes_explorer_folder = gwesexplorer_path, links_type = "LR")
+    if(mega_dset) {
+      message("GWES Explorer output currently not generated for mega datasets\n")
+    } else LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits,
+                                                    gwes_explorer_folder = gwesexplorer_path, links_type = "LR")
+
+
 
     cat("\n")
 
diff --git a/README.md b/README.md
index 1145fb6..79c89c9 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,3 @@
-## Genomewide Co-selection and Epistasis in Bacteria <img src='images/icon.jpg' align="right" height="155" />
-
 <!-- badges: start -->
 
 [![R](https://github.com/Sudaraka88/LDWeaver/workflows/R-CMD-check/badge.svg)](https://github.com/Sudaraka88/LDWeaver/actions)
@@ -7,7 +5,8 @@
 [![LICESNSE](https://anaconda.org/bioconda/r-ldweaver/badges/license.svg)](https://spdx.org/licenses/GPL-3.0-or-later.html)
 <!-- badges: end -->
 
-## About
+## Genomewide Co-selection and Epistasis in Bacteria <img src='images/icon_dark.jpg' align="right" height="225" />
+
 
 LDWeaver accepts a sequence alignment (fasta) and its reference annotation 
 (genbank or gff) as inputs and identifies linkage disequilibrium (LD) between 
@@ -88,7 +87,7 @@ aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver")
 gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver")
 snp_filt_method = "relaxed"
 LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, validate_ref_ann_lengths = F,
-        num_clusts_CDS = 2, SnpEff_Annotate = F, snp_filt_method = snp_filt_method)
+num_clusts_CDS = 2, SnpEff_Annotate = F, snp_filt_method = snp_filt_method)
 ```
 
 >**Note** If you are using a SNP-only alignment, set `aln_has_all_bases = F` and provide `pos`, a numeric vector of SNP positions. Each SNP in the SNP-only alignment must have a unique SNP position.
@@ -125,9 +124,9 @@ aln_path <- "spn23f_msch.aln.gz"
 gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver")
 
 LDWeaver::LDWeaver(dset = dset, 
-                   aln_path = aln_path, 
-                   gbk_path = gbk_path, 
-                   save_additional_outputs = T)
+aln_path = aln_path, 
+gbk_path = gbk_path, 
+save_additional_outputs = T)
 ```
 
 `LDWeaver::LDWeaver()` one-liner is versatile for most 
@@ -149,9 +148,9 @@ ncores = parallel::detectCores()
 snp.dat = LDWeaver::parse_fasta_alignment(aln_path = aln_path) # parse the alignment and extract SNPs
 gbk = LDWeaver::parse_genbank_file(gbk_path = gbk_path, g = snp.dat$g) # parse the annotation
 cds_var = LDWeaver::estimate_variation_in_CDS(gbk = gbk, snp.dat = snp.dat, 
-                                             ncores = ncores, 
-                                             num_clusts_CDS = 3, 
-                                             clust_plt_path = "msch/CDS_clustering.png")
+ncores = ncores, 
+num_clusts_CDS = 3, 
+clust_plt_path = "msch/CDS_clustering.png")
 ```
 
 ![](inst/sup/CDS_clustering.png)
@@ -161,10 +160,10 @@ hdw = LDWeaver::estimate_Hamming_distance_weights(snp.dat = snp.dat) # Hamming d
 
 # Perform MI computation model fitting and ARACNE - this will take some time...
 sr_links = LDWeaver::perform_MI_computation(snp.dat = snp.dat, hdw = hdw,
-                                            cds_var = cds_var, ncores = ncores,
-                                            lr_save_path = "msch/lr_links.tsv", 
-                                            sr_save_path = "msch/sr_links.tsv",
-                                            plt_folder = dset)
+cds_var = cds_var, ncores = ncores,
+lr_save_path = "msch/lr_links.tsv", 
+sr_save_path = "msch/sr_links.tsv",
+plt_folder = dset)
 ```
 
 ![](inst/sup/c1_fit.png) ![](inst/sup/c2_fit.png)
@@ -180,9 +179,9 @@ LDWeaver::make_gwes_plots(sr_links = sr_links, plt_folder = dset)
 ``` r
 # Identify the top hits by performing snpEff annotations
 tophits = LDWeaver::perform_snpEff_annotations(dset_name = dset, annotation_folder = file.path(getwd(), dset), 
-                                               gbk = gbk, gbk_path = gbk_path, cds_var = cds_var, 
-                                               links_df = sr_links, snp.dat = snp.dat, 
-                                               tophits_path = "msch/sr_tophits.tsv")
+gbk = gbk, gbk_path = gbk_path, cds_var = cds_var, 
+links_df = sr_links, snp.dat = snp.dat, 
+tophits_path = "msch/sr_tophits.tsv")
 ```
 
 This will generate several outputs comprising annotations into the
@@ -199,7 +198,7 @@ should look like this: ![](inst/sup/Tanglegram_screenshot.png)
 ``` r
 # Generate GWES Explorer outputs
 LDWeaver::write_output_for_gwes_explorer(snp.dat = snp.dat, tophits = tophits, 
-                                         gwes_explorer_folder = "msch/SR_GWESExplorer")
+gwes_explorer_folder = "msch/SR_GWESExplorer")
 ```
 
 Above line will create three files in \<msch/SR_GWESExplorer/\> that can be
@@ -214,8 +213,8 @@ Next step is to analyse the long range links
 ``` r
 # Analyse long range links
 LDWeaver::analyse_long_range_links(dset = dset, lr_links_path = "msch/lr_links.tsv", 
-                                   sr_links_path = "msch/sr_links.tsv", SnpEff_Annotate = T,
-                                   snp.dat = snp.dat, gbk_path = gbk_path, cds_var = cds_var)
+sr_links_path = "msch/sr_links.tsv", SnpEff_Annotate = T,
+snp.dat = snp.dat, gbk_path = gbk_path, cds_var = cds_var)
 ```
 ![](inst/sup/lr_gwes.png)
 
@@ -230,8 +229,8 @@ LDWeaver::cleanup(dset)
 It is possible to generate a genomewide LD distribution map using the following:
 ``` r
 LDWeaver::genomewide_LDMap(lr_links_path = "msch/Temp/lr_links.tsv", 
-                           sr_links_path = "msch/Temp/sr_links.tsv", 
-                           plot_save_path = "msch/GWLD.png")
+sr_links_path = "msch/Temp/sr_links.tsv", 
+plot_save_path = "msch/GWLD.png")
 ```
 > **Note** The paths have now updated after running LDWeaver::cleanup().
 
@@ -244,19 +243,21 @@ sites and their magnitude can be generated using:
 # Generate the Network Plot for pbp genes
 
 network = LDWeaver::create_network_for_gene("pbp", 
-                         sr_annotated_path = "msch/Annotated_links/sr_links_annotated.tsv", 
-                         lr_annotated_path = "msch/Annotated_links/lr_links_annotated.tsv", 
-                         level = 2)
+sr_annotated_path = "msch/Annotated_links/sr_links_annotated.tsv", 
+lr_annotated_path = "msch/Annotated_links/lr_links_annotated.tsv", 
+level = 2)
 
 LDWeaver::create_network(network, 
-          plot_title = "pbp network", 
-          netplot_path = "msch/pbp_network.png", 
-          plot_w = 2000, plot_h = 2000)
+plot_title = "pbp network", 
+netplot_path = "msch/pbp_network.png", 
+plot_w = 2000, plot_h = 2000)
 ```
 ![](inst/sup/network_plot.png)
 
 ## Additional Information
 
+> **Note** With LDWeaver >1.5, you can analyse mega scale datasets with more than 2^31 - 1 elements. This requires the <a href="https://cran.r-project.org/web/packages/spam/" target="_blank">spam</a> and <a href="https://cran.r-project.org/web/packages/spam64/" target="_blank">spam64</a> packages. Set `mega_dset=T` in `LDWeaver::LDWeaver()` to use this feature. Warning! This is currently considerably slower than the default mode (`mega_dset=F`) and only supports single core operations. There will also be minor discrepancies between the two methods due to floating point errors; however, this should only have a minimal impact on the final link ranking.
+
 ### Key Outputs
 
 If the above steps worked as expected, the following output will be saved to a 
@@ -265,26 +266,27 @@ folder called `sample`, which should be created in the current working directory
 
 - Figures
 
-  1.  sample/cX_fit.png - shows the distribution and modelling of the
-      background linkage disequilibrium (estimated using weighted Mutual
-      Information) vs. bp-separation within each cluster (X = 1,2 in the
-      example)
-  2.  sample/CDS_clustering.png - shows the genome partitioning, based on 
-      the CDS diversity (compared to the reference sequence)
-  3.  sample/sr_gwes_clust.png - short-range GWES plot for each cluster (2 in
-      this case)
-  4.  sample/sr_gwes_combi.png - combined short-range GWES plot (for links with
-      bp positions spanning two clusters, the max srp_value is used)
-  5.  sample/lr_gwes.png - Long range GWES plot (similar to the output from
-      <a href="https://github.com/santeripuranen/SpydrPick" target="_blank">SpydrPick</a>)
+1.  sample/cX_fit.png - shows the distribution and modelling of the
+background linkage disequilibrium (estimated using weighted Mutual
+Information) vs. bp-separation within each cluster (X = 1,2 in the
+example)
+2.  sample/CDS_clustering.png - shows the genome partitioning, based on 
+the CDS diversity (compared to the reference sequence)
+3.  sample/sr_gwes_clust.png - short-range GWES plot for each cluster (2 in
+this case)
+4.  sample/sr_gwes_combi.png - combined short-range GWES plot (for links with
+bp positions spanning two clusters, the max srp_value is used)
+5.  sample/lr_gwes.png - Long range GWES plot (similar to the output from
+<a href="https://github.com/santeripuranen/SpydrPick" target="_blank">SpydrPick</a>)
 
 - Outputs
 
-  1.  sample/sr_links.tsv - tab separated file containing details on
-      short-range links (i.e. links \<= sr_dist bp apart)
-  2.  sample/lr_links.tsv - tab separated file containing details on long-range
-      links (i.e. links \> sr_dist bp apart)
-      
+1.  sample/sr_links.tsv - tab separated file containing details on
+short-range links (i.e. links \<= sr_dist bp apart)
+2.  sample/lr_links.tsv - tab separated file containing details on long-range
+links (i.e. links \> sr_dist bp apart)
+
+
 ### Extra Outputs
 
 > **Note** The default `sr_dist` value in LDWeaver is 20000bp (user modifiable).
@@ -292,11 +294,11 @@ folder called `sample`, which should be created in the current working directory
 
 - Additional Outputs (*not generated*) - can be used to avoid costly re-computations.
 
-  1.  Additional_Outputs/snp_ACGTN.rds - list comprising sparse SNP data from the alignment
-  2.  Additional_Outputs/parsed_gbk.rds - GenBankRecord of the genbank annotation data
-  3.  Additional_Outputs/hdw.rds - named vector comprising Hamming distance weights for
-      each sequence
-  4.  Additional_Outputs/cds_var.rds - list comprising alignment diversity information
+1.  Additional_Outputs/snp_ACGTN.rds - list comprising sparse SNP data from the alignment
+2.  Additional_Outputs/parsed_gbk.rds - GenBankRecord of the genbank annotation data
+3.  Additional_Outputs/hdw.rds - named vector comprising Hamming distance weights for
+each sequence
+4.  Additional_Outputs/cds_var.rds - list comprising alignment diversity information
 
 > **Note** For very large datsets, the user has the option to set `save_additional_outputs=T`. 
 > When these four files are present in \<dset\>/Additional_Outputs/, the saved information
@@ -311,18 +313,18 @@ refers to **sr** (short range) or **lr** (long range).
 
 - Outputs
 
-  1. Annotated_links/X_links_annotated.tsv - tab separated file similar to
-      sample/X_links.tsv with additional SnpEff annotations and allele
-      distribution information
-  2. Tophits/X_tophits.tsv - tab separated file containing the top 250
-      links (user modifiable with `max_tophipts`) . Several filters are applied 
-      to extract the top links from Annotated_links/X_links_annotated.tsv
-  3. SR_Tanglegram - folder compirising html tanglegrams to easily
-      visualise links and the corresponding genomic regions
-  4. GWESExplorer/X_GWESExplorer - folder containing the outputs necessary to dynamically 
-      explore links using
-      <a href="https://github.com/jurikuronen/GWES-Explorer" target="_blank">GWESExplorer</a> 
-      (X = sr,lr).
+1. Annotated_links/X_links_annotated.tsv - tab separated file similar to
+sample/X_links.tsv with additional SnpEff annotations and allele
+distribution information
+2. Tophits/X_tophits.tsv - tab separated file containing the top 250
+links (user modifiable with `max_tophits`). Several filters are applied
+to extract the top links from Annotated_links/X_links_annotated.tsv
+3. SR_Tanglegram - folder comprising html tanglegrams to easily
+visualise links and the corresponding genomic regions
+4. GWESExplorer/X_GWESExplorer - folder containing the outputs necessary to dynamically 
+explore links using
+<a href="https://github.com/jurikuronen/GWES-Explorer" target="_blank">GWESExplorer</a> 
+(X = sr,lr).
 
 > **Note** The default srp_cutoff is 3 (i.e., p=0.001). Short-range links
 > with p\>0.001 are automatically discarded, this can be modified using
@@ -332,15 +334,14 @@ refers to **sr** (short range) or **lr** (long range).
 - Temporary files created during snpEff annotations. These are all written to \<dset\>/Temp 
 and can be ignored or safely deleted)
 
-  1. Temp/snpEff_data - data folder for snpEff
-  2. Temp/snpEff.config - configuration file for snpEff
-  3. Temp/X_annotations.tsv - tab separated file containing full snpEff
-      annotations on each site associated with a short-range GWES link
-      with srp_max \> srp_cutoff
-  4. Temp/X_annotataed_stats.genes.txt - annotations and statistics in tab
-      separated format
-  5. Temp/X_annotated_stats.html - annotations and statistics in html
-      format
-  6. Temp/X_snps.vcf, Temp/X_snps_ann.vcf - input and output from the snpEff
-      annotation pipeline
-
+1. Temp/snpEff_data - data folder for snpEff
+2. Temp/snpEff.config - configuration file for snpEff
+3. Temp/X_annotations.tsv - tab separated file containing full snpEff
+annotations on each site associated with a short-range GWES link
+with srp_max \> srp_cutoff
+4. Temp/X_annotated_stats.genes.txt - annotations and statistics in tab
+separated format
+5. Temp/X_annotated_stats.html - annotations and statistics in html
+format
+6. Temp/X_snps.vcf, Temp/X_snps_ann.vcf - input and output from the snpEff
+annotation pipeline
\ No newline at end of file
diff --git a/images/icon_dark.jpg b/images/icon_dark.jpg
new file mode 100644
index 0000000..c4d73f7
Binary files /dev/null and b/images/icon_dark.jpg differ
diff --git a/images/icon_light.jpg b/images/icon_light.jpg
new file mode 100644
index 0000000..2ecb76c
Binary files /dev/null and b/images/icon_light.jpg differ
diff --git a/man/LDWeaver.Rd b/man/LDWeaver.Rd
index f643ed1..690ebe3 100644
--- a/man/LDWeaver.Rd
+++ b/man/LDWeaver.Rd
@@ -87,7 +87,9 @@ Larger values will reduce memory usage, plotting time and ARACNE run time. If al
 
 \item{save_additional_outputs}{specify whether to save outputs such as extracted SNPs and Hamming distance weights. Recommended for very large datasets to save time on re-computation (default = F)}
 
-\item{mega_dset}{specify whether the datasets is megascale. This mode requires spam and spam64 packages. This is upto 5 times slower, set to TRUE only if the normal analysis fails (default = F)}
+
+\item{mega_dset}{specify whether the dataset is mega scale. This mode requires spam and spam64 packages. This is >5 times slower, set to TRUE only if the normal analysis fails (default = F)}
+
 }
 \value{
 All generated outputs will be saved to folder <dset>.
@@ -111,5 +113,11 @@ gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver")
 aln_path <- system.file("extdata", "snp_sample.fa.gz", package = "LDWeaver")
 pos <- as.numeric(readLines(system.file("extdata", "snp_sample.fa.pos", package = "LDWeaver")))
 LDWeaver::LDWeaver(dset = dset,  aln_path = aln_path, aln_has_all_bases = F, pos = pos, gbk_path = gbk_path)
+
+dset <- "full_dset_spam"
+gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver")
+aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver")
+LDWeaver::LDWeaver(dset = dset,  aln_path = aln_path,  gbk_path = gbk_path, validate_ref_ann_lengths = F, mega_dset = T)
+
 }
 }