From 173b4d164e045077ce4e9a631d5f29cf7ca88524 Mon Sep 17 00:00:00 2001 From: soniamitchell Date: Fri, 13 Aug 2021 12:44:08 +0100 Subject: [PATCH 1/6] initial commit --- src/Genetics.jl | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/Genetics.jl diff --git a/src/Genetics.jl b/src/Genetics.jl new file mode 100644 index 0000000..217ef96 --- /dev/null +++ b/src/Genetics.jl @@ -0,0 +1,44 @@ +using Diversity +using Diversity.API + +import PopGen +import StringDistances +import LinearAlgebra # Symmetric() + +function _hammingDistance(geno1, geno2) + ismissing(geno1) || ismissing(geno2) && return missing + if length(geno1) > 2 + @warn "hamming_distance may not work correctly for ploidy > 2" + end + #TODO Fix ploidy > 2 - e.g. (1, 1, 1, 2) ≠ (1, 2, 2, 2) + + max(sum(geno1 .∉ Ref(geno2)), sum(geno2 .∉ Ref(geno1))) +end + +function genDistance(dat::PopData) + # Initialise objects + matrix_obj = PopGen.loci_matrix(dat) + N = size(matrix_obj, 1) + output = zeros(Float64, N, N) + indices = PopGen.pairwise_pairs(1:N) + + # Calculate distance matrix + for (a, b) in indices + output[a, b] = sum(_hammingDistance.((@view matrix_obj[a, :]), + (@view matrix_obj[b, :]))) + end + return LinearAlgebra.Symmetric(output) +end + +function genDistance(dat::AbstractVector) + # Initialise objects + N = length(dat) + output = zeros(Int64, N, N) + indices = PopGen.pairwise_pairs(1:N) + + # Calculate distance matrix + for (a, b) in indices + output[a, b] = StringDistances.evaluate(StringDistances.Hamming(), dat[a][2], dat[b][2]) + end + return LinearAlgebra.Symmetric(output) +end \ No newline at end of file From 3020fe7dfcf90e740bb8a9a86928dc7467e2b768 Mon Sep 17 00:00:00 2001 From: soniamitchell Date: Fri, 13 Aug 2021 12:44:29 +0100 Subject: [PATCH 2/6] export genDistance --- src/Diversity.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Diversity.jl b/src/Diversity.jl index 90202a7..0a7036c 100644 --- a/src/Diversity.jl +++ b/src/Diversity.jl @@ -93,6 +93,9 @@ export RawAlpha, NormalisedAlpha export RawBeta, NormalisedBeta, RawRho, NormalisedRho export Gamma +include("Genetics.jl") +export genDistance + """ Diversity.ShortNames submodule From 726a281e5c872134fa85e66c43342d497cf6ae63 Mon Sep 17 00:00:00 2001 From: soniamitchell Date: Fri, 13 Aug 2021 12:44:36 +0100 Subject: [PATCH 3/6] typo --- src/Diversity.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Diversity.jl b/src/Diversity.jl index 0a7036c..4009bae 100644 --- a/src/Diversity.jl +++ b/src/Diversity.jl @@ -18,7 +18,7 @@ not automatically exported (as we feel they are too short) and with matching longer ASCII names (e.g. ```NormalisedAlpha()```), which are. We also provide functions to calculate appropriate ```subcommunityDiversity()``` and ```metacommunityDiversity()``` -values for each measure, a general ```diversity()``` function for +values for each measure, and a general ```diversity()``` function to extract any diversity measure at a series of scales. """ module Diversity From f62211549c833ae6e65a2c6b7b82bde67eb69fb7 Mon Sep 17 00:00:00 2001 From: soniamitchell Date: Fri, 13 Aug 2021 12:45:01 +0100 Subject: [PATCH 4/6] rename --- src/Diversity.jl | 2 +- src/Genetics.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Diversity.jl b/src/Diversity.jl index 4009bae..ae9c1d6 100644 --- a/src/Diversity.jl +++ b/src/Diversity.jl @@ -94,7 +94,7 @@ export RawBeta, NormalisedBeta, RawRho, NormalisedRho export Gamma include("Genetics.jl") -export genDistance +export geneDistance """ Diversity.ShortNames submodule diff --git a/src/Genetics.jl b/src/Genetics.jl index 217ef96..32fcb72 100644 --- a/src/Genetics.jl +++ b/src/Genetics.jl @@ -15,7 +15,7 @@ function _hammingDistance(geno1, geno2) max(sum(geno1 .∉ Ref(geno2)), sum(geno2 .∉ Ref(geno1))) end -function genDistance(dat::PopData) +function geneDistance(dat::PopData) # Initialise objects matrix_obj = PopGen.loci_matrix(dat) N = size(matrix_obj, 1) From 7efb6b0a2ac610792550f82290999fb042cc340a Mon Sep 17 00:00:00 2001 From: soniamitchell Date: Mon, 16 Aug 2021 16:13:56 +0100 Subject: [PATCH 5/6] typo --- src/Genetics.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Genetics.jl b/src/Genetics.jl index 32fcb72..f78adde 100644 --- a/src/Genetics.jl +++ b/src/Genetics.jl @@ -30,7 +30,7 @@ function geneDistance(dat::PopData) return LinearAlgebra.Symmetric(output) end -function genDistance(dat::AbstractVector) +function geneDistance(dat::AbstractVector) # Initialise objects N = length(dat) output = zeros(Int64, N, N) From da8b15a53c3d0256d8baff16c61c2b289d4f589b Mon Sep 17 00:00:00 2001 From: soniamitchell Date: Mon, 16 Aug 2021 17:10:53 +0100 Subject: [PATCH 6/6] create genetic type --- src/Genetics.jl | 110 +++++++++++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/src/Genetics.jl b/src/Genetics.jl index f78adde..908c176 100644 --- a/src/Genetics.jl +++ b/src/Genetics.jl @@ -1,44 +1,68 @@ -using Diversity -using Diversity.API - -import PopGen -import StringDistances -import LinearAlgebra # Symmetric() - -function _hammingDistance(geno1, geno2) - ismissing(geno1) || ismissing(geno2) && return missing - if length(geno1) > 2 - @warn "hamming_distance may not work correctly for ploidy > 2" - end - #TODO Fix ploidy > 2 - e.g. (1, 1, 1, 2) ≠ (1, 2, 2, 2) +# using Diversity +# using Diversity.API + +# using PopGen +# using StringDistances +# using LinearAlgebra + +# using FASTX + +# abstract type AbstractGeneticTypes{PopData} <: +# Diversity.API.AbstractTypes +# end + +# struct GeneticType{PopData} <: AbstractGeneticTypes{PopData} +# dat::PopData +# ntypes::Int64 +# Zmatrix::Matrix{Float64} +# end + +# function _hammingDistance(geno1, geno2) +# ismissing(geno1) || ismissing(geno2) && return missing +# if length(geno1) > 2 +# @warn "hamming_distance may not work correctly for ploidy > 2" +# end +# #TODO Fix ploidy > 2 - e.g. (1, 1, 1, 2) ≠ (1, 2, 2, 2) - max(sum(geno1 .∉ Ref(geno2)), sum(geno2 .∉ Ref(geno1))) -end - -function geneDistance(dat::PopData) - # Initialise objects - matrix_obj = PopGen.loci_matrix(dat) - N = size(matrix_obj, 1) - output = zeros(Float64, N, N) - indices = PopGen.pairwise_pairs(1:N) - - # Calculate distance matrix - for (a, b) in indices - output[a, b] = sum(_hammingDistance.((@view matrix_obj[a, :]), - (@view matrix_obj[b, :]))) - end - return LinearAlgebra.Symmetric(output) -end - -function geneDistance(dat::AbstractVector) - # Initialise objects - N = length(dat) - output = zeros(Int64, N, N) - indices = PopGen.pairwise_pairs(1:N) - - # Calculate distance matrix - for (a, b) in indices - output[a, b] = StringDistances.evaluate(StringDistances.Hamming(), dat[a][2], dat[b][2]) - end - return LinearAlgebra.Symmetric(output) -end \ No newline at end of file +# max(sum(geno1 .∉ Ref(geno2)), sum(geno2 .∉ Ref(geno1))) +# end + +# function GeneticType(dat::PopData) +# # Initialise objects +# matrix_obj = PopGen.loci_matrix(dat) +# ntypes = size(matrix_obj, 1) +# output = zeros(Float64, ntypes, ntypes) +# indices = PopGen.pairwise_pairs(1:ntypes) + +# # Calculate distance matrix +# for (a, b) in indices +# output[a, b] = sum(_hammingDistance.((@view matrix_obj[a, :]), +# (@view matrix_obj[b, :]))) +# end +# dist = Symmetric(output) +# dist /= maximum(dist) + +# # Calculate similarity matrix +# Zmatrix = 1 .- dist + +# return GeneticType{PopData}(dat, ntypes, Zmatrix) +# end + +# function GeneticType(dat::Vector) # Vector{BioSequences.AminoAcidSequence} +# # Initialise objects +# ntypes = length(dat) +# output = zeros(Int64, ntypes, ntypes) +# indices = PopGen.pairwise_pairs(1:ntypes) + +# # Calculate distance matrix +# for (a, b) in indices +# output[a, b] = evaluate(Hamming(), dat[a], dat[b]) +# end +# dist = Symmetric(output) +# dist /= maximum(dist) + +# # Calculate similarity matrix +# Zmatrix = 1 .- dist + +# return GeneticType{BioSequences.AminoAcidSequence}(dat, ntypes, Zmatrix) +# end \ No newline at end of file