Skip to content

Commit

Permalink
Merge pull request #122 from alyst/more-0.7
Browse files Browse the repository at this point in the history
More Julia v0.7/1.0 fixes
  • Loading branch information
alyst committed Aug 15, 2018
2 parents 7c5034b + f9cb54e commit 4b4105b
Show file tree
Hide file tree
Showing 19 changed files with 361 additions and 324 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ os:
- osx
julia:
- 0.7
- 1.0
- nightly
matrix:
allow_failures:
Expand All @@ -14,6 +15,6 @@ notifications:
# uncomment the following lines to override the default test script
#script:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("Clustering"); Pkg.test("Clustering"; coverage=true)'
# - julia -e 'using Pkg; Pkg.add(pwd()); Pkg.build("Clustering"); Pkg.test("Clustering"; coverage=true)'
after_success:
- julia -e 'cd(Pkg.dir("Clustering")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
- julia -e 'using Pkg, Clustering; cd(joinpath(dirname(pathof(Clustering)), "..")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

This package provides a set of algorithms for data clustering.

[![0.5](http://pkg.julialang.org/badges/Clustering_0.5.svg)](http://pkg.julialang.org/?pkg=Clustering)
[![0.6](http://pkg.julialang.org/badges/Clustering_0.6.svg)](http://pkg.julialang.org/?pkg=Clustering)
[![0.6](http://pkg.julialang.org/badges/Clustering_0.6.svg)](http://pkg.julialang.org/?pkg=Clustering&ver=0.6)
[![0.7](http://pkg.julialang.org/badges/Clustering_0.7.svg)](http://pkg.julialang.org/?pkg=Clustering&ver=0.7)
[![1.0](http://pkg.julialang.org/badges/Clustering_1.0.svg)](http://pkg.julialang.org/?pkg=Clustering&ver=1.0)

[![Travis](https://travis-ci.org/JuliaStats/Clustering.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/Clustering.jl)
[![Coveralls](https://coveralls.io/repos/github/JuliaStats/Clustering.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaStats/Clustering.jl?branch=master)

Expand Down Expand Up @@ -37,5 +39,3 @@ Pkg.add("Clustering")
## Resources

**Documentation:** http://clusteringjl.readthedocs.org/en/latest/


2 changes: 1 addition & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
julia 0.7.0-alpha
julia 0.7
Distances 0.3.1
NearestNeighbors 0.0.3
StatsBase 0.9.0
6 changes: 1 addition & 5 deletions src/Clustering.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
__precompile__()

module Clustering
using Distances
using NearestNeighbors
Expand All @@ -8,9 +6,7 @@ module Clustering
using Printf
using LinearAlgebra
using SparseArrays
if VERSION >= v"0.7.0-beta.85"
using Statistics
end
using Statistics

import Base: show
import StatsBase: IntegerVector, RealVector, RealMatrix, counts
Expand Down
9 changes: 4 additions & 5 deletions src/hclust.jl
Original file line number Diff line number Diff line change
Expand Up @@ -375,14 +375,13 @@ function cutree(hclust::Hclust; k::Int=1,
push!(clusters, new)
i += 1
end
all = vcat(clusters, nodes)
all = all[map(length, all) .> 0]
all = filter!(!isempty, vcat(clusters, nodes))
## convert to a single array of cluster indices
res = Vector{Int}(undef, nnodes)
res = fill(0, nnodes)
for (i, cl) in enumerate(all)
res[cl] = i
res[cl] .= i
end
res
return res
end

## some diagnostic functions, not exported
Expand Down
31 changes: 10 additions & 21 deletions src/kmeans.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ function _kmeans!(
dmat = pairwise(distance, centers, x)
dmat = convert(Array{T}, dmat) #Can be removed if one day Distance.result_type(SqEuclidean(), T, T) == T
update_assignments!(dmat, true, assignments, costs, counts, to_update, unused)
objv = w == nothing ? sum(costs) : dot(w, costs)
objv = w === nothing ? sum(costs) : dot(w, costs)

# main loop
t = 0
Expand All @@ -111,7 +111,7 @@ function _kmeans!(
# update pairwise distance matrix

if !isempty(unused)
to_update[unused] = true
to_update[unused] .= true
end

if t == 1 || num_affected > 0.75 * k
Expand All @@ -120,7 +120,7 @@ function _kmeans!(
# if only a small subset is affected, only compute for that subset
affected_inds = findall(to_update)
dmat_p = pairwise(distance, centers[:, affected_inds], x)
dmat[affected_inds, :] = dmat_p
dmat[affected_inds, :] .= dmat_p
end

# update assignments
Expand All @@ -131,7 +131,7 @@ function _kmeans!(
# compute change of objective and determine convergence

prev_objv = objv
objv = w == nothing ? sum(costs) : dot(w, costs)
objv = w === nothing ? sum(costs) : dot(w, costs)
objv_change = objv - prev_objv

if objv_change > tol
Expand Down Expand Up @@ -297,18 +297,15 @@ function update_centers!(
assignments::Vector{Int}, # in: assignments (n)
to_update::Vector{Bool}, # in: whether a center needs update (k)
centers::Matrix{T}, # out: updated centers (d x k)
cweights::Vector) where T<:AbstractFloat # out: updated cluster weights (k)
cweights::Vector # out: updated cluster weights (k)
) where T<:AbstractFloat

d::Int = size(x, 1)
n::Int = size(x, 2)
k::Int = size(centers, 2)

# initialize center weights
for i = 1 : k
if to_update[i]
cweights[i] = 0.
end
end
cweights[to_update] .= 0.0

# accumulate columns
# accumulate_cols_u!(centers, cweights, x, assignments, weights, to_update)
Expand All @@ -323,13 +320,9 @@ function update_centers!(
rj = view(centers, :, cj)
xj = view(x, :, j)
if cweights[cj] > 0
for i = 1:d
@inbounds rj[i] += xj[i] * wj
end
@inbounds rj .+= xj * wj
else
for i = 1:d
@inbounds rj[i] = xj[i] * wj
end
@inbounds rj .= xj * wj
end
cweights[cj] += wj
end
Expand All @@ -339,11 +332,7 @@ function update_centers!(
# sum ==> mean
for j = 1:k
if to_update[j]
@inbounds cj::T = 1 / cweights[j]
vj = view(centers,:,j)
for i = 1:d
@inbounds vj[i] *= cj
end
@inbounds centers[:, j] .*= 1 / cweights[j]
end
end
end
Expand Down
18 changes: 7 additions & 11 deletions src/mcl.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# MCL (Markov CLustering algorithm)

"""
immutable MCLResult <: ClusteringResult
struct MCLResult <: ClusteringResult
Result returned by `mcl()`.
"""
Expand All @@ -21,16 +21,12 @@ end
# `zero_tol` is a minimal value to consider as an element-to-cluster assignment
function _mcl_clusters(mcl_adj::AbstractMatrix, allow_singles::Bool, zero_tol::Float64 = 1E-20)
# remove rows containing only zero elements and convert into a mask of nonzero elements
el2clu_mask = mcl_adj[squeeze(sum(mcl_adj, dims=2), dims=2) .> zero_tol, :] .> zero_tol
el2clu_mask = mcl_adj[dropdims(sum(mcl_adj, dims=2), dims=2) .> zero_tol, :] .> zero_tol

# assign cluster indexes to each node
# cluster index is the index of the first TRUE in a given column
@static if VERSION >= v"0.7.0-beta.73"
_ms = mapslices(el_mask->isempty(el_mask) ? 0 : argmax(el_mask), el2clu_mask, dims=1)
else
_ms = mapslices(el_mask->isempty(el_mask) ? 0 : argmax(el_mask), el2clu_mask, 1)
end
clu_ixs = squeeze(_ms, dims=1)
_ms = mapslices(el_mask->isempty(el_mask) ? 0 : argmax(el_mask), el2clu_mask, dims=1)
clu_ixs = dropdims(_ms, dims=1)
clu_sizes = zeros(Int, size(el2clu_mask, 1))
unassigned_count = 0
@inbounds for clu_ix in clu_ixs
Expand Down Expand Up @@ -155,7 +151,7 @@ function mcl(adj::AbstractMatrix{T};
# initialize the MCL adjacency matrix by normalized `adj` weights
mcl_adj = copy(adj)
# normalize in columns
rmul!(mcl_adj, Diagonal(map(x -> x != 0.0 ? 1.0/x : x, squeeze(sum(mcl_adj, dims=1), dims=1))))
rmul!(mcl_adj, Diagonal(map(x -> x != 0.0 ? 1.0/x : x, dropdims(sum(mcl_adj, dims=1), dims=1))))
mcl_norm = norm(mcl_adj)
if !isfinite(mcl_norm)
throw(OverflowError("The norm of the input adjacency matrix is not finite"))
Expand All @@ -176,15 +172,15 @@ function mcl(adj::AbstractMatrix{T};

# normalize in columns
rmul!(next_mcl_adj, Diagonal(map(x -> x != 0.0 ? 1.0/x : x,
squeeze(sum(next_mcl_adj, dims=1), dims=1))))
dropdims(sum(next_mcl_adj, dims=1), dims=1))))

next_mcl_norm = norm(next_mcl_adj)
if !isfinite(next_mcl_norm)
@warn("MCL adjacency matrix norm is not finite")
break
end
rel_delta = euclidean(next_mcl_adj, mcl_adj)/mcl_norm
(display == :verbose) && info("MCL iter. #$niter: rel.Δ=", rel_delta)
(display == :verbose) && @info("MCL iter. #$niter: rel.Δ=", rel_delta)
(converged = rel_delta <= tol) && break
# update (swap) MCL adjacency
niter += 1
Expand Down
5 changes: 4 additions & 1 deletion test/affprop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ using Clustering
using LinearAlgebra
using Random

srand(34568)
@testset "affinityprop() (affinity propagation)" begin

Random.seed!(34568)

d = 10
n = 500
Expand All @@ -31,3 +33,4 @@ for i = 1:k
@test R.counts[i] == count(==(i), R.assignments)
end

end

0 comments on commit 4b4105b

Please sign in to comment.