Skip to content

Commit

Permalink
Merge c649dee into 22badcb
Browse files Browse the repository at this point in the history
  • Loading branch information
yaansz committed Sep 5, 2020
2 parents 22badcb + c649dee commit f9e7515
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 3 deletions.
33 changes: 32 additions & 1 deletion src/sklearn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,35 @@ function generate_classification(; n_samples::Int = 100,
random_state = random_state)

return convert(features, labels)
end
end


"""
    generate_low_rank_matrix(; n_samples::Int = 100,
                               n_features::Int = 100,
                               effective_rank::Int = 10,
                               tail_strength::Float64 = 0.5,
                               random_state::Union{Int, Nothing} = nothing)

Generate a mostly low rank matrix with bell-shaped singular values.

This is a thin wrapper that forwards every keyword argument unchanged to
`datasets.make_low_rank_matrix` and returns its result.

# Arguments
- `n_samples::Int = 100`: The number of samples.
- `n_features::Int = 100`: The number of features.
- `effective_rank::Int = 10`: The approximate number of singular vectors required to explain most of the data by linear combinations.
- `tail_strength::Float64 = 0.5`: The relative importance of the fat noisy tail of the singular values profile.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls.

# Returns
The value returned by `datasets.make_low_rank_matrix`; per the scikit-learn
reference this is a matrix with `n_samples` rows and `n_features` columns.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_low_rank_matrix.html)
"""
function generate_low_rank_matrix(; n_samples::Int = 100,
                                    n_features::Int = 100,
                                    effective_rank::Int = 10,
                                    tail_strength::Float64 = 0.5,
                                    random_state::Union{Int, Nothing} = nothing)
    # There are no labels for this generator, so the matrix is returned
    # directly instead of being passed through `convert(features, labels)`.
    return datasets.make_low_rank_matrix(n_samples = n_samples,
                                         n_features = n_features,
                                         effective_rank = effective_rank,
                                         tail_strength = tail_strength,
                                         random_state = random_state)
end
13 changes: 11 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,17 @@ using Test
n_features = features,
n_classes = 1)


@test size(data)[1] == samples
@test size(data)[2] == features + 1

end
# Generate a low rank matrix with a fixed seed (`random_state = 5`) and check
# that the result has `samples` rows and `features` columns. Both `samples`
# and `features` are defined earlier in the test file.
data = SyntheticDatasets.generate_low_rank_matrix(n_samples = samples,
n_features = features,
effective_rank = 10,
tail_strength = 0.5,
random_state = 5)

@test size(data)[1] == samples
@test size(data)[2] == features

end

0 comments on commit f9e7515

Please sign in to comment.