Skip to content

Commit

Permalink
Merge ac7fd92 into ecf7441
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergiorezende22 committed Sep 9, 2020
2 parents ecf7441 + ac7fd92 commit 0c84aef
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/SyntheticDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ function __init__()
end

include("sklearn.jl")
include("matlab.jl")

function convert(features::Array{T, 2}, labels::Array{D, 1})::DataFrame where {T <: Number, D <: Number}
df = DataFrame()
Expand Down
30 changes: 30 additions & 0 deletions src/matlab.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
    generate_twospirals(; n_samples::Integer = 2000,
                          start_degrees::Real = 90,
                          total_degrees::Real = 570,
                          noise::Real = 0.2)

Generate the two-spirals dataset. Return an `n_samples × 3` `Matrix{Float64}` in which
each row holds the X and Y coordinates of a point followed by its class label
(`0.0` for the first spiral, `1.0` for the second).

The first `fld(n_samples, 2)` rows belong to class `0`; the remaining rows belong to
class `1`. Each point lies at an angle `θ` (in radians) and at radius `θ`, so the second
spiral is the first one mirrored through the origin.

# Arguments
- `n_samples::Integer = 2000`: The total number of points generated.
- `start_degrees::Real = 90`: Angle, in degrees, at which both spirals start; it
  determines how far from the origin the spirals start.
- `total_degrees::Real = 570`: Angular sweep, in degrees; controls the length of the
  spirals.
- `noise::Real = 0.2`: Scale of the noise; a value drawn uniformly from `[0, noise)` is
  added to each coordinate.

# Throws
- `ArgumentError`: if `n_samples` is negative.

Reference: [link](https://la.mathworks.com/matlabcentral/fileexchange/41459-6-functions-for-generating-artificial-datasets)
"""
function generate_twospirals(; n_samples::Integer = 2000,
                               start_degrees::Real = 90,
                               total_degrees::Real = 570,
                               noise::Real = 0.2)
    n_samples >= 0 ||
        throw(ArgumentError("n_samples must be non-negative, got $n_samples"))

    # Keep the converted angles in fresh variables so no binding changes its type.
    start_angle = deg2rad(start_degrees)
    sweep = deg2rad(total_degrees)

    # An odd sample count gives the extra point to the second spiral.
    total = Int(n_samples)
    n_first = fld(total, 2)
    n_second = total - n_first

    # First spiral (class 0). The square root spreads the points evenly along the
    # arm instead of crowding them near the start.
    angle = start_angle .+ sqrt.(rand(n_first, 1)) .* sweep
    x = -cos.(angle) .* angle .+ rand(n_first, 1) .* noise
    y = sin.(angle) .* angle .+ rand(n_first, 1) .* noise
    first_arm = hcat(x, y, zeros(n_first, 1))

    # Second spiral (class 1): same construction, reflected through the origin.
    angle = start_angle .+ sqrt.(rand(n_second, 1)) .* sweep
    x = cos.(angle) .* angle .+ rand(n_second, 1) .* noise
    y = -sin.(angle) .* angle .+ rand(n_second, 1) .* noise
    second_arm = hcat(x, y, ones(n_second, 1))

    return vcat(first_arm, second_arm)
end
12 changes: 12 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ using Test
@test size(data)[1] == samples
@test size(data)[2] == features + 1

@test size(data)[1] == samples
@test size(data)[2] == features + 1

data = SyntheticDatasets.generate_friedman1(n_samples = samples,
n_features = features)

Expand Down Expand Up @@ -79,3 +82,12 @@ using Test
@test size(data)[1] == samples
@test size(data)[2] == 4
end

# Checks the generators ported from the MATLAB artificial-dataset collection.
@testset "Matlab Generators" begin
    samples = 20000

    data = SyntheticDatasets.generate_twospirals(n_samples = samples,
                                                 noise = 2.2)

    # Shape: one row per sample; the columns are X, Y and the class label.
    @test size(data)[1] == samples
    @test size(data)[2] == 3

    # Labels are 0 for the first spiral and 1 for the second; the second spiral
    # receives the remainder when the sample count is odd.
    @test all(label -> label == 0 || label == 1, data[:, 3])
    @test count(isone, data[:, 3]) == samples - fld(samples, 2)
end

0 comments on commit 0c84aef

Please sign in to comment.