Skip to content
This repository has been archived by the owner on Jul 16, 2021. It is now read-only.

Commit

Permalink
ENH: Add datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Dec 19, 2016
1 parent c7dcaaf commit c262ac4
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 0 deletions.
195 changes: 195 additions & 0 deletions src/datasets/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
use std::fmt::Debug;

use rulinalg::matrix::Matrix;
use rulinalg::vector::Vector;

/// Dataset container pairing explanatory variables with their targets.
///
/// `D` is the type holding the explanatory variables (features) and `T` the
/// type holding the objective variable (target). Both fields are private and
/// are read through the `data` and `target` accessor methods.
///
/// No trait bounds are placed on the type parameters here: the derived
/// `Clone` and `Debug` implementations already require `D` and `T` to
/// implement the respective trait, so repeating those bounds on the
/// definition would only restrict where the type can be named.
#[derive(Clone, Debug)]
pub struct Dataset<D, T> {
    /// Explanatory variables (features).
    data: D,
    /// Objective variable (target).
    target: T,
}

impl<D, T> Dataset<D, T> where D: Clone + Debug, T: Clone + Debug {

/// Returns explanatory variable (features)
pub fn data(&self) -> &D {
&self.data
}

/// Return objective variable (target)
pub fn target(&self) -> &T {
&self.target
}
}


/// Loads the iris dataset that is compiled into the crate.
///
/// The data set contains 3 classes of 50 instances each, where each class
/// refers to a type of iris plant.
///
/// The returned `Dataset` holds:
///
/// * `data()`: a `Matrix<f64>` built with 150 rows and 4 columns, one row
///   of measurements per instance;
/// * `target()`: a `Vector<usize>` of length 150 with the class label
///   (`0`, `1` or `2`) of each row. Rows are grouped by class, 50 per label.
///
/// # Source
///
/// Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
/// Irvine, CA: University of California, School of Information and Computer Science.
pub fn load_iris() -> Dataset<Matrix<f64>, Vector<usize>> {
    // Dimensions of the table below.
    let n_samples: usize = 150;
    let n_features: usize = 4;
    let samples_per_class: usize = 50;

    // One line per instance, four measurements each.
    // NOTE(review): columns presumably follow the UCI file order (sepal length,
    // sepal width, petal length, petal width) — confirm against the source data.
    let measurements: Vec<f64> = vec![
        5.1, 3.5, 1.4, 0.2,
        4.9, 3.0, 1.4, 0.2,
        4.7, 3.2, 1.3, 0.2,
        4.6, 3.1, 1.5, 0.2,
        5.0, 3.6, 1.4, 0.2,
        5.4, 3.9, 1.7, 0.4,
        4.6, 3.4, 1.4, 0.3,
        5.0, 3.4, 1.5, 0.2,
        4.4, 2.9, 1.4, 0.2,
        4.9, 3.1, 1.5, 0.1,
        5.4, 3.7, 1.5, 0.2,
        4.8, 3.4, 1.6, 0.2,
        4.8, 3.0, 1.4, 0.1,
        4.3, 3.0, 1.1, 0.1,
        5.8, 4.0, 1.2, 0.2,
        5.7, 4.4, 1.5, 0.4,
        5.4, 3.9, 1.3, 0.4,
        5.1, 3.5, 1.4, 0.3,
        5.7, 3.8, 1.7, 0.3,
        5.1, 3.8, 1.5, 0.3,
        5.4, 3.4, 1.7, 0.2,
        5.1, 3.7, 1.5, 0.4,
        4.6, 3.6, 1.0, 0.2,
        5.1, 3.3, 1.7, 0.5,
        4.8, 3.4, 1.9, 0.2,
        5.0, 3.0, 1.6, 0.2,
        5.0, 3.4, 1.6, 0.4,
        5.2, 3.5, 1.5, 0.2,
        5.2, 3.4, 1.4, 0.2,
        4.7, 3.2, 1.6, 0.2,
        4.8, 3.1, 1.6, 0.2,
        5.4, 3.4, 1.5, 0.4,
        5.2, 4.1, 1.5, 0.1,
        5.5, 4.2, 1.4, 0.2,
        4.9, 3.1, 1.5, 0.1,
        5.0, 3.2, 1.2, 0.2,
        5.5, 3.5, 1.3, 0.2,
        4.9, 3.1, 1.5, 0.1,
        4.4, 3.0, 1.3, 0.2,
        5.1, 3.4, 1.5, 0.2,
        5.0, 3.5, 1.3, 0.3,
        4.5, 2.3, 1.3, 0.3,
        4.4, 3.2, 1.3, 0.2,
        5.0, 3.5, 1.6, 0.6,
        5.1, 3.8, 1.9, 0.4,
        4.8, 3.0, 1.4, 0.3,
        5.1, 3.8, 1.6, 0.2,
        4.6, 3.2, 1.4, 0.2,
        5.3, 3.7, 1.5, 0.2,
        5.0, 3.3, 1.4, 0.2,
        7.0, 3.2, 4.7, 1.4,
        6.4, 3.2, 4.5, 1.5,
        6.9, 3.1, 4.9, 1.5,
        5.5, 2.3, 4.0, 1.3,
        6.5, 2.8, 4.6, 1.5,
        5.7, 2.8, 4.5, 1.3,
        6.3, 3.3, 4.7, 1.6,
        4.9, 2.4, 3.3, 1.0,
        6.6, 2.9, 4.6, 1.3,
        5.2, 2.7, 3.9, 1.4,
        5.0, 2.0, 3.5, 1.0,
        5.9, 3.0, 4.2, 1.5,
        6.0, 2.2, 4.0, 1.0,
        6.1, 2.9, 4.7, 1.4,
        5.6, 2.9, 3.6, 1.3,
        6.7, 3.1, 4.4, 1.4,
        5.6, 3.0, 4.5, 1.5,
        5.8, 2.7, 4.1, 1.0,
        6.2, 2.2, 4.5, 1.5,
        5.6, 2.5, 3.9, 1.1,
        5.9, 3.2, 4.8, 1.8,
        6.1, 2.8, 4.0, 1.3,
        6.3, 2.5, 4.9, 1.5,
        6.1, 2.8, 4.7, 1.2,
        6.4, 2.9, 4.3, 1.3,
        6.6, 3.0, 4.4, 1.4,
        6.8, 2.8, 4.8, 1.4,
        6.7, 3.0, 5.0, 1.7,
        6.0, 2.9, 4.5, 1.5,
        5.7, 2.6, 3.5, 1.0,
        5.5, 2.4, 3.8, 1.1,
        5.5, 2.4, 3.7, 1.0,
        5.8, 2.7, 3.9, 1.2,
        6.0, 2.7, 5.1, 1.6,
        5.4, 3.0, 4.5, 1.5,
        6.0, 3.4, 4.5, 1.6,
        6.7, 3.1, 4.7, 1.5,
        6.3, 2.3, 4.4, 1.3,
        5.6, 3.0, 4.1, 1.3,
        5.5, 2.5, 4.0, 1.3,
        5.5, 2.6, 4.4, 1.2,
        6.1, 3.0, 4.6, 1.4,
        5.8, 2.6, 4.0, 1.2,
        5.0, 2.3, 3.3, 1.0,
        5.6, 2.7, 4.2, 1.3,
        5.7, 3.0, 4.2, 1.2,
        5.7, 2.9, 4.2, 1.3,
        6.2, 2.9, 4.3, 1.3,
        5.1, 2.5, 3.0, 1.1,
        5.7, 2.8, 4.1, 1.3,
        6.3, 3.3, 6.0, 2.5,
        5.8, 2.7, 5.1, 1.9,
        7.1, 3.0, 5.9, 2.1,
        6.3, 2.9, 5.6, 1.8,
        6.5, 3.0, 5.8, 2.2,
        7.6, 3.0, 6.6, 2.1,
        4.9, 2.5, 4.5, 1.7,
        7.3, 2.9, 6.3, 1.8,
        6.7, 2.5, 5.8, 1.8,
        7.2, 3.6, 6.1, 2.5,
        6.5, 3.2, 5.1, 2.0,
        6.4, 2.7, 5.3, 1.9,
        6.8, 3.0, 5.5, 2.1,
        5.7, 2.5, 5.0, 2.0,
        5.8, 2.8, 5.1, 2.4,
        6.4, 3.2, 5.3, 2.3,
        6.5, 3.0, 5.5, 1.8,
        7.7, 3.8, 6.7, 2.2,
        7.7, 2.6, 6.9, 2.3,
        6.0, 2.2, 5.0, 1.5,
        6.9, 3.2, 5.7, 2.3,
        5.6, 2.8, 4.9, 2.0,
        7.7, 2.8, 6.7, 2.0,
        6.3, 2.7, 4.9, 1.8,
        6.7, 3.3, 5.7, 2.1,
        7.2, 3.2, 6.0, 1.8,
        6.2, 2.8, 4.8, 1.8,
        6.1, 3.0, 4.9, 1.8,
        6.4, 2.8, 5.6, 2.1,
        7.2, 3.0, 5.8, 1.6,
        7.4, 2.8, 6.1, 1.9,
        7.9, 3.8, 6.4, 2.0,
        6.4, 2.8, 5.6, 2.2,
        6.3, 2.8, 5.1, 1.5,
        6.1, 2.6, 5.6, 1.4,
        7.7, 3.0, 6.1, 2.3,
        6.3, 3.4, 5.6, 2.4,
        6.4, 3.1, 5.5, 1.8,
        6.0, 3.0, 4.8, 1.8,
        6.9, 3.1, 5.4, 2.1,
        6.7, 3.1, 5.6, 2.4,
        6.9, 3.1, 5.1, 2.3,
        5.8, 2.7, 5.1, 1.9,
        6.8, 3.2, 5.9, 2.3,
        6.7, 3.3, 5.7, 2.5,
        6.7, 3.0, 5.2, 2.3,
        6.3, 2.5, 5.0, 1.9,
        6.5, 3.0, 5.2, 2.0,
        6.2, 3.4, 5.4, 2.3,
        5.9, 3.0, 5.1, 1.8,
    ];

    // Rows are stored class by class (50 each), so the label of a row is its
    // index divided by the class size: 0 for rows 0-49, 1 for 50-99, 2 for 100-149.
    let labels: Vec<usize> = (0..n_samples).map(|row| row / samples_per_class).collect();

    let features = Matrix::new(n_samples, n_features, measurements);
    let classes = Vector::new(labels);

    Dataset { data: features, target: classes }
}
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,6 @@ pub mod analysis {
pub mod cross_validation;
pub mod score;
}

/// Module for datasets.
pub mod datasets;
13 changes: 13 additions & 0 deletions tests/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
extern crate rusty_machine as rm;

use rm::datasets;
use rm::prelude::*;

/// Checks that the bundled iris dataset reports the expected dimensions:
/// a 150 x 4 feature matrix and a target vector with 150 entries.
#[test]
fn test_iris() {
    let iris = datasets::load_iris();

    let features = iris.data();
    assert_eq!(features.rows(), 150);
    assert_eq!(features.cols(), 4);

    let labels = iris.target();
    assert_eq!(labels.size(), 150);
}

0 comments on commit c262ac4

Please sign in to comment.