In [2]:
:dep csv = "1.1.6"

:dep serde = "1.0.145"
use serde::{Serialize, Deserialize, Serializer, ser::SerializeStruct, Deserializer};

In [3]:
:dep rand = "0.8.5"
use rand::thread_rng;
use rand::seq::SliceRandom;

In [4]:
:dep rand_distr = {version = "0.4.3"}

:dep plotters = { version = "0.3.4", default_features = false, features = ["evcxr", "all_series", "all_elements"] }
use plotters::prelude::*;
use plotters::coord::Shift;

In [5]:
:dep itertools = "0.10.5"

use itertools::Itertools;
use std::collections::HashMap;

In [7]:
/// One row of the iris dataset: four numeric measurements plus the species name.
#[derive(Debug, Clone)]
pub struct Flower {
    /// Sepal length, read from CSV column 0.
    sepal_length: f32,
    /// Sepal width, read from CSV column 1.
    sepal_width: f32,
    /// Petal length, read from CSV column 2.
    petal_length: f32,
    /// Petal width, read from CSV column 3.
    petal_width: f32,
    /// Species name, read from CSV column 4. The methods on `Flower` only
    /// recognise "setosa", "versicolor" and "virginica".
    species: String,
}

In [8]:
impl Flower {
    /// Returns the four measurements as a vector, ordered sepal length,
    /// sepal width, petal length, petal width.
    pub fn into_feature_vector(&self) -> Vec<f32> {
        [
            self.sepal_length,
            self.sepal_width,
            self.petal_length,
            self.petal_width,
        ]
        .to_vec()
    }

    /// Returns the species as a floating-point class label
    /// (setosa = 0, versicolor = 1, virginica = 2).
    ///
    /// # Panics
    /// Panics when the species is not one of the three known names.
    pub fn into_labels(&self) -> f32 {
        // Same lookup as the integer variant; 0, 1 and 2 convert to f32 exactly.
        self.into_int_labels() as f32
    }

    /// Returns the species as an integer class label
    /// (setosa = 0, versicolor = 1, virginica = 2).
    ///
    /// # Panics
    /// Panics when the species is not one of the three known names.
    pub fn into_int_labels(&self) -> u64 {
        // The position of a name in this table is its class label.
        const KNOWN_SPECIES: [&str; 3] = ["setosa", "versicolor", "virginica"];
        let name = self.species.as_str();
        match KNOWN_SPECIES.iter().position(|known| *known == name) {
            Some(index) => index as u64,
            None => panic!("Not able to parse the target. Some other target got passed. {:?}", name),
        }
    }

    /// Returns the colour used to plot this flower's species
    /// (setosa = green, versicolor = red, virginica = blue).
    ///
    /// # Panics
    /// Panics when the species is not one of the three known names.
    pub fn species_color(&self) -> RGBColor {
        let name = self.species.as_str();
        if name == "setosa" {
            GREEN
        } else if name == "versicolor" {
            RED
        } else if name == "virginica" {
            BLUE
        } else {
            panic!("Not able to parse the target. Some other target got passed. {:?}", name)
        }
    }
}

In [41]:
// Read every record of the iris CSV and collect the rows into `data`.
let mut rdr = csv::Reader::from_path("../dataset/iris-dataset.csv")?;
let mut data: Vec<Flower> = Vec::new();

for row in rdr.records() {
    let row: csv::StringRecord = row?;
    // Columns 0..=3 hold the numeric measurements; a value that does not
    // parse as f32 panics, exactly like a direct `unwrap()` on each field.
    let measurement = |column: usize| -> f32 { row[column].parse().unwrap() };
    data.push(Flower {
        sepal_length: measurement(0),
        sepal_width: measurement(1),
        petal_length: measurement(2),
        petal_width: measurement(3),
        // Column 4 is the species name, kept verbatim.
        species: row[4].to_string(),
    });
}

()

In [10]:
// Shuffle the rows of `data` in place using the thread-local random generator.
data.shuffle(&mut thread_rng());

# Draw graphs

In [14]:
// Scatter plot of sepal length (x) against sepal width (y): one filled circle
// per flower, coloured by species.
evcxr_figure((640, 480), |root| {
    let root = root.titled("Scatter plot with histogram", ("Arial", 20).into_font())?;
    // One x breakpoint and one y breakpoint give four areas in row-major
    // order; index 2 is the lower-left (largest) one.
    let areas = root.split_by_breakpoints([560], [80]);

    let mut scatter_ctx = ChartBuilder::on(&areas[2])
        .x_label_area_size(40)
        .y_label_area_size(40)
        .build_cartesian_2d(4f64..8f64, 1f64..5f64)?;
    // Axis captions spelled the same way as in the combined scatter/histogram figure.
    scatter_ctx.configure_mesh()
        .disable_x_mesh()
        .disable_y_mesh()
        .x_desc("sepal length")
        .y_desc("sepal width")
        .draw()?;
    scatter_ctx.draw_series(data.iter().map(|flower| {
        Circle::new(
            (flower.sepal_length as f64, flower.sepal_width as f64),
            3,
            flower.species_color().filled(),
        )
    }))?;
    Ok(())
}).style("width:60%")

In [17]:
// Histogram of sepal length. Values are bucketed in tenths
// (`sepal_length * 10` truncated to u32), so the x axis runs 40..80 and the
// label formatter divides by 10 again for display.
evcxr_figure((640, 480), |root| {
    let areas = root.split_evenly((1, 1));
    let mut charts = vec![];

    for (area, name) in areas.iter().zip(["X"].into_iter()) {
        let mut chart = ChartBuilder::on(area)
            .caption(format!("Histogram for {}", name), ("Arial", 20).into_font())
            .x_label_area_size(40)
            .y_label_area_size(40)
            .build_cartesian_2d(40u32..80u32, 0f64..0.2f64)?;

        chart.configure_mesh()
            .disable_x_mesh()
            .disable_y_mesh()
            .y_labels(5)
            .x_label_formatter(&|x| format!("{:.1}", *x as f64 / 10.0))
            .y_label_formatter(&|y| format!("{}%", (*y * 100.0) as u32))
            .draw()?;
        charts.push(chart);
    }

    // NOTE(review): every flower contributes a fixed weight of 0.01, so the
    // "%" labels are only true percentages when `data` holds exactly 100
    // rows - confirm whether `1.0 / data.len() as f64` was intended.
    let hist_x = Histogram::vertical(&charts[0])
        .style(RED.filled())
        .margin(0)
        .data(data.iter().map(|flower| ((flower.sepal_length * 10.0) as u32, 0.01)));
    // Propagate drawing errors instead of silently discarding the Result.
    charts[0].draw_series(hist_x)?;
    Ok(())

}).style("width:60%")

In [18]:
// Scatter plot of sepal length vs. sepal width, with a sepal-length histogram
// in area 0 and a sepal-width histogram in area 3 of the split canvas.
evcxr_figure((640, 480), |root| {
    let titled = root.titled("Flower's graph", ("Arial", 20).into_font())?;
    let panels = titled.split_by_breakpoints([560], [80]);

    // Chart contexts: histogram axes use tenths of a unit as u32 buckets.
    let mut length_hist_ctx = ChartBuilder::on(&panels[0])
        .y_label_area_size(40)
        .build_cartesian_2d(40u32..80u32, 0f64..0.2f64)?;
    let mut width_hist_ctx = ChartBuilder::on(&panels[3])
        .x_label_area_size(40)
        .build_cartesian_2d(0f64..0.2f64, 10..50u32)?;
    let mut points_ctx = ChartBuilder::on(&panels[2])
        .x_label_area_size(40)
        .y_label_area_size(40)
        .build_cartesian_2d(4f64..8f64, 1f64..5f64)?;

    // Scatter panel first: axes, then one species-coloured dot per flower.
    points_ctx.configure_mesh()
        .disable_x_mesh()
        .disable_y_mesh()
        .x_desc("sepal length")
        .y_desc("sepal width")
        .draw()?;
    let dots = data.iter().map(|flower| {
        let position = (flower.sepal_length as f64, flower.sepal_width as f64);
        Circle::new(position, 3, flower.species_color().filled())
    });
    points_ctx.draw_series(dots)?;

    // Sepal-length histogram (vertical bars).
    let length_bars = Histogram::vertical(&length_hist_ctx)
        .style(RED.filled())
        .margin(0)
        .data(data.iter().map(|flower| ((flower.sepal_length * 10.0) as u32, 0.01)));
    length_hist_ctx.draw_series(length_bars)?;

    // Sepal-width histogram (horizontal bars).
    let width_bars = Histogram::horizontal(&width_hist_ctx)
        .style(GREEN.filled())
        .margin(0)
        .data(data.iter().map(|flower| ((flower.sepal_width * 10.0) as u32, 0.01)));
    width_hist_ctx.draw_series(width_bars)?;

    Ok(())
}).style("width:60%")

# Group by species

In [31]:
// sum: total sepal length over all "setosa" flowers.
// `iter()` borrows the rows, so `data` is not moved and stays usable afterwards.
let sum_setosa_sepal_length = data
    .iter()
    .filter(|flower| flower.species == "setosa")
    .fold(0f32, |acc, flower| acc + flower.sepal_length);
println!("{}", sum_setosa_sepal_length)

250.30002


()

In [42]:
// count: number of flowers per species.
// Iterates by reference; only the species string used as the key is cloned.
let mut map: HashMap<String, i32> = HashMap::new();
for flower in &data {
    *map.entry(flower.species.clone()).or_insert(0) += 1;
}
println!("{:?}", map);

{"setosa": 50, "versicolor": 50, "virginica": 50}


In [45]:
// list: sepal lengths grouped by species, in the current row order of `data`.
// Iterates by reference; only the species string used as the key is cloned.
let mut map: HashMap<String, Vec<f32>> = HashMap::new();
for flower in &data {
    map.entry(flower.species.clone())
        .or_default()
        .push(flower.sepal_length);
}
println!("{:?}", map);

{"setosa": [5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0], "virginica": [6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9], "versicolor": [7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7]}


In [64]:
/// Returns the arithmetic mean of `v`.
///
/// Accepts any `f32` slice; a `&Vec<f32>` argument coerces automatically, so
/// existing call sites keep working.
///
/// For an empty slice the result is `NaN` (zero divided by zero).
fn mean(v: &[f32]) -> f32 {
    let total: f32 = v.iter().sum();
    total / v.len() as f32
}

In [82]:
// mean: average sepal length per species.
// Borrows `map` instead of deep-cloning it; only the key strings are cloned.
let mean_map: HashMap<String, f32> = map
    .iter()
    .map(|(species, lengths)| (species.clone(), mean(lengths)))
    .collect();

mean_map

{"setosa": 5.0059996, "versicolor": 5.936002, "virginica": 6.5879993}

In [91]:
// max: largest sepal length per species.
// A single pass with `f32::max` replaces clone-and-sort: no copies of the
// vectors, no panic on NaN (`f32::max` ignores a NaN operand), and a species
// with an empty list is skipped instead of causing an index panic.
let max_map: HashMap<String, f32> = map
    .iter()
    .filter_map(|(species, lengths)| {
        lengths
            .iter()
            .copied()
            .reduce(f32::max)
            .map(|largest| (species.clone(), largest))
    })
    .collect();

max_map

{"virginica": 7.9, "versicolor": 7.0, "setosa": 5.8}

In [92]:
// min: smallest sepal length per species.
// A single pass with `f32::min` replaces clone-and-sort: no copies of the
// vectors, no panic on NaN (`f32::min` ignores a NaN operand), and a species
// with an empty list is skipped instead of causing an index panic.
let min_map: HashMap<String, f32> = map
    .iter()
    .filter_map(|(species, lengths)| {
        lengths
            .iter()
            .copied()
            .reduce(f32::min)
            .map(|smallest| (species.clone(), smallest))
    })
    .collect();

min_map

{"virginica": 4.9, "versicolor": 4.9, "setosa": 4.3}