In [2]:
:dep csv = "1.3.1"
:dep linfa = "0.7.1"
:dep plotters = "0.3.7"
:dep ndarray = "0.15.6"
:dep linfa-logistic = "0.7.1"
:dep linfa-datasets = { version = "0.7.1", features = ["iris"] }
:dep rand = "0.8.5" 
use linfa::prelude::*;
use ndarray::prelude::*;
use ndarray::Array1;
use linfa_logistic::{MultiLogisticRegression, MultiFittedLogisticRegression};
use linfa::metrics::ConfusionMatrix;
use rand::prelude::*;



/// Builds a shuffled train/test split of the Iris dataset bundled with `linfa_datasets`.
///
/// The dataset is shuffled with a thread-local RNG, tagged with the four
/// feature names, and then split via `split_with_ratio(split_ratio)`.
/// A short summary of the first (training) split is printed to stdout.
///
/// Returns the `(train, test)` pair in the order produced by `split_with_ratio`.
pub fn load_iris_dataset(split_ratio: f32) -> (Dataset<f64, usize, Ix1>, Dataset<f64, usize, Ix1>) {
    let feature_names = vec!["sepal length", "sepal width", "petal length", "petal width"];

    // Shuffle first, then label the features and split.
    let mut rng = thread_rng();
    let shuffled = linfa_datasets::iris().shuffle(&mut rng);
    let (train, test): (Dataset<f64, usize, Ix1>, Dataset<f64, usize, Ix1>) = shuffled
        .with_feature_names(feature_names)
        .split_with_ratio(split_ratio);

    // Summarise the training split.
    let n_train = train.nsamples();
    println!(
        "Fit Multinomial Logistic Regression classifier with #{} training points",
        n_train
    );
    println!("Feature names {:?}", train.feature_names());
    println!("Dataset records shape: {:?}", train.records.shape());
    println!("Dataset targets shape: {:?}", train.targets.shape());

    (train, test)
}


fn draw_corr_matrix(corr_matrix) -> Result<(), Box<dyn std::error::Error>>{
    correlation_coefficients = corr_matrix
}




// Split the Iris data with a 0.9 ratio, then report how the training features correlate.
let (train_set, test_set) = load_iris_dataset(0.9);
let corr_matrix = train_set.pearson_correlation();
println!("Pearson correlation matrix of training features\n{}", corr_matrix);

    


Fit Multinomial Logistic Regression classifier with #135 training points
Feature names ["sepal length", "sepal width", "petal length", "petal width"]
Dataset records shape: [135, 4]
Dataset targets shape: [135]
Pearson correlation matrix of training features
sepal length-0.12 0.87 0.81 
sepal width              -0.43 -0.37 
petal length                          0.96 
petal width



In [3]:
/// Trains a multinomial logistic regression model on `train_set`.
///
/// Prints the number of training samples, then fits a default
/// `MultiLogisticRegression` limited to 50 iterations.
///
/// # Panics
///
/// Panics if fitting returns an error, because the result is unwrapped.
fn fit_logistic_regressor(train_set: &Dataset<f64, usize, Ix1>) -> MultiFittedLogisticRegression<f64, usize> {
    let n_train = train_set.nsamples();
    println!(
        "Fit Multinomial Logistic Regression classifier with #{} training points",
        n_train
    );

    // Default hyper-parameters, with the solver capped at 50 iterations.
    let params = MultiLogisticRegression::default().max_iterations(50);

    params.fit(train_set).unwrap()
}

/// Runs `model` on `test_set` and scores the predictions against its targets.
///
/// Prints the number of test samples, predicts a class for each of them and
/// builds a confusion matrix from the predictions and `test_set`.
///
/// Returns `(predictions, confusion_matrix)`. The model is taken by value.
///
/// # Panics
///
/// Panics if `confusion_matrix` returns an error, because the result is unwrapped.
fn predict_class(test_set: &Dataset<f64, usize, Ix1>, model: MultiFittedLogisticRegression<f64, usize>)
    -> (Array1<usize>, ConfusionMatrix<usize>) {
    let n_test = test_set.nsamples();
    println!(
        "Predict class of #{} testing points",
        n_test
    );

    let predicted = model.predict(test_set);
    let confusion = predicted.confusion_matrix(test_set).unwrap();

    (predicted, confusion)
}

// Train on the training split produced in the previous cell.
let model = fit_logistic_regressor(&train_set);

// `model` is moved into `predict_class` here and cannot be reused afterwards.
let (prediction, cm) = predict_class(&test_set, model);

// The slices below span 0..n_samples_test, i.e. every test sample.
let n_samples_test = test_set.nsamples();
println!("Predictions: {:?}", prediction.slice(s![0..n_samples_test]));
println!("Ground truth: {:?}", test_set.targets.slice(s![0..n_samples_test])); 

// Trailing expression: presumably rendered by the notebook as the cell's output value.
cm

Fit Multinomial Logistic Regression classifier with #135 training points
Predict class of #15 testing points
Predictions: [1, 2, 2, 1, 0, 2, 1, 1, 0, 2, 2, 0, 2, 2, 2], shape=[15], strides=[1], layout=CFcf (0xf), const ndim=1
Ground truth: [1, 2, 2, 1, 0, 2, 1, 1, 0, 2, 2, 0, 2, 2, 2], shape=[15], strides=[1], layout=CFcf (0xf), const ndim=1


classes    | 2          | 1          | 0         
2          | 8          | 0          | 0         
1          | 0          | 4          | 0         
0          | 0          | 0          | 3         


In [None]:
// Plot iris dataset features
