## Add project dependencies

In [33]:
:dep csv = { version = "1.1" }
:dep ndarray = { version = "0.15.6" }
:dep linfa = { version = "0.7.0" }
:dep linfa-trees = { version = "0.7" }
:dep ndarray-rand = { version = "0.15.0" }
:dep plotters = { version = "0.3.7" }
:dep bincode = {version = "1.3.3"}
:dep serde = { version = "1.0", features = ["derive"] }


use csv::ReaderBuilder;
use ndarray::{Array1, Array2, Axis, stack, ArrayBase, Data, Ix2};
use linfa::prelude::*;
use linfa_trees::DecisionTree;
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{Write, Read};
use serde::{Serialize, Deserialize};
use bincode;

## Load data, encode labels, and split into train/test sets

In [34]:
/// Loads a headered CSV file into a dense feature matrix plus a label column.
///
/// For every record, all fields except the last are parsed as `f64` and
/// appended to the feature matrix; the last field is kept verbatim as the
/// row's label. Fields that do not parse as `f64` are silently dropped, so a
/// row only passes the width check if it yields as many numeric values as
/// the first data row did.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or a record cannot be
/// read, when a row yields a different number of numeric values than the
/// first row, when no feature values or labels were collected, or when the
/// collected values do not fit the `(rows, columns)` shape.
fn read_csv_to_ndarray(file_path: &str) -> Result<(Array2<f64>, Vec<String>), Box<dyn Error>> {
    let mut reader = ReaderBuilder::new().has_headers(true).from_path(file_path)?;

    // Row-major buffer of every numeric value seen so far.
    let mut flat_values: Vec<f64> = Vec::new();
    let mut targets: Vec<String> = Vec::new();
    let mut width = 0;

    for (index, parsed) in reader.records().enumerate() {
        let record = parsed?;
        // 1-based data-row number (the header row is not counted).
        let line_no = index + 1;
        let label_index = record.len() - 1;

        let numeric: Vec<f64> = record
            .iter()
            .take(label_index)
            .filter_map(|cell| cell.parse::<f64>().ok())
            .collect();

        if line_no == 1 {
            // The first data row fixes the expected number of feature columns.
            width = numeric.len();
        } else if numeric.len() != width {
            return Err(format!("Inconsistent column count at row {}", line_no).into());
        }

        flat_values.extend(numeric);
        targets.push(record[label_index].to_string());
    }

    if flat_values.is_empty() || targets.is_empty() {
        return Err("CSV file is empty or data is invalid".into());
    }

    // One label is pushed per accepted row, so `targets.len()` is the row count.
    let matrix = Array2::from_shape_vec((targets.len(), width), flat_values)?;
    Ok((matrix, targets))
}

/// Maps each distinct label string to a dense integer code.
///
/// Codes are handed out in order of first appearance, starting at 0.
/// Returns the per-row codes (same length and order as `labels`) together
/// with the label-to-code lookup table.
fn encode_labels(labels: &[String]) -> (Vec<usize>, HashMap<String, usize>) {
    let mut ids: HashMap<String, usize> = HashMap::new();
    let mut encoded: Vec<usize> = Vec::new();

    for label in labels {
        let id = match ids.get(label).copied() {
            Some(known) => known,
            None => {
                // The next unused code always equals the number of labels seen so far.
                let fresh = ids.len();
                ids.insert(label.clone(), fresh);
                fresh
            }
        };
        encoded.push(id);
    }

    (encoded, ids)
}

fn split_data(
    features: &ArrayBase<impl Data<Elem = f64>, Ix2>,
    labels: &[usize],
) -> (Array2<f64>, Array1<usize>, Array2<f64>, Array1<usize>) {
    let total_rows = features.nrows();
    let train_size = (total_rows as f64 * 0.8) as usize;

    let x_train = features.slice(s![..train_size, ..]).to_owned();
    let y_train = Array1::from_iter(labels[..train_size].iter().cloned());

    let x_test = features.slice(s![train_size.., ..]).to_owned();
    let y_test = Array1::from_iter(labels[train_size..].iter().cloned());

    (x_train, y_train, x_test, y_test)
}

In [35]:
/// Serializable snapshot of the metadata written alongside a trained model.
///
/// This holds the label encoding and two hyper-parameters only; it does not
/// contain the fitted tree itself.
///
/// NOTE(review): bincode is expected to encode fields in declaration order,
/// so reordering fields would likely make previously exported files
/// unreadable — confirm before changing the layout.
#[derive(Serialize, Deserialize)]
struct ExportableModel {
    /// Mapping from original label string to its integer code.
    label_map: HashMap<String, usize>,
    /// Maximum tree depth recorded for the model; `None` presumably means
    /// "no limit" — confirm against the training configuration.
    max_depth: Option<usize>,
    /// Minimum leaf weight recorded for the model.
    min_weight_leaf: f64,
    // Add other necessary model parameters
}

/// Writes the model metadata (label map and hyper-parameters) to `filepath`
/// in bincode format and prints a confirmation line.
///
/// `_model` is currently unused: the recorded `max_depth` and
/// `min_weight_leaf` are fixed values, not values read from the model.
///
/// # Errors
///
/// Returns an error if serialization fails or the file cannot be created or
/// written.
fn export_model(
    _model: &DecisionTree<f64, usize>,
    label_map: &HashMap<String, usize>,
    filepath: &str,
) -> Result<(), Box<dyn Error>> {
    // Serialize fully in memory first so the file is only created once
    // encoding has succeeded.
    let bytes = bincode::serialize(&ExportableModel {
        label_map: label_map.clone(),
        max_depth: Some(10), // hard-coded; not extracted from `_model`
        min_weight_leaf: 1.0, // hard-coded; not extracted from `_model`
    })?;

    File::create(filepath)?.write_all(&bytes)?;

    println!("Model metadata exported to {}", filepath);
    Ok(())
}

fn import_model(filepath: &str) -> Result<(HashMap<String, usize>), Box<dyn Error>> {
    let mut file = File::open(filepath)?;
    let mut buffer = Vec::new();
    file.read_to_end(&mut buffer)?;
    
    let exportable_model: ExportableModel = bincode::deserialize(&buffer)?;
    
    Ok((exportable_model.label_map))
}

/// Reads a CSV file (with a header row) into a feature matrix and a list of
/// label strings.
///
/// The last field of each record is taken as the label. Every preceding
/// field that parses as `f64` becomes a feature value; fields that fail to
/// parse are skipped without error. All rows must yield the same number of
/// feature values as the first data row.
///
/// # Errors
///
/// Fails if the file cannot be opened, a record cannot be read, a row's
/// feature count differs from the first row's, nothing usable was read, or
/// the values cannot be shaped into a `(rows, columns)` matrix.
fn read_csv_to_ndarray(file_path: &str) -> Result<(Array2<f64>, Vec<String>), Box<dyn Error>> {
    let mut reader = ReaderBuilder::new().has_headers(true).from_path(file_path)?;

    let mut values: Vec<f64> = Vec::new();
    let mut targets: Vec<String> = Vec::new();
    // `None` until the first data row has fixed the feature count.
    let mut width: Option<usize> = None;

    for entry in reader.records() {
        let record = entry?;
        // One label is stored per accepted row, so this is the 1-based row number.
        let row_number = targets.len() + 1;
        let label_index = record.len() - 1;

        let numeric: Vec<f64> = record
            .iter()
            .take(label_index)
            .filter_map(|cell| cell.parse::<f64>().ok())
            .collect();

        match width {
            None => width = Some(numeric.len()),
            Some(expected) if expected != numeric.len() => {
                return Err(format!("Inconsistent column count at row {}", row_number).into());
            }
            Some(_) => {}
        }

        values.extend(numeric);
        targets.push(record[label_index].to_string());
    }

    if values.is_empty() || targets.is_empty() {
        return Err("CSV file is empty or data is invalid".into());
    }

    let shape = (targets.len(), width.unwrap_or(0));
    let feature_array = Array2::from_shape_vec(shape, values)?;
    Ok((feature_array, targets))
}

/// Converts label strings to integer codes.
///
/// Each previously unseen label receives the next free code (0, 1, 2, ...)
/// in order of first occurrence. Returns the encoded labels, aligned with
/// `labels`, and the label-to-code map that was built.
fn encode_labels(labels: &[String]) -> (Vec<usize>, HashMap<String, usize>) {
    let mut codes_by_label: HashMap<String, usize> = HashMap::new();

    let codes: Vec<usize> = labels
        .iter()
        .map(|name| match codes_by_label.get(name).copied() {
            Some(code) => code,
            None => {
                // Map size before insertion is the next unused code.
                let code = codes_by_label.len();
                codes_by_label.insert(name.clone(), code);
                code
            }
        })
        .collect();

    (codes, codes_by_label)
}

fn split_data(
    features: &ArrayBase<impl Data<Elem = f64>, Ix2>, 
    labels: Vec<usize>
) -> (Array2<f64>, Array1<usize>, Array2<f64>, Array1<usize>) {
    let total_rows = features.nrows();
    let train_size = (total_rows as f64 * 0.8) as usize;

    let x_train = features.slice(s![..train_size, ..]).to_owned();
    let y_train = Array1::from_vec(labels[..train_size].to_vec());
    
    let x_test = features.slice(s![train_size.., ..]).to_owned();
    let y_test = Array1::from_vec(labels[train_size..].to_vec());

    (x_train, y_train, x_test, y_test)
}

fn run_main() -> Result<DecisionTree<f64, usize>, Box<dyn Error>> {
    let file_paths = vec![
        "./data/1.csv",
        "./data/2.csv",
        "./data/3.csv",
        "./data/4.csv",
    ];
    
    println!("Reading data from CSV...");
    let mut all_data = Vec::new();
    let mut all_labels = Vec::new();

    for file_path in file_paths {
        let (data, labels) = read_csv_to_ndarray(file_path)?;
        all_data.push(data);
        all_labels.extend(labels);
    }
    
    let views: Vec<_> = all_data.iter().map(|arr| arr.view()).collect();
    let data = stack(Axis(0), views.as_slice())?;
    
    println!("Encoding labels...");
    let (encoded_labels, label_map) = encode_labels(&all_labels);
    
    println!("Splitting data...");
    let (x_train, y_train, x_test, y_test) = split_data(&data, encoded_labels);
    
    println!("Training model...");
    let model = DecisionTree::params()
        .max_depth(Some(10))
        .min_weight_leaf(1.0)
        .fit(&Dataset::new(x_train, y_train))
        .expect("Failed to train model");
    
    println!("Making predictions...");
    let predictions = model.predict(&x_test);
    
    let accuracy = predictions
        .iter()
        .zip(y_test.iter())
        .filter(|(&pred, &actual)| pred == actual)
        .count() as f64
        / y_test.len() as f64;
    
    println!("Model accuracy: {:.2}%", accuracy * 100.0);
    
    export_model(&model, &label_map, "decision_tree_model.bin")?;
    
    Ok(model)
}

/// Entry point: runs the training pipeline, then reloads the exported
/// metadata file and prints the label map it contains.
fn main() -> Result<(), Box<dyn Error>> {
    // The trained model itself is not needed here; only its side effects are.
    run_main()?;

    import_model("decision_tree_model.bin")
        .map(|restored| println!("Imported label map: {:?}", restored))
}

Error: unnecessary parentheses around type

Error: unnecessary parentheses around function argument

Error: mismatched types

## Export model