# Chapter 6: Algorithm Test Harnesses

### Nombre: Bryan Morales

In [None]:
use strict;
use warnings;
use Data::Dump qw(dump);
use List::Util qw(zip min max sum shuffle); 
use sml;
use AI::MXNet qw(mx);

In [None]:
# Evaluate an algorithm on a single train/test split of a dataset.
#
# Arguments:
#   $self      - invocant (e.g. the class name 'sml'); not used by the body.
#   $dataset   - 2-D array object handed to train_test_split(); the pieces it
#                returns must support ->shape, ->slice_axis and ->reshape
#                (the driver in this notebook passes mx->nd->array(...)).
#   $algorithm - code ref, called as
#                $algorithm->('sml', $train, $test_x, <caller's named args>);
#                its return value is used as the predictions.
#   %args      - split  => fraction forwarded to train_test_split (default 0.6)
#                metric => name matching /accuracy/i or /rmse/i; when undef the
#                          metric is picked by inspecting the actual values.
#
# Returns:
#   list context   - ($score, $train, $test, $actual, $predicted)
#   scalar context - $score
#   $score is undef when a metric name is given that matches neither
#   /accuracy/i nor /rmse/i (a warning is emitted in that case).
sub evaluate_algorithm_train_test_split {
    # Peel off the three positional arguments; whatever is left in @_ are the
    # caller's named arguments, which override the defaults listed before them.
    my ($self, $dataset, $algorithm, %args) = ((splice @_, 0, 3), split => 0.6, metric => undef, @_);

    # An unknown metric name used to produce an undef score without any hint.
    if (defined $args{metric} && $args{metric} !~ /accuracy|rmse/i) {
        warn "evaluate_algorithm_train_test_split: unrecognised metric '$args{metric}'; score will be undef\n";
    }

    # NOTE(review): train_test_split is called as a plain function, so it must
    # be resolvable in the current package - confirm it is exported by sml.
    my ($train, $test) = train_test_split($dataset, split => $args{split});

    # Split the test set column-wise: every column but the last is input,
    # the last column holds the expected output.
    my $n_columns = $test->shape->[1];
    my $test_x = $test->slice_axis(axis => 1, begin => 0, end => $n_columns - 1);
    my $test_y = $test->slice_axis(axis => 1, begin => $n_columns - 1, end => $n_columns);

    # The algorithm receives the full training set (target column included),
    # the test inputs without the target column, and the caller's named args.
    my $predicted = $algorithm->('sml', $train, $test_x, @_);

    # Flatten the single target column into a 1-D array of length <rows>.
    my $actual = $test_y->reshape([$test_y->shape->[0]]);

    my $score;
    if (defined $args{metric}) {
        if ($args{metric} =~ /accuracy/i) {
            $score = sml->accuracy_metric($actual, $predicted);
        } elsif ($args{metric} =~ /rmse/i) {
            $score = sml->rmse_metric($actual, $predicted);
        }
    } else {
        # No metric requested: use RMSE if any actual value looks like a
        # decimal number, accuracy otherwise.
        # NOTE(review): this relies on $actual being array-dereferenceable and
        # on its elements stringifying to plain numbers - confirm for NDArrays.
        $score = (grep { $_ =~ /\d+\.\d+/ } @$actual)
               ? sml->rmse_metric($actual, $predicted)
               : sml->accuracy_metric($actual, $predicted);
    }

    return wantarray ? ($score, $train, $test, $actual, $predicted) : $score;
}

# Hand the sub to sml's add_to_class helper under the same name; the driver
# code below then invokes it as sml->evaluate_algorithm_train_test_split(...).
sml->add_to_class('evaluate_algorithm_train_test_split', \&evaluate_algorithm_train_test_split);

*sml::evaluate_algorithm_train_test_split

In [None]:
# Seed MXNet's random number generator before running the harness.
mx->random->seed(1);
my $filename = '../data/pima-indians-diabetes.csv';

# Load the CSV rows and run str_column_to_float over the columns.
# NOTE(review): the upper bound `$#{...} - 1` stops one short of the last
# column index, so the final column is not converted here - confirm intended.
my $dataset = sml->load_csv($filename);
foreach my $column (0 .. $#{ $dataset->[0] } - 1) {
    sml->str_column_to_float($dataset, $column);
}

# The harness works on an MXNet NDArray rather than on nested Perl arrays.
$dataset = mx->nd->array($dataset);

# Evaluate the zero-rule classifier on a single train/test split.
my $split = 0.6;
my @harness_options = (split => $split, metric => 'accuracy');
my ($accuracy, $train, $test, $actual, $predicted) = sml->evaluate_algorithm_train_test_split(
    $dataset,
    \&sml::zero_rule_algorithm_classification,
    @harness_options,
);
print "Accuracy: $accuracy\n";

Accuracy: 0.6558441558441559


1

In [6]:
# Build a confusion matrix from the actual and predicted values returned by
# the train/test-split evaluation above, then print it.
my ($unique, $matrix) = sml->confusion_matrix($actual, $predicted);
sml->print_confusion_matrix($unique, $matrix);

A/P 0 1
0 202 0
1 106 0


1

In [None]:
# Evaluate an algorithm with k-fold cross-validation.
#
# Arguments:
#   $self      - invocant; must provide cross_validation_split($dataset, n_folds => N)
#                returning an array ref of folds.
#   $dataset   - either an AI::MXNet::NDArray or an array ref of row array refs;
#                the last column / last row element is treated as the expected output.
#   $algorithm - code ref, called as
#                $algorithm->('sml', $train_set, $test_set, %args) and expected
#                to return ($predictions, $train_loss, $test_loss).
#   %args      - n_folds => number of folds (default 10)
#                metric  => name matching /accuracy/i or /rmse/i; when undef the
#                           metric is chosen per fold by inspecting the labels.
#                The whole hash is forwarded to $algorithm.
#
# Returns:
#   list context   - (\@scores, \@train_losses, \@test_losses,
#                     \@actuals_per_fold, \@predictions_per_fold)
#   scalar context - \@scores
#   A score is undef when a metric name is given that matches neither
#   /accuracy/i nor /rmse/i (one warning is emitted for the whole run).
sub evaluate_algorithm_cross_validation_split {
    my ($self, $dataset, $algorithm, %args) = @_;
    $args{n_folds} //= 10;
    # Leaves the value untouched but guarantees the 'metric' key exists in the
    # hash that is forwarded to $algorithm.
    $args{metric}  //= undef;

    # An unknown metric name used to produce undef scores without any hint.
    if (defined $args{metric} && $args{metric} !~ /accuracy|rmse/i) {
        warn "evaluate_algorithm_cross_validation_split: unrecognised metric '$args{metric}'; scores will be undef\n";
    }

    my @scores;
    my @train_losses;
    my @test_losses;
    my @actuals_per_fold;
    my @predictions_per_fold;

    my $folds_ref = $self->cross_validation_split($dataset, n_folds => $args{n_folds});
    my @folds = @$folds_ref;

    my $is_mxnet_ndarray = (ref $dataset eq 'AI::MXNet::NDArray');

    for my $i (0 .. $#folds) {
        my $test_fold = $folds[$i];

        # Every fold except the current one is used for training.
        my @train_folds_list = map { $folds[$_] } grep { $_ != $i } 0 .. $#folds;

        my ($train_set, $test_set_for_algo, $actual_fold_labels);

        if ($is_mxnet_ndarray) {
            $train_set = @train_folds_list
                       ? mx->nd->concat(@train_folds_list, dim => 0)
                       : mx->nd->array([]);

            # Take the last column and flatten it to a 1-D array of labels.
            # slice_axis is used because no `all` helper is defined or
            # imported in this file.
            my ($n_rows, $n_cols) = @{ $test_fold->shape }[0, 1];
            $actual_fold_labels = $test_fold
                ->slice_axis(axis => 1, begin => $n_cols - 1, end => $n_cols)
                ->reshape([$n_rows]);

            # The NDArray test fold is handed over unchanged (label column included).
            $test_set_for_algo = $test_fold;
        } else {
            # Flatten the training folds into one list of rows (rows are shared,
            # not copied).
            $train_set = [ map { @$_ } @train_folds_list ];

            $actual_fold_labels = [ map { $_->[-1] } @$test_fold ];

            # Give the algorithm copies of the test rows with the label blanked.
            # The label must be cleared BEFORE the copy is stored; otherwise the
            # stored row still carries the true label.
            $test_set_for_algo = [];
            for my $row (@$test_fold) {
                my @row_copy = @$row;
                $row_copy[-1] = undef if @row_copy;
                push @$test_set_for_algo, \@row_copy;
            }
        }

        my ($predicted_fold, $current_train_loss, $current_test_loss) =
            $algorithm->('sml', $train_set, $test_set_for_algo, %args);

        # Convert the labels to a plain Perl list.
        my @actual_perl;
        if ($is_mxnet_ndarray) {
            for my $idx (0 .. $actual_fold_labels->shape->[0] - 1) {
                push @actual_perl, $actual_fold_labels->at($idx)->asscalar();
            }
        } else {
            @actual_perl = @$actual_fold_labels;
        }

        # Decide once per fold whether this is treated as a regression run:
        # either the metric name says so, or (with no metric given) at least one
        # label looks like a decimal number.
        my $labels_look_decimal = (grep { $_ =~ /\d+\.\d+/ } @actual_perl) ? 1 : 0;
        my $format_as_decimal   = defined $args{metric}
                                ? ($args{metric} =~ /rmse/i ? 1 : 0)
                                : $labels_look_decimal;

        # Convert the predictions to a plain Perl list; for regression runs on
        # NDArrays each prediction is formatted to one decimal place.
        my @predicted_perl;
        if ($is_mxnet_ndarray) {
            for my $idx (0 .. $predicted_fold->shape->[0] - 1) {
                my $val = $predicted_fold->at($idx)->asscalar();
                push @predicted_perl, $format_as_decimal ? sprintf('%.1f', $val) : $val;
            }
        } else {
            @predicted_perl = @$predicted_fold;
        }

        my $score;
        if (defined $args{metric}) {
            if ($args{metric} =~ /accuracy/i) {
                $score = sml->accuracy_metric(\@actual_perl, \@predicted_perl);
            } elsif ($args{metric} =~ /rmse/i) {
                $score = sml->rmse_metric(\@actual_perl, \@predicted_perl);
            }
        } else {
            $score = $labels_look_decimal
                   ? sml->rmse_metric(\@actual_perl, \@predicted_perl)
                   : sml->accuracy_metric(\@actual_perl, \@predicted_perl);
        }

        push @scores, $score;
        push @train_losses, $current_train_loss;
        push @test_losses, $current_test_loss;
        push @actuals_per_fold, \@actual_perl;
        push @predictions_per_fold, \@predicted_perl;
    }

    return wantarray ? (\@scores, \@train_losses, \@test_losses, \@actuals_per_fold, \@predictions_per_fold) : \@scores;
}

# Hand the sub to sml's add_to_class helper under the same name; the driver
# code below then invokes it as sml->evaluate_algorithm_cross_validation_split(...).
sml->add_to_class('evaluate_algorithm_cross_validation_split', \&evaluate_algorithm_cross_validation_split);

*sml::evaluate_algorithm_cross_validation_split

In [None]:
# Seed MXNet's random number generator, then reload the dataset as an NDArray.
mx->random->seed(1);
$filename = '../data/pima-indians-diabetes.csv';
$dataset  = sml->load_csv($filename);
$dataset  = mx->nd->array($dataset);

# Evaluate the zero-rule classifier with 5-fold cross-validation.
my $n_fold = 5;
my @cv_options = (n_folds => $n_fold, metric => 'accuracy');
my ($scores, $train_losses, $test_losses, $actuals, $predictions) =
    sml->evaluate_algorithm_cross_validation_split(
        $dataset,
        \&sml::zero_rule_algorithm_classification,
        @cv_options,
    );

# Report the per-fold scores and their mean.
my $score_line    = join ' ', @$scores;
my $mean_accuracy = sum(@$scores) / @$scores;
printf "Scores: %s\n", $score_line;
printf "Mean Accuracy: %0.2f\n", $mean_accuracy;


Fold 0: Working with MXNet NDArray.
Fold 1: Working with MXNet NDArray.
Fold 2: Working with MXNet NDArray.
Fold 3: Working with MXNet NDArray.
Fold 4: Working with MXNet NDArray.
Scores: 0.6274509803921569 0.6862745098039216 0.6274509803921569 0.6797385620915033 0.6339869281045752
Mean Accuracy: 0.65


1

In [None]:
# Print the accuracy and confusion matrix of every cross-validation fold.
# $scores, $actuals and $predictions are parallel lists with one entry per
# fold, so they are walked by index. List::Util's zip expects array
# references, so handing it the flattened lists would not pair them up.
for my $fold (0 .. $#{$scores}) {
    my ($accuracy, $actual_fold, $predicted_fold) =
        ($scores->[$fold], $actuals->[$fold], $predictions->[$fold]);
    my ($unique, $matrix) = sml->confusion_matrix($actual_fold, $predicted_fold);
    printf "Accuracy: %0.2f%%\n", $accuracy * 100;
    sml->print_confusion_matrix($unique, $matrix);
}


Accuracy: 62.75%
A/P 0 1
0 96 0
1 57 0
Accuracy: 68.63%
A/P 0 1
0 105 0
1 48 0
Accuracy: 62.75%
A/P 0 1
0 96 0
1 57 0
Accuracy: 67.97%
A/P 0 1
0 104 0
1 49 0
Accuracy: 63.40%
A/P 0 1
0 97 0
1 56 0


1