#### NuGet Packages

In [None]:
// ML.NET 
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.ML.Vision,1.4.0"
#r "nuget:Microsoft.ML.ImageAnalytics,1.4.0"
#r "nuget:Microsoft.Data.Analysis"
#r "nuget:SciSharp.TensorFlow.Redist,1.14.0"

// Utility
#r "nuget:Spectrogram"
#r "nuget:System.Windows.Extensions"

In [None]:
#r "nuget: NAudio, 1.9.0"

### Namespaces

In [None]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Vision;
using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;

using System.IO;
using System.Net;
using Spectrogram;
using System.Drawing;
using System.Drawing.Imaging;
using System.Media;
using System.Threading;

### Constants and Variables

In [None]:
// Constants
// Model-related names. Of these, only MODEL_FILE_NAME is referenced by the cells shown in
// this notebook (for ModelPath and when saving the model).
private string MODEL_INPUT_NAME = "modelInput";
private string MODEL_OUTPUT_NAME = "modelOutput";
private string MODEL_FILE_NAME = "SoundClassifier.zip";
private string ONNX_MODEL_FILE_NAME = "SoundClassifier.onnx";
private string AUDIO_FILE_EXTENSION = ".wav";
private string IMAGE_FILE_EXTENSION = ".png";
// NOTE(review): ImageSettings further down declares Height = 288 and Width = 432, i.e. the
// opposite assignment of these two values — confirm which orientation is intended.
private int IMAGE_WIDTH = 288;
private int IMAGE_HEIGHT = 432;
// Spectrogram parameters: passed to SpectrogramGenerator as fftSize, stepSize and maxFreq,
// and to GetBitmapMel as melBinCount.
private int FFT_SIZE = 8192;
private int STEP_SIZE = 200;
private int MAX_FREQUENCY = 3000;
private int MEL_BIN_COUNT = 250;

// Dataset
// All dataset locations are derived from TempDatasetPath, a hard-coded local path.
var CurrentWorkingDirectory = Directory.GetCurrentDirectory();
var TempDatasetPath = @"D:\dataset";
var DatasetPath = Path.Combine(TempDatasetPath, "Data");
var AudioFilePath = Path.Combine(DatasetPath, "GroupSound");
var AudioImagesFilePath = Path.Combine(DatasetPath, "GroupImages");
var AudioImagesFilePathAll = Path.Combine(DatasetPath, "GroupImagesAll");

// Same folder names ("train", "test", "val") that SplitDataset creates under its directory argument.
var TrainAudioImagesFilePath = Path.Combine(AudioImagesFilePath, "train");
var TestAudioImagesFilePath = Path.Combine(AudioImagesFilePath, "test");
var ValAudioImagesFilePath = Path.Combine(AudioImagesFilePath, "val");

// Single recording used by the analysis cells (metadata, playback, spectrograms).
var DirectoryPath10 = Path.Combine(AudioFilePath, "10");
var SampleAudioFilePath = Path.Combine(DirectoryPath10, "F_BAC01_MC_MN_SIM01_102.wav");

// CSV loaded into a DataFrame in the "Load Dataset" section.
var Features3SecondsPath = Path.Combine(DatasetPath, "features_3_sec.csv");

// NOTE(review): the "Save Model" cell saves to MODEL_FILE_NAME directly, not to ModelPath — confirm.
var ModelPath = Path.Combine(CurrentWorkingDirectory, "assets", MODEL_FILE_NAME);


// No trailing semicolon: the notebook displays the value of the cell's last expression.
display(SampleAudioFilePath)

### Utility Functions

#### Formatter

In [None]:
// Registers an HTML formatter for DataFrame values: renders a table whose header is
// "index" plus every column name, followed by at most the first 10 rows.
Formatter.Register(typeof(Microsoft.Data.Analysis.DataFrame),(dataFrame, writer) =>
{
    var frame = dataFrame as Microsoft.Data.Analysis.DataFrame;

    // Header row: the italic "index" caption, then one cell per column.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    headerCells.AddRange(frame.Columns.Select(column => (IHtmlContent)th(column.Name)));

    // Body rows: each starts with its row number, followed by the row's values.
    var maxRows = 10;
    var rowLimit = Math.Min(maxRows, frame.Rows.Count);
    var bodyRows = new List<List<IHtmlContent>>();
    for (var rowIndex = 0; rowIndex < rowLimit; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in frame.Rows[rowIndex])
        {
            rowCells.Add(td(value));
        }
        bodyRows.Add(rowCells);
    }

    var htmlTable = table(
        thead(headerCells),
        tbody(bodyRows.Select(cells => tr(cells))));

    writer.Write(htmlTable);
}, "text/html");

#### Play Audio

In [None]:
/// <summary>
/// Plays a .wav file for a fixed amount of time and then stops playback.
/// </summary>
/// <param name="file">Path of the .wav file to play.</param>
/// <param name="durationMilliseconds">
/// How long to let playback run before stopping it. Defaults to 5000 ms, the value that
/// was previously hard-coded.
/// </param>
public void PlayAudio(string file, int durationMilliseconds = 5000)
{
    // Windows OS Specific
    // SoundPlayer is IDisposable; the using declaration releases it when the method exits.
    using var player = new SoundPlayer(file);

    // Play() returns immediately, so block this cell while the sound is playing.
    player.Play();
    Thread.Sleep(durationMilliseconds);

    player.Stop();
}

#### Display Image

In [None]:
/// <summary>
/// Shows an image file inline in the notebook by embedding its bytes in an HTML img tag.
/// </summary>
/// <param name="file">Path of the image file; the bytes are embedded as an image/png data URI.</param>
public void DisplayImage(string file)
{
    // Base64-encode the raw bytes so they can be carried inside the HTML markup itself.
    var encoded = Convert.ToBase64String(File.ReadAllBytes(file));
    var markup = $"<img src=\"data:image/png;base64,{encoded}\"></img>";
    display(HTML(markup));
}

#### Plot Spectrogram

In [None]:
/// <summary>
/// Builds a spectrogram from audio samples and saves it as an image file.
/// </summary>
/// <param name="sampleRate">Sample rate of <paramref name="audio"/>.</param>
/// <param name="audio">Audio samples to analyze.</param>
/// <param name="saveFilePath">Path the spectrogram image is written to.</param>
public void PlotSpectrogram(int sampleRate, IEnumerable<double> audio, string saveFilePath)
{
    // FFT size, step size and maximum frequency come from the notebook-level constants.
    var generator = new SpectrogramGenerator(
        sampleRate,
        fftSize: FFT_SIZE,
        stepSize: STEP_SIZE,
        maxFreq: MAX_FREQUENCY);

    generator.Add(audio);
    generator.Colormap = Colormap.Viridis;
    generator.SaveImage(saveFilePath);
}

#### Plot Mel-Spectrogram

In [None]:
/// <summary>
/// Builds a mel spectrogram from audio samples, saves it as a PNG and reports the path.
/// </summary>
/// <param name="sampleRate">Sample rate of <paramref name="audio"/>.</param>
/// <param name="audio">Audio samples to analyze.</param>
/// <param name="saveFilePath">Path the PNG image is written to.</param>
public void PlotMelSpectrogram(int sampleRate, IEnumerable<double> audio, string saveFilePath)
{
    var sg = new SpectrogramGenerator(sampleRate, fftSize: FFT_SIZE, stepSize: STEP_SIZE, maxFreq: MAX_FREQUENCY);
    sg.Add(audio);

    // Bitmap wraps a native GDI+ handle; dispose it as soon as the file is written
    // instead of waiting for finalization.
    using Bitmap bmp = sg.GetBitmapMel(melBinCount: MEL_BIN_COUNT);
    bmp.Save(saveFilePath, ImageFormat.Png);
    display($"File saved at {saveFilePath}");
}

#### Create Directory with all images

In [None]:
/// <summary>
/// Flattens the per-label image folders under AudioImagesFilePath into the single
/// AudioImagesFilePathAll directory. Does nothing when that directory already exists.
/// </summary>
/// <remarks>
/// Only files directly inside each sub-directory are copied. File.Copy is called without
/// overwrite, so two label folders containing a file with the same name abort the copy
/// with an IOException.
/// </remarks>
public void CopyAllAudioImagesToADirectory()
{
    var isAllImageDatasetExist = Directory.Exists(AudioImagesFilePathAll);
    display(isAllImageDatasetExist);
    if (isAllImageDatasetExist)
    {
        return;
    }

    Directory.CreateDirectory(AudioImagesFilePathAll);

    // EnumerateDirectories already yields paths that include AudioImagesFilePath, so they
    // are used as-is. Re-combining them with the base path only worked for rooted paths.
    foreach (var labelDirectory in Directory.EnumerateDirectories(AudioImagesFilePath))
    {
        foreach (var file in Directory.EnumerateFiles(labelDirectory))
        {
            File.Copy(file, Path.Combine(AudioImagesFilePathAll, Path.GetFileName(file)));
        }
    }
}

#### Split Dataset

In [None]:
/// <summary>
/// Copies the files of every label folder into "train", "test" and "val" sub-folders of
/// <paramref name="directory"/>: files 0-69 go to train, 70-89 to test and 90-99 to val.
/// </summary>
/// <param name="directory">Folder that contains one sub-folder per label.</param>
/// <param name="labels">Names of the label sub-folders to split.</param>
/// <remarks>
/// A split whose folder already exists is left untouched. Files are sorted by path
/// (ordinal) before slicing so the three splits never overlap and are reproducible
/// between runs. A label folder with fewer than 100 files fills the splits in order and
/// leaves the later ones short or empty instead of throwing.
/// </remarks>
private static void SplitDataset(string directory, string[] labels)
{
    CopySplit("train", skip: 0, take: 70);
    CopySplit("test", skip: 70, take: 20);
    CopySplit("val", skip: 90, take: 10);

    // Creates one split folder and copies the [skip, skip + take) slice of every label into it.
    void CopySplit(string splitName, int skip, int take)
    {
        var splitPath = Path.Combine(directory, splitName);
        if (Directory.Exists(splitPath))
        {
            return;
        }

        Directory.CreateDirectory(splitPath);
        foreach (var label in labels)
        {
            var subDirectory = Path.Combine(splitPath, label);
            Directory.CreateDirectory(subDirectory);

            // Copy Files
            var sourcePath = Path.Combine(directory, label);
            var selectedFiles = Directory.EnumerateFiles(sourcePath)
                .OrderBy(path => path, StringComparer.Ordinal)
                .Skip(skip)
                .Take(take);
            foreach (var file in selectedFiles)
            {
                File.Copy(file, Path.Combine(subDirectory, Path.GetFileName(file)));
            }
        }
    }
}

### Load Dataset

In [None]:
// True when the dataset directory (DatasetPath) exists on disk.
var isDatasetAlreadyDownloaded = Directory.Exists(DatasetPath);
display(isDatasetAlreadyDownloaded)

In [None]:
// Load the features CSV into a DataFrame.
var soundDataFrame = Microsoft.Data.Analysis.DataFrame.LoadCsv(Features3SecondsPath);

In [None]:
// First five rows (rendered through the DataFrame formatter registered above).
soundDataFrame.Head(5)

In [None]:
// Last five rows.
soundDataFrame.Tail(5)

In [None]:
// Per-column summary (data type and length).
soundDataFrame.Info()

In [None]:
// Descriptive statistics for the numeric columns.
soundDataFrame.Description()

#### Classes

In [None]:
// Group the rows by the "label" column and collect the label values of the grouped
// frame into a list of class names.
Microsoft.Data.Analysis.DataFrame groupedDataFrameLabel = soundDataFrame.GroupBy("label").Sum("label");
List<string> classes = new List<string>();
var labelColumn = groupedDataFrameLabel["label"];
for (long rowIndex = 0; rowIndex < labelColumn.Length; rowIndex++)
{
    classes.Add((string)labelColumn[rowIndex]);
}

In [None]:
classes

### Analyze Audio Data

#### Metadata

In [None]:
// Load Audio file
/// <summary>
/// Reads every sample of an audio file through NAudio's AudioFileReader, scales each one
/// by <paramref name="multiplier"/>, and prints basic metadata about the stream.
/// </summary>
/// <param name="filePath">Path of the audio file to read.</param>
/// <param name="multiplier">Factor applied to each sample that is read.</param>
/// <returns>The scaled samples and the sample rate reported by the reader.</returns>
/// <remarks>
/// Samples are appended in the order the reader returns them; no channel mixing is done
/// here. The "File Size" figure is derived from the reader's Length, not from the file on disk.
/// </remarks>
public (double[] audio, int sampleRate) ReadWavMono(string filePath, double multiplier = 16_000)
{
    using var reader = new NAudio.Wave.AudioFileReader(filePath);
    var format = reader.WaveFormat;

    int sampleRate = format.SampleRate;
    int channelCount = format.Channels;
    int bytesPerSample = format.BitsPerSample / 8;
    int sampleCount = (int)(reader.Length / bytesPerSample);
    long fileSizeInKB = reader.Length / 1024;

    // Read in chunks sized sampleRate * channelCount until the reader is exhausted.
    var samples = new List<double>(sampleCount);
    var chunk = new float[sampleRate * channelCount];
    while (true)
    {
        int read = reader.Read(chunk, 0, chunk.Length);
        if (read <= 0)
        {
            break;
        }

        for (int offset = 0; offset < read; offset++)
        {
            samples.Add(chunk[offset] * multiplier);
        }
    }

    display("************ AUDIO METADATA ***************");
    display($"Sample Rate              : {sampleRate}");
    display($"Number of Channels       : {channelCount}");
    display($"Bytes per sample         : {bytesPerSample}");
    display($"Sample Count             : {sampleCount}");
    display($"File Size in KB          : {fileSizeInKB} KB");
    display("******************************************");

    return (samples.ToArray(), sampleRate);
}


In [None]:
// Read the sample recording (this also prints its metadata); audio and sampleRate are
// reused by the spectrogram cells below.
(double[] audio, int sampleRate) = ReadWavMono(SampleAudioFilePath);

#### Play Audio

In [None]:
// Plays the sample recording (Windows only, see PlayAudio).
PlayAudio(SampleAudioFilePath)

#### Spectrogram

In [None]:
// Relative path built from the sample's file name (without extension).
string savePath = $"{Path.GetFileNameWithoutExtension(SampleAudioFilePath)}-spectro.png";
PlotSpectrogram(sampleRate, audio, savePath);

In [None]:
// Show where the spectrogram was saved, then render it inline.
display(savePath);
DisplayImage(savePath);

#### Mel Spectrogram

In [None]:
// Save the mel spectrogram under its own file name. Passing savePath here overwrote the
// linear spectrogram image written earlier and left melSavePath unused.
string melSavePath = $"{Path.GetFileNameWithoutExtension(SampleAudioFilePath)}-melspectro.png";
PlotMelSpectrogram(sampleRate, audio, melSavePath);

## Machine learning

#### Prepare Dataset

In [None]:
// Show the current working directory.
display(Directory.GetCurrentDirectory());

In [None]:
// Build the flattened image directory (no-op when it already exists).
CopyAllAudioImagesToADirectory();

### Load Dataset

#### Data Classes

In [None]:
/// <summary>
/// Image dimensions exposed as compile-time constants (presumably pixels — TODO confirm).
/// Not referenced by the other cells shown in this notebook.
/// </summary>
/// <remarks>
/// NOTE(review): the IMAGE_WIDTH / IMAGE_HEIGHT constants in the "Constants and Variables"
/// cell use the opposite assignment (width 288, height 432) — confirm which is intended.
/// </remarks>
public struct ImageSettings
{
    public const int Height = 288;
    public const int Width = 432;
}

In [None]:
/// <summary>
/// One image on disk together with its class label, as produced by LoadImagesFromDirectory.
/// </summary>
public class ImageData
{
    // Path of the image file.
    public string ImagePath { get; set; }

    // Class label: the parent folder name or the leading letters of the file name,
    // depending on how LoadImagesFromDirectory was called.
    public string Label { get; set; }
}

In [None]:
/// <summary>
/// Row shape of the transformed dataset: the ImageData columns plus the columns added by
/// the preprocessing pipeline.
/// </summary>
public class ModelInput
{
    // Raw image bytes; matches the "Image" column written by LoadRawImageBytes.
    public byte[] Image { get; set; }
    
    // Label encoded as a key; matches the "LabelAsKey" column written by MapValueToKey.
    public UInt32 LabelAsKey { get; set; }

    // Path of the image file.
    public string ImagePath { get; set; }

    // Original text label.
    public string Label { get; set; }
}

In [None]:
/// <summary>
/// Result row returned by the prediction engine for a single image.
/// </summary>
public class ModelOutput
{
    // Path of the image file that was classified.
    public string ImagePath { get; set; }

    // Expected (original) label of the image.
    public string Label { get; set; }

    // Label predicted by the model; the training pipeline maps it back from a key to its value.
    public string PredictedLabel { get; set; }
}

In [None]:
/// <summary>
/// Prints a prediction: the image file name, the expected label and the predicted label.
/// </summary>
/// <param name="prediction">Prediction to report.</param>
public void DisplayOutcome(ModelOutput prediction)
{
    display("************ Classification Outcome *************");

    // Only the file name is shown, not the full path.
    var fileName = Path.GetFileName(prediction.ImagePath);
    display($"File             : {fileName}");

    var expected = prediction.Label;
    display($"Expected Genre   : {expected}");

    var predicted = prediction.PredictedLabel;
    display($"Predicted Genre: : {predicted}");

    display("*************************************************");
}

#### Load Images from Directory

In [None]:
/// <summary>
/// Lazily enumerates the .jpg and .png files under a folder (including sub-folders) and
/// pairs each one with a label.
/// </summary>
/// <param name="folder">Root folder to search.</param>
/// <param name="useFolderNameAsLabel">
/// When true, the label is the name of the file's parent folder. When false, the label is
/// the leading run of letters in the file name (the whole file name if every character
/// is a letter).
/// </param>
/// <returns>One <see cref="ImageData"/> per matching file.</returns>
public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder, bool useFolderNameAsLabel = true)
{
    var files = Directory.GetFiles(folder, "*",
        searchOption: SearchOption.AllDirectories);

    foreach (var file in files)
    {
        // Compare extensions case-insensitively so files such as "x.PNG" are not silently dropped.
        var extension = Path.GetExtension(file);
        var isImage = extension.Equals(".jpg", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".png", StringComparison.OrdinalIgnoreCase);
        if (!isImage)
        {
            continue;
        }

        string label;
        if (useFolderNameAsLabel)
        {
            label = Directory.GetParent(file).Name;
        }
        else
        {
            // Keep the file name up to (not including) its first non-letter character.
            var fileName = Path.GetFileName(file);
            var prefixLength = 0;
            while (prefixLength < fileName.Length && char.IsLetter(fileName[prefixLength]))
            {
                prefixLength++;
            }

            label = fileName.Substring(0, prefixLength);
        }

        yield return new ImageData()
        {
            ImagePath = file,
            Label = label
        };
    }
}

In [None]:
// Materialize the list once. LoadImagesFromDirectory is a lazy iterator, so every
// enumeration of the un-materialized sequence would scan the directory tree again.
// NOTE(review): Take(500) keeps the first 500 files in enumeration order, which may not
// cover every label evenly — confirm this sampling is intended.
var images = LoadImagesFromDirectory(AudioImagesFilePath).Take(500).ToList();
display(images.Count);

#### Build ML Pipeline

In [None]:
// Fixed seed so shuffling and splitting are repeatable between runs.
var mlContext = new MLContext(seed: 1);
var dataView = mlContext.Data.LoadFromEnumerable(images);
dataView = mlContext.Data.ShuffleRows(dataView);

In [None]:
// Preprocessing: encode "Label" as a key column ("LabelAsKey") and load each image's raw
// bytes into an "Image" column, then apply both steps to the shuffled data.
// NOTE(review): the ImagePath values come from LoadImagesFromDirectory(AudioImagesFilePath),
// while imageFolder points at AudioImagesFilePathAll — confirm which location the bytes
// are actually read from.
var imagesDataset = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
                                        inputColumnName: "Label",
                                        keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
                                        .Append(mlContext.Transforms.LoadRawImageBytes(outputColumnName: "Image",
                                                                    imageFolder: AudioImagesFilePathAll,
                                                                    inputColumnName: "ImagePath"))
                                        .Fit(dataView)
                                        .Transform(dataView);

##### Split Dataset : Train/Test - 70:30

In [None]:
// 0.3 is the fraction of rows that goes to the test set.
var trainTestSplit = mlContext.Data.TrainTestSplit(imagesDataset, 0.3);

var testSet = trainTestSplit.TestSet;
var trainSet = trainTestSplit.TrainSet;

In [None]:
// Row counts of the test and train sets, in that order.
display(mlContext.Data.CreateEnumerable<ModelInput>(testSet,reuseRowObject:true).Count());
display(mlContext.Data.CreateEnumerable<ModelInput>(trainSet,reuseRowObject:true).Count());

In [None]:
// Trainer configuration: ResNet v2 50 architecture, 2 epochs, batches of 10 images,
// learning rate 0.01; training metrics are written to the console as they are reported.
// NOTE(review): ValidationSet is the same testSet that the "Evaluate Model" cell scores
// afterwards, so the reported metrics are not computed on fully held-out data — confirm
// whether a separate validation split is wanted.
var options = new ImageClassificationTrainer.Options()
{
    FeatureColumnName = "Image",
    LabelColumnName = "LabelAsKey",
    Arch = ImageClassificationTrainer.Architecture.ResnetV250,
    Epoch = 2,
    BatchSize = 10,
    LearningRate = 0.01f,
    MetricsCallback = (metrics) => Console.WriteLine(metrics),
    ValidationSet = testSet
};

// Train the image classifier, then map the predicted key back to its original label value.
var trainingPipeline = mlContext.MulticlassClassification.Trainers.ImageClassification(options)
        .Append(mlContext.Transforms.Conversion.MapKeyToValue(
            outputColumnName: "PredictedLabel",
            inputColumnName: "PredictedLabel"));

### Train Model

In [None]:
// Fit the training pipeline on the training split.
ITransformer model = trainingPipeline.Fit(trainSet);

### Evaluate Model

In [None]:
// Score the test split and compute multiclass metrics against the key-encoded labels.
var predicitions = model.Transform(testSet);
var metrics = mlContext.MulticlassClassification.Evaluate(predicitions, labelColumnName: "LabelAsKey", predictedLabelColumnName: "PredictedLabel");
display(metrics);

### Prediction

In [None]:
// Expose the test split as ModelInput rows and show how many there are.
var testInputs = mlContext.Data.CreateEnumerable<ModelInput>(testSet,reuseRowObject:true);
display(testInputs.Count());

In [None]:
// Single-row prediction on the test row at index 6.
// NOTE(review): the "Success"/"Failure" headings assume particular outcomes for rows 6
// and 5; these depend on the trained model and the data split.
var predictionEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);
ModelInput image = testInputs.ElementAt(6);

var prediction = predictionEngine.Predict(image);

#### Success

In [None]:
DisplayOutcome(prediction);

#### Failure

In [None]:
// Same prediction engine, now on the test row at index 5; image and prediction are
// re-declared in this cell.
ModelInput image = testInputs.ElementAt(5);

var prediction = predictionEngine.Predict(image);

In [None]:
DisplayOutcome(prediction);

### Save Model

In [None]:
// Save the trained model together with the training set's schema.
// NOTE(review): the target is the bare file name MODEL_FILE_NAME (a relative path), not
// the ModelPath variable defined earlier — confirm the intended location.
mlContext.Model.Save(model, trainSet.Schema, MODEL_FILE_NAME);

In [None]:
// Model File size
// Integer division: the size is truncated to whole megabytes.
FileInfo fi = new FileInfo(MODEL_FILE_NAME);
display($"Model File size: {fi.Length / (1024 * 1024)} MB");