In [245]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from torchinfo import summary
import pandas as pd
import os
from pathlib import Path

import NeuronalNetwork as CNNet

In [246]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

class CNNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(51136, 50)
        self.fc2 = nn.Linear(50, 2)


    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        #x = x.view(x.size(0), -1)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc2(x))
        return F.log_softmax(x,dim=1)  


Using cuda device


In [247]:
def load_audio_file(path: str):

    dataset = []
    walker = sorted(str(p) for p in Path(path).glob(f'*.wav'))

    for i, file_path in enumerate(walker):
        path, filename = os.path.split(file_path)    
        # Load audio
        waveform, sample_rate = torchaudio.load(file_path)
        dataset.append([waveform, sample_rate])
        
    return dataset

In [248]:
model = CNNet()
model.load_state_dict(torch.load('./data/model.pth'))
model.eval()

trainset_speechcommand = load_audio_file('./testaudio')

testloader = torch.utils.data.DataLoader(trainset_speechcommand, batch_size=1,
                                            shuffle=True, num_workers=0)


In [280]:
spectrogram_tensor = None
directory = f'./data/testimage/order'
if(os.path.isdir(directory)):
    print("Data exists")
else:
    os.makedirs(directory, mode=0o777, exist_ok=True)
spectorgram_value = None
for data in testloader: 
    waveform = data[0]
    spectrogram_tensor = torchaudio.transforms.Spectrogram()(waveform)
    fig = plt.figure()
    print(spectrogram_tensor[0].log2()[0,:,:].numpy())
    from sklearn import preprocessing
    scaler = preprocessing.MinMaxScaler
    normalized = scaler.fit_transform(spectrogram_tensor[0].log2()[0,:,:].numpy())
    print(normalized)
    

Data exists
[[-20.323214  -24.938257  -22.680481  ...  -9.652863  -12.82978
   -7.7300043]
 [-21.841885  -21.756824  -20.087286  ...  -8.05906   -10.274339
   -8.517631 ]
 [-21.279036  -23.433155  -18.039644  ...  -7.9789057  -8.450219
  -11.861316 ]
 ...
 [-26.869009  -25.95877   -25.062504  ... -24.650267  -28.011158
  -30.295326 ]
 [-40.719624  -24.163355  -25.175873  ... -25.848898  -27.623299
  -26.371065 ]
 [-25.931694  -23.238054  -26.589584  ... -23.138702  -25.016146
  -24.734066 ]]


ValueError: Found array with dim 3. the normalize function expected <= 2.

<Figure size 640x480 with 0 Axes>

In [263]:
dataset = datasets.ImageFolder(
    root="./data/testimage/",
    transform=transforms.Compose([transforms.Resize((201,81)),
                                  transforms.ToTensor()])
)
print(dataset[0])

(tensor([[[0.1412, 0.1804, 0.1608,  ..., 0.2431, 0.1490, 0.3255],
         [0.1529, 0.1529, 0.1412,  ..., 0.3098, 0.2235, 0.2941],
         [0.1490, 0.1686, 0.1255,  ..., 0.3176, 0.2941, 0.1725],
         ...,
         [0.2000, 0.1922, 0.1804,  ..., 0.1804, 0.2118, 0.2392],
         [0.2667, 0.1725, 0.1843,  ..., 0.1922, 0.2078, 0.1961],
         [0.1922, 0.1647, 0.1961,  ..., 0.1647, 0.1804, 0.1804]],

        [[0.5216, 0.4196, 0.4706,  ..., 0.7373, 0.6745, 0.7725],
         [0.4863, 0.4863, 0.5255,  ..., 0.7647, 0.7255, 0.7608],
         [0.4980, 0.4549, 0.5686,  ..., 0.7686, 0.7608, 0.6941],
         ...,
         [0.3765, 0.4000, 0.4196,  ..., 0.4275, 0.3529, 0.2980],
         [0.0039, 0.4392, 0.4157,  ..., 0.4000, 0.3608, 0.3882],
         [0.4000, 0.4588, 0.3843,  ..., 0.4627, 0.4196, 0.4235]],

        [[0.5529, 0.5569, 0.5569,  ..., 0.4510, 0.5059, 0.4039],
         [0.5569, 0.5569, 0.5529,  ..., 0.4118, 0.4627, 0.4235],
         [0.5569, 0.5569, 0.5490,  ..., 0.4078, 0.4235, 0

In [251]:
test_dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=15,
    num_workers=2,
    shuffle=True
)


In [252]:
classes = [
    "no",
    "yes"
]


In [253]:
model.eval()
for batch, (X,Y) in enumerate(test_dataloader):
    pred = model(X)
    predicted = classes[pred[0].argmax(0)]
    print(f'Predicted: "{predicted}"')

Predicted: "no"
