In [4]:
#DEVICE + MODEL READY

import torch
import torch.nn as nn
import torchaudio
import sounddevice as sd
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.autograd import Variable
from IPython.display import Audio


def _strip_data_parallel_prefix(state_dict):
    """Return a copy of ``state_dict`` without a leading ``module.`` on its keys.

    Checkpoints saved from an ``nn.DataParallel`` wrapper store every entry as
    ``module.<name>``. Only a leading prefix is removed, so keys that merely
    contain the text ``module.`` further in are left alone.

    Args:
        state_dict: Mapping of parameter names to tensors.

    Returns:
        A new dict with the same values and un-prefixed keys.
    """
    prefix = "module."
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


# Use the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-18 backbone whose final layer is replaced by a 10-class head.
model = resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

# The checkpoint is always loaded into the plain (unwrapped) model. Wrapping the
# model in nn.DataParallel and then loading prefix-stripped keys cannot work,
# because the wrapper expects every key to start with "module.".
try:
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict = torch.load('ResNet18_Best.pth', map_location=device)
    new_state_dict = _strip_data_parallel_prefix(state_dict)
    model.load_state_dict(new_state_dict)
    model = model.to(device)
    model = model.eval()
    print(f"Model successfully loaded on {device}.")
except Exception as exc:
    # Best effort: report why loading failed instead of hiding the reason.
    # The model keeps its randomly initialised weights in this case.
    print(f"Failed to load the model. Please check the model file. ({exc})")



SoundAnalysis thread started
Model successfully loaded.+CPU


In [5]:
#Transform

# Audio sample rate in Hz, shared by the mel-spectrogram transform and the recording code.
SAMPLE_RATE = 22050

class MonoToColor(nn.Module):
    """Repeat a tensor along its first dimension to create extra channels.

    For a ``(1, H, W)`` input the output is ``(num_channels, H, W)``, each
    channel being a copy of the input; the last two dimensions are unchanged.

    Args:
        num_channels: How many times the input is repeated (default 3).
    """

    def __init__(self, num_channels=3):
        super().__init__()
        self.num_channels = num_channels

    def forward(self, tensor):
        """Return ``tensor`` repeated ``num_channels`` times along dimension 0."""
        repeats = (self.num_channels, 1, 1)
        return tensor.repeat(*repeats)

# Apply the same transformation as used during training
# Preprocessing pipeline: waveform -> mel spectrogram -> decibel scale -> 3 channels.
transformation = transforms.Compose(
    [
        # 128-band mel spectrogram computed at the shared sample rate.
        torchaudio.transforms.MelSpectrogram(n_mels=128, sample_rate=SAMPLE_RATE),
        # Convert the power spectrogram to dB, keeping at most 80 dB below the peak.
        torchaudio.transforms.AmplitudeToDB(top_db=80, stype='power'),
        # Repeat the single-channel spectrogram into three channels.
        MonoToColor(num_channels=3),
    ]
)

In [6]:
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import QThread, pyqtSignal
import torch
import torch.nn as nn
import torchaudio
import sounddevice as sd
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.autograd import Variable

# Sample rate (Hz) for microphone capture; currently identical to SAMPLE_RATE.
sample_rate = SAMPLE_RATE
# Sample rate (Hz) that captured audio is brought to before the transform; also SAMPLE_RATE.
target_sample_rate = SAMPLE_RATE

# Latest predicted class name, updated by the SoundAnalysis thread.
# 'a' is just the initial placeholder value.
predicted_label = 'a'

# Sound Analysis class running on a separate thread
class SoundAnalysis(QThread):
    """Background thread that records audio, classifies it and reports results.

    Each loop iteration records a short mono clip from the default input
    device, prepares it with ``transformation``, runs ``model`` on it and, when
    the prediction passes the filters, emits ``result_signal``.

    Args:
        model: Classifier called with a batch of transformed clips.
        device: Torch device the input tensor is moved to before inference.
        transformation: Callable that turns a ``(1, samples)`` waveform tensor
            into the model input.
        sample_rate: Capture sample rate in Hz.
        target_sample_rate: Rate the capture is resampled to. Defaults to
            ``sample_rate`` (no resampling).
        confidence_threshold: Minimum softmax probability needed to report a
            prediction. The default ``0.0`` reports every prediction.
        parent: Optional Qt parent object.
    """

    # Carries a human-readable message for every reported prediction.
    result_signal = pyqtSignal(str)

    # Class names, indexed by the model's output position.
    CLASS_LABELS = (
        'air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling',
        'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music',
    )
    # Predictions with these labels are never reported.
    EXCLUDED_LABELS = frozenset(('air_conditioner', 'children_playing', 'street_music'))
    # Length of each microphone capture, in seconds.
    RECORD_SECONDS = 2.0
    # Pause after a failed iteration so a persistent error does not spin the CPU.
    ERROR_BACKOFF_MS = 500

    def __init__(self, model, device, transformation, sample_rate,
                 target_sample_rate=None, confidence_threshold=0.0, parent=None):
        super().__init__(parent)
        self.model = model
        self.device = device
        self.transformation = transformation
        self.sample_rate = sample_rate
        self.target_sample_rate = sample_rate if target_sample_rate is None else target_sample_rate
        self.confidence_threshold = confidence_threshold
        # Built once; only needed when the capture and target rates differ.
        self._resampler = None
        if self.sample_rate != self.target_sample_rate:
            self._resampler = torchaudio.transforms.Resample(self.sample_rate, self.target_sample_rate)

    def stop(self, timeout_ms=5000):
        """Ask the loop to finish and wait up to ``timeout_ms`` for it.

        Returns:
            True if the thread finished within the timeout, False otherwise.
        """
        self.requestInterruption()
        return self.wait(timeout_ms)

    def run(self):
        """Record, classify and report until an interruption is requested."""
        global predicted_label
        print("SoundAnalysis thread started")  # Print message at start of thread
        self.model.eval()  # set model to evaluation mode

        while not self.isInterruptionRequested():
            try:
                features = self._preprocess(self._record())
                label, predicted_confidence = self._classify(features)

                # Kept up to date for code that reads the module-level value.
                predicted_label = label

                # Report only confident predictions whose label is not excluded.
                if predicted_confidence >= self.confidence_threshold and label not in self.EXCLUDED_LABELS:
                    print(f"The predicted class is: {label}, with confidence: {predicted_confidence:.2%}")  # Print message before emitting signal
                    self.result_signal.emit(f"The predicted class is: {label}")
            except Exception as exc:
                # Keep the thread alive, but say what went wrong and back off.
                print(f"Exception occurred in SoundAnalysis thread: {exc}")
                self.msleep(self.ERROR_BACKOFF_MS)

    def _record(self):
        """Capture one mono clip and return it as a ``(1, frames)`` float tensor."""
        frames = int(self.RECORD_SECONDS * self.sample_rate)
        recording = sd.rec(frames, samplerate=self.sample_rate, channels=1)
        sd.wait()  # block until the capture has finished
        # sounddevice returns (frames, channels); switch to (channels, frames).
        return torch.from_numpy(recording).float().transpose(0, 1)

    def _preprocess(self, waveform):
        """Resample, mix down, crop or pad ``waveform`` and apply the transform."""
        if self._resampler is not None:
            waveform = self._resampler(waveform)

        # Mix down to a single channel if necessary.
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        # The clip is fixed to ``target_sample_rate`` samples, i.e. one second,
        # even though RECORD_SECONDS of audio are captured.
        # NOTE(review): presumably this matches the training clip length - confirm.
        num_samples = self.target_sample_rate
        if waveform.shape[1] > num_samples:
            waveform = waveform[:, :num_samples]
        elif waveform.shape[1] < num_samples:
            num_missing_samples = num_samples - waveform.shape[1]
            waveform = nn.functional.pad(waveform, (0, num_missing_samples))

        return self.transformation(waveform)

    def _classify(self, features):
        """Run the model on one clip and return ``(label, probability)``."""
        with torch.no_grad():  # inference only, no gradients needed
            outputs = self.model(features.to(self.device)[None, ...])
            probabilities = nn.functional.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)
        index = predicted.item()
        return self.CLASS_LABELS[index], probabilities[0, index].item()

class Ui_MainWindow(object):
    """Builds the main window and shows the latest prediction as text and image.

    ``setupUi`` creates the widgets, starts the ``SoundAnalysis`` thread and
    connects its ``result_signal`` to ``updateLabel`` and ``updateLabel2``.
    """

    # Text that precedes the class name in the messages received by the slots.
    MESSAGE_PREFIX = "The predicted class is: "

    @staticmethod
    def _labelFromMessage(text):
        """Return the class name carried by ``text``.

        The known message prefix is removed when present; any other text is
        returned unchanged.
        """
        prefix = Ui_MainWindow.MESSAGE_PREFIX
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    def setupUi(self, MainWindow):
        """Create the widgets on ``MainWindow`` and start the analysis thread."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1920, 1080)
        self.received_text = ""
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")

        # Text label showing the predicted class name.
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(30, 430, 971, 211))
        self.label.setAlignment(QtCore.Qt.AlignCenter)
        self.label.setObjectName("label")

        # Image label showing "<class name>.png".
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setGeometry(QtCore.QRect(30, 60, 971, 351))
        self.label_2.setText("")
        self.label_2.setObjectName("image")

        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 29))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

        # Initialize SoundAnalysis and connect the result_signal with the update slots
        self.sound_analysis = SoundAnalysis(model, device, transformation, SAMPLE_RATE)
        self.sound_analysis.result_signal.connect(self.updateLabel)
        self.sound_analysis.result_signal.connect(self.updateLabel2)

        # Ask the worker to finish when the application is about to quit.
        app = QtCore.QCoreApplication.instance()
        if app is not None:
            app.aboutToQuit.connect(self._stopSoundAnalysis)

        self.sound_analysis.start()  # Start the sound analysis thread

    def _stopSoundAnalysis(self):
        """Request the analysis thread to stop and wait briefly for it.

        Uses ``QThread.requestInterruption()``; a ``run()`` loop that never
        checks ``isInterruptionRequested()`` will not stop, in which case the
        wait simply times out.
        """
        worker = self.sound_analysis
        if worker.isRunning():
            worker.requestInterruption()
            worker.wait(3000)

    def updateLabel2(self, text):
        """Show the image named after the class carried by ``text``."""
        # Use the signal payload rather than a global shared with the worker thread.
        label = self._labelFromMessage(text)
        full_file_name = f"{label}.png"
        self.label_2.setPixmap(QtGui.QPixmap(full_file_name))
        print(label)

    def updateLabel(self, text):
        """Show the class name carried by ``text`` in the text label."""
        print("Received signal")  # Print message when signal is received
        label = self._labelFromMessage(text)
        self.label.setText(label)
        print(label)

    def retranslateUi(self, MainWindow):
        """Set the window title and the text label's font and colour."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.label.setFont(QtGui.QFont("AppleSystemUIFont",20))
        self.label.setStyleSheet("Color : black")
        



if __name__ == "__main__":
    import sys

    # Qt allows only one QApplication per process. Reuse an existing instance so
    # that running this cell again in the same kernel does not create a second one.
    app = QtWidgets.QApplication.instance()
    if app is None:
        app = QtWidgets.QApplication(sys.argv)

    MainWindow = QtWidgets.QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(MainWindow)
    MainWindow.show()
    # Run the Qt event loop and pass its return code on as the exit status.
    sys.exit(app.exec_())

: 

: 

In [None]:
from PyQt5.QtGui import QFontDatabase

# List the font family names Qt reports (helps to pick a valid name for QtGui.QFont).
print(QFontDatabase().families())


['.AppleSystemUIFont', 'Academy Engraved LET', 'Al Bayan', 'Al Nile', 'Al Tarikh', 'American Typewriter', 'Andale Mono', 'Apple Braille', 'Apple Chancery', 'Apple Color Emoji', 'Apple SD Gothic Neo', 'Apple Symbols', 'AppleGothic', 'AppleMyungjo', 'Arial', 'Arial Black', 'Arial Hebrew', 'Arial Hebrew Scholar', 'Arial Narrow', 'Arial Rounded MT Bold', 'Arial Unicode MS', 'Avenir', 'Avenir Next', 'Avenir Next Condensed', 'Ayuthaya', 'Baghdad', 'Bangla MN', 'Bangla Sangam MN', 'Baskerville', 'Beirut', 'Big Caslon', 'Bodoni 72', 'Bodoni 72 Oldstyle', 'Bodoni 72 Smallcaps', 'Bodoni Ornaments', 'Bradley Hand', 'Brush Script MT', 'Chalkboard', 'Chalkboard SE', 'Chalkduster', 'Charter', 'Cochin', 'Comic Sans MS', 'Copperplate', 'Corsiva Hebrew', 'Courier New', 'Damascus', 'DecoType Naskh', 'Devanagari MT', 'Devanagari Sangam MN', 'Didot', 'DIN Alternate', 'DIN Condensed', 'Diwan Kufi', 'Diwan Thuluth', 'Euphemia UCAS', 'Farah', 'Farisi', 'Futura', 'Galvji', 'GB18030 Bitmap', 'Geeza Pro', 'Gene